(in-package #:user-side-compiler) (defvar *parser-debug* nil "Parser will try to print with FORMAT here. Bind it to a stream to see the parser's debug output.") ;;; Token stream (defvar *token-stream*) (defvar *token-comment* nil) (defun previous-token () (cdr *token-stream*)) (defun peek-token () (caar *token-stream*)) (defun match-token (token) (labels ((pop-token () (setf (cdr *token-stream*) (pop (car *token-stream*))) (let ((got (previous-token))) (if (comment-p (peek-token)) (setf *token-comment* (pop (car *token-stream*))) (setf *token-comment* nil)) got))) (cond ((and (keywordp token) (typep (peek-token) 'token-name) (eql token (name (peek-token)))) (prog1 (pop-token) (format *parser-debug* "Matched a keyword token ~A (Context: ~A)~%" token (if (boundp '*current-matcher*) *current-matcher* nil)))) ((and (not (keywordp token)) (typep (peek-token) token)) (prog1 (pop-token) (format *parser-debug* "Matched a token ~A against type ~A (Context: ~A)~%" (previous-token) token (if (boundp '*current-matcher*) *current-matcher* nil)))) (t nil)))) (defun consume-token (token &optional error-message) (unless (match-token token) (error (if (null error-message) (format nil "Expected token of type ~A, got a ~A.~%Source: ~A" token (type-of (peek-token)) (source (previous-token))) (format nil "~A~%Got ~A.~%Source: ~A" error-message (previous-token) (source (previous-token))))))) (defun make-token-stream (tokens) (cons tokens nil)) ;;; Matcher (defparameter *syntax-matchers* (make-hash-table)) (defvar *syntax-source*) (defvar *current-matcher*) (defmacro define-syntax-matcher (name (&rest vars) &body body) `(setf (gethash ',name *syntax-matchers*) (lambda () (let (,@vars (*syntax-source* (source (peek-token))) (*current-matcher* ',name)) ,@body)))) (defun %match-syntax (matcher-name) (multiple-value-bind (function existsp) (gethash matcher-name *syntax-matchers*) (unless existsp (error "No such syntax matcher ~A" matcher-name)) (format *parser-debug* "Entering matcher ~A~%" matcher-name) (prog1 (funcall function) (format *parser-debug* "Exiting matcher ~A~%" matcher-name)))) (defmacro match-syntax (matcher-name) `(%match-syntax ',matcher-name)) ;;; Syntax patterns (defclass node-expr-equality (node-expr-binary) ((%operator :accessor operator :initarg :operator) (%operands :accessor operands :initarg :operands))) (defmacro define-binary-expr-matcher (name next &rest operator-node-pairs) `(define-syntax-matcher ,name (left operator right) (setf left (match-syntax ,next)) (loop :while (or ,@(loop :for (operator node) :in operator-node-pairs :collect `(match-token ',operator))) :do (setf operator (previous-token)) :do (setf right (match-syntax ,next)) :do (setf left (make-instance (etypecase operator ,@(mapcar (lambda (pair) `(,(car pair) ',(cadr pair))) operator-node-pairs)) :operator-token operator :source *syntax-source* :operands (list left right)))) left)) (define-syntax-matcher expression () (match-syntax call)) (define-syntax-matcher arglist (arguments) (setf arguments (list (match-syntax expression))) (loop :while (match-token 'token-comma) :do (push (match-syntax expression) arguments)) (nreverse arguments)) (define-syntax-matcher call (name arguments) (setf name (match-syntax equality)) (cond ((and (typep name 'token-name) (match-token 'token-open-paren)) (setf arguments (match-syntax arglist)) (consume-token 'token-close-paren "Close parenthesis ')' is required to end function call argument list.") (make-instance 'node-call :source *syntax-source* :comment *token-comment* :callee (transform name 'asm-function) :arguments arguments)) (t name))) (define-binary-expr-matcher equality comparison (token-equal-equal node-expr-test-equal) (token-not-equal node-expr-test-not-equal)) (define-binary-expr-matcher comparison term ;; TODO: The rest (token-less-than node-expr-test-equal) (token-greater-than node-expr-test-not-equal)) (define-binary-expr-matcher term factor (token-plus node-expr-plus) (token-minus node-expr-minus)) (define-binary-expr-matcher factor unary (token-star node-expr-multiply) (token-slash node-expr-divide)) (define-syntax-matcher unary () (let ((unary-tokens '(token-not token-minus))) (if (some #'match-token unary-tokens) (make-instance (etypecase (previous-token) (token-not 'node-expr-not) (token-minus 'node-expr-negate)) :source *syntax-source* :comment *token-comment* :operator-token (previous-token) :operand (match-syntax primary)) (match-syntax primary)))) (define-syntax-matcher primary () (cond ((or (match-token 'token-name) (match-token 'token-number)) (previous-token)) ((match-token 'token-open-paren) (let ((expr (match-syntax expression))) (unless (match-token 'token-close-paren) ;; TODO (error "Parser error: Closing parenthesis ')' required after expression.")) (make-instance 'node-expr-grouping :source *syntax-source* :comment *token-comment* :expression expr))) ((match-token 'token-end-of-statement) (error "End-of-statement in the middle of an expression")) (t (error "What token?! Is this?! (DAH DAH DAAAAH)~%~S~%" (peek-token))))) (define-syntax-matcher assignment (l-value r-value) (setf l-value (match-syntax expression)) (if (and (typep l-value 'token-name) (match-token 'token-equal)) (progn (setf r-value (match-syntax expression)) (make-instance 'node-assignment :source *syntax-source* :comment *token-comment* :variable (transform l-value 'reference-variable) :value r-value)) l-value)) (define-syntax-matcher statement () (cond ((match-token :if) (prog1 (match-syntax if) (unless (or (match-token 'token-end-of-statement) (null (peek-token))) (error "EOS required after IF, got ~A" (peek-token))))) ((match-token :for) (prog1 (match-syntax for-do-times) (unless (or (match-token 'token-end-of-statement) (null (peek-token))) (error "EOS required after FOR, got ~A" (peek-token))))) ((match-token 'token-end-of-statement) ;; Empty statement, might contain comment (make-instance 'node-nop :source *syntax-source* :comment *token-comment*)) (t (let ((expr (match-syntax assignment))) (setf (comment expr) *token-comment*) (consume-token 'token-end-of-statement (format nil "Couldn't find end of expression. ~A found instead." (peek-token))) expr)))) (defun wire-up-statements (statements) (loop :for statement :in statements :for rest := (rest statements) :then (rest rest) :for next := (first rest) :do (setf (normal-next statement) next)) statements) (define-syntax-matcher block (statements) (loop :for statement := (match-syntax statement) :unless (typep statement 'node-nop) :do (push statement statements) :until (or (match-token :end) (match-token :else))) (setf statements (wire-up-statements (nreverse statements))) (make-instance 'node-block :source *syntax-source* :statements statements :next (first statements))) (defmacro match-syntax-pattern (syntax-list error-messages) (append '(progn) (loop :for syntax :in syntax-list :collect (cond ((symbolp syntax) ;; Match token, ignore everything except its existence `(consume-token ',syntax ,(cadr (assoc syntax error-messages)))) ((and (listp syntax) (symbolp (second syntax)) (not (keywordp (second syntax)))) ;; Match a variable via a matcher `(setf ,(first syntax) (match-syntax ,(second syntax)))) ((and (listp syntax) (listp (second syntax)) (eql (first (second syntax)) :token)) ;; Match a variable to a token (let ((var (first syntax)) (desired-token (second (second syntax)))) `(progn (setf ,var (match-token ',desired-token)) (when (null ,var) (error ,(cadr (assoc var error-messages))))))))))) (define-syntax-matcher for-do-times (var n code) (match-syntax-pattern ((var (:token token-name)) :do (n expression) :times token-end-of-statement (code block)) ((:do "FOR requires a DO.") (:times "FOR loop count must be followed by TIMES.") (var "A loop index variable must be supplied."))) (unless (eql (name (previous-token)) :end) (error "FOR DO TIMES loop may only end with END. Found ~A." (previous-token))) (make-instance 'node-dotimes :source *syntax-source* :counter-ref (transform var 'reference-variable) :stop-ref (transform n 'reference) :loopee-node (transform code 'node))) (defclass node-conditional (node) ((%test :accessor test-node :initarg :test) (%then :accessor then-node :initarg :then) (%else :accessor else-node :initarg :else :initform nil))) (defmethod node-nexts ((node node-conditional)) (append (list (test-node node) (then-node node)) (unless (null (else-node node)) (list (else-node node))) (call-next-method))) (define-syntax-matcher if (test then else) (match-syntax-pattern ((test expression) :then token-end-of-statement (then block)) ((token-end-of-statement "THEN must be followed by a line break") (:then "IF condition must be followed by THEN"))) (cond ((eql (name (previous-token)) :end)) ((eql (name (previous-token)) :else) (setf else (match-syntax block)) #+(or) (unless (match-token :end) (error "An IF's ELSE block must be followed by END.~%Found ~A instead.~%ELSE IF is not yet supported." (peek-token)))) (t (error "IF block ended very strangely, found ~A." (peek-token)))) (make-instance 'node-conditional :source *syntax-source* :test (transform test 'node-expr) :then (transform then 'node) :else (if (null else) nil (transform else 'node)))) (define-syntax-matcher program (statements) (let ((*token-comment* nil)) (loop :for statement := (match-syntax statement) :unless (typep statement 'node-nop) :do (push statement statements) :until (null (peek-token))) (setf statements (wire-up-statements (nreverse statements))) (make-instance 'node-program :source *syntax-source* :statements statements :next (first statements)))) ;;; Testing jigs (defmacro do-node-tree ((node start-node) &body body) (let ((stack (gensym)) (current (gensym))) `(loop :with ,stack := (list (normal-next ,start-node)) :for ,current := (pop ,stack) :until (null ,current) :do (setf ,stack (append (remove nil (node-nexts ,current)) ,stack)) :do (let ((,node ,current)) ,@body))))