(in-package #:user-side-compiler) (defvar *parser-debug* nil "Parser will try to print with FORMAT here. Bind it to a stream to see the parser's debug output.") ;;; Token stream (defvar *token-stream*) (defun previous-token () (cdr *token-stream*)) (defun peek-token () (caar *token-stream*)) (defun match-token (token) (labels ((pop-token () (setf (cdr *token-stream*) (pop (car *token-stream*))) (let ((got (previous-token))) got))) (cond ((and (keywordp token) (typep (peek-token) 'token-keyword) (eql token (name (peek-token)))) (prog1 (pop-token) (format *parser-debug* "Matched a keyword token ~A (Context: ~A)~%" token (if (boundp '*current-matcher*) *current-matcher* nil)))) ((and (not (keywordp token)) (typep (peek-token) token)) (prog1 (pop-token) (format *parser-debug* "Matched a token ~A against type ~A (Context: ~A)~%" (previous-token) token (if (boundp '*current-matcher*) *current-matcher* nil)))) ((null (peek-token)) (error-parser (source (previous-token)) "Ran out of tokens while parsing.")) (t nil)))) (defun consume-token (token &optional error-message) (unless (match-token token) (if (null error-message) (error-parser (source (previous-token)) "Expected token of type ~A, got a ~A." token (type-of (peek-token))) (error-parser (source (previous-token)) "~A~%Got token ~A instead." error-message (peek-token) (source (previous-token)))))) (defun make-token-stream (tokens) ;; Appending an end-of-statement if one is missing so that we always ;; have something to latch onto when dealing with syntax errors at the end ;; of the program's text. It has the same source location as the actual ;; last token. (let ((last (last tokens))) (unless (typep (car last) 'token-end-of-statement) (setf (cdr last) (cons (make-instance 'token-end-of-statement :source (source (car last))) nil)))) (assert (every (complement #'null) tokens)) (cons tokens nil)) ;;; Matcher (defparameter *syntax-matchers* (make-hash-table)) (defvar *syntax-source*) (defvar *current-matcher*) (defmacro define-syntax-matcher (name (&rest vars) &body body) `(setf (gethash ',name *syntax-matchers*) (lambda () (let (,@vars (*syntax-source* (source (peek-token))) (*current-matcher* ',name)) ,@body)))) (defun %match-syntax (matcher-name) (multiple-value-bind (function existsp) (gethash matcher-name *syntax-matchers*) (unless existsp (error "No such syntax matcher ~A" matcher-name)) (format *parser-debug* "Entering matcher ~A~%" matcher-name) (prog1 (funcall function) (format *parser-debug* "Exiting matcher ~A~%" matcher-name)))) (defmacro match-syntax (matcher-name) `(%match-syntax ',matcher-name)) ;;; Syntax patterns (defmacro define-binary-expr-matcher (name next &rest operator-node-pairs) `(define-syntax-matcher ,name (left operator right) (setf left (match-syntax ,next)) (loop :while (or ,@(loop :for (operator node) :in operator-node-pairs :collect `(match-token ',operator))) :do (setf operator (previous-token)) :do (setf right (match-syntax ,next)) :do (setf left (make-instance (etypecase operator ,@(mapcar (lambda (pair) `(,(car pair) ',(cadr pair))) operator-node-pairs)) :operator-token operator :source *syntax-source* :operands (list left right)))) left)) (define-syntax-matcher expression () (match-syntax equality)) (define-binary-expr-matcher equality comparison (token-equal-equal node-expr-test-equal) (token-not-equal node-expr-test-not-equal)) (define-binary-expr-matcher comparison term (token-less-than node-expr-test-less) (token-less-or-equal node-expr-test-less-or-equal) (token-greater-than node-expr-test-greater) (token-greater-or-equal node-expr-test-greater-or-equal)) (define-binary-expr-matcher term factor (token-plus node-expr-plus) (token-minus node-expr-minus)) (define-binary-expr-matcher factor unary (token-star node-expr-multiply) (token-slash node-expr-divide)) (define-syntax-matcher unary () (let ((unary-tokens '(token-not token-minus))) (if (some #'match-token unary-tokens) (make-instance (etypecase (previous-token) (token-not 'node-expr-not) (token-minus 'node-expr-negate)) :source *syntax-source* :operator-token (previous-token) :operand (match-syntax primary)) (match-syntax call)))) (define-syntax-matcher arglist (arguments) (setf arguments (list (match-syntax expression))) (loop :while (match-token 'token-comma) :do (push (match-syntax expression) arguments)) (nreverse arguments)) (define-syntax-matcher call (name arguments) (setf name (match-syntax primary)) (cond ((and (typep name 'token-name) (match-token 'token-open-paren)) (cond ((match-token 'token-close-paren) (setf arguments '())) (t (setf arguments (match-syntax arglist)) (consume-token 'token-close-paren "Close parenthesis ')' is required to end function call argument list, comma ',' is required to separate arguments."))) (make-instance 'node-call :source *syntax-source* :callee (transform name 'asm-function) :arguments arguments)) (t name))) (define-syntax-matcher primary () (cond ((or (match-token 'token-name) (match-token 'token-number)) (previous-token)) ((match-token 'token-open-paren) (let ((expr (match-syntax expression))) (consume-token 'token-close-paren "Closing parenthesis ')' required after grouping expression.") (make-instance 'node-expr-grouping :source *syntax-source* :expression expr))) ((match-token 'token-end-of-statement) (error-parser (source (previous-token)) "End-of-statement in the middle of an expression.")) (t (error-parser (source (peek-token)) "Unexpected token ~S in unexpected situation." (peek-token))))) (define-syntax-matcher assignment (l-value r-value) (setf l-value (match-syntax expression)) (if (and (typep l-value 'token-name) (match-token 'token-equal)) (progn (setf r-value (match-syntax expression)) (make-instance 'node-assignment :source *syntax-source* :variable (transform l-value 'reference-variable) :value r-value)) l-value)) (define-syntax-matcher statement () (cond ((match-token :if) (prog1 (match-syntax if) (unless (or (match-token 'token-end-of-statement) (null (peek-token))) (error-parser (source (previous-token)) "End-of-statement required after IF's THEN, got ~A" (peek-token))))) ((match-token :for) (prog1 (match-syntax for-do-times) (unless (or (match-token 'token-end-of-statement) (null (peek-token))) (error-parser (source (previous-token)) "End-of-statement required after FOR's THEN, got ~A" (peek-token))))) ((match-token 'token-end-of-statement) ;; Empty statement (make-instance 'node-nop :source *syntax-source*)) (t (let ((expr (match-syntax assignment))) (consume-token 'token-end-of-statement "Couldn't find end of expression.") expr)))) (defun wire-up-statements (statements) (loop :for statement :in statements :for rest := (rest statements) :then (rest rest) :for next := (first rest)) statements) (define-syntax-matcher block (statements) (loop :for statement := (match-syntax statement) :unless (typep statement 'node-nop) :do (push statement statements) :when (null (peek-token)) :do (error-parser (source (previous-token)) "Unterminated block, check for missing ENDs and ELSEs.") :until (or (match-token :end) (match-token :else))) (setf statements (wire-up-statements (nreverse statements))) (make-instance 'node-block :source *syntax-source* :statements statements)) (defmacro match-syntax-pattern (syntax-list error-messages) (append '(progn) (loop :for syntax :in syntax-list :collect (cond ((symbolp syntax) ;; Match token, ignore everything except its existence `(consume-token ',syntax ,(cadr (assoc syntax error-messages)))) ((and (listp syntax) (symbolp (second syntax)) (not (keywordp (second syntax)))) ;; Match a variable via a matcher `(setf ,(first syntax) (match-syntax ,(second syntax)))) ((and (listp syntax) (listp (second syntax)) (eql (first (second syntax)) :token)) ;; Match a variable to a token (let ((var (first syntax)) (desired-token (second (second syntax)))) `(progn (setf ,var (match-token ',desired-token)) (when (null ,var) (error-parser (source (previous-token)) ,(cadr (assoc var error-messages))))))))))) (define-syntax-matcher for-do-times (var n code) (match-syntax-pattern ((var (:token token-name)) :do (n expression) :times token-end-of-statement (code block)) ((:do "FOR requires a DO.") (:times "FOR loop count must be followed by TIMES.") (var "A loop index variable must be supplied."))) (unless (eql (name (previous-token)) :end) (error-parser (source (previous-token)) "FOR DO TIMES loop may only end with END. Found ~A." (previous-token))) (make-instance 'node-dotimes :source *syntax-source* :counter-ref (transform var 'reference-variable) :stop-ref (transform n 'reference) :loopee-node (transform code 'node))) (define-syntax-matcher if (test then else) (match-syntax-pattern ((test expression) :then token-end-of-statement (then block)) ((token-end-of-statement "THEN must be followed by a line break") (:then "IF condition must be followed by THEN"))) (cond ((eql (name (previous-token)) :end)) ((eql (name (previous-token)) :else) (setf else (match-syntax block))) (t (error-parser (source (previous-token)) "IF block must be closed with END, found ~A." (previous-token)))) (make-instance 'node-conditional :source *syntax-source* :test test :then (transform then 'node) :else (if (null else) nil (transform else 'node)))) (define-syntax-matcher program (statements) (loop :for statement := (match-syntax statement) :unless (typep statement 'node-nop) :do (push statement statements) :until (null (peek-token))) (setf statements (wire-up-statements (nreverse statements))) (make-instance 'node-program :source *syntax-source* :statements statements))