313 lines
12 KiB
Common Lisp
313 lines
12 KiB
Common Lisp
(in-package #:user-side-compiler)
|
|
|
|
(defvar *parser-debug* nil
|
|
"Parser will try to print with FORMAT here. Bind it to a stream to see the
|
|
parser's debug output.")
|
|
|
|
;;; Token stream
|
|
|
|
(defvar *token-stream*)
|
|
|
|
(defun previous-token ()
|
|
(cdr *token-stream*))
|
|
|
|
(defun peek-token ()
|
|
(caar *token-stream*))
|
|
|
|
(defun match-token (token)
|
|
(labels
|
|
((pop-token ()
|
|
(setf (cdr *token-stream*) (pop (car *token-stream*)))
|
|
(let ((got (previous-token)))
|
|
got)))
|
|
(cond ((and (keywordp token)
|
|
(typep (peek-token) 'token-keyword)
|
|
(eql token (name (peek-token))))
|
|
(prog1
|
|
(pop-token)
|
|
(format *parser-debug* "Matched a keyword token ~A (Context: ~A)~%"
|
|
token
|
|
(if (boundp '*current-matcher*)
|
|
*current-matcher*
|
|
nil))))
|
|
((and (not (keywordp token))
|
|
(typep (peek-token) token))
|
|
(prog1
|
|
(pop-token)
|
|
(format *parser-debug* "Matched a token ~A against type ~A (Context: ~A)~%"
|
|
(previous-token) token
|
|
(if (boundp '*current-matcher*)
|
|
*current-matcher*
|
|
nil))))
|
|
((null (peek-token))
|
|
(error-parser (source (previous-token))
|
|
"Ran out of tokens while parsing."))
|
|
(t nil))))
|
|
|
|
(defun consume-token (token &optional error-message)
|
|
(unless (match-token token)
|
|
(if (null error-message)
|
|
(error-parser (source (previous-token))
|
|
"Expected token of type ~A, got a ~A."
|
|
token (type-of (peek-token)))
|
|
(error-parser (source (previous-token))
|
|
"~A~%Got token ~A instead."
|
|
error-message (peek-token)
|
|
(source (previous-token))))))
|
|
|
|
(defun make-token-stream (tokens)
|
|
;; Appending an end-of-statement if one is missing so that we always
|
|
;; have something to latch onto when dealing with syntax errors at the end
|
|
;; of the program's text. It has the same source location as the actual
|
|
;; last token.
|
|
(let ((last (last tokens)))
|
|
(unless (typep (car last) 'token-end-of-statement)
|
|
(setf (cdr last) (cons (make-instance 'token-end-of-statement
|
|
:source (source (car last)))
|
|
nil))))
|
|
(assert (every (complement #'null) tokens))
|
|
(cons tokens nil))
|
|
|
|
;;; Matcher
|
|
|
|
(defparameter *syntax-matchers* (make-hash-table))
|
|
(defvar *syntax-source*)
|
|
(defvar *current-matcher*)
|
|
|
|
(defmacro define-syntax-matcher (name (&rest vars) &body body)
|
|
`(setf (gethash ',name *syntax-matchers*)
|
|
(lambda ()
|
|
(let (,@vars
|
|
(*syntax-source* (source (peek-token)))
|
|
(*current-matcher* ',name))
|
|
,@body))))
|
|
|
|
(defun %match-syntax (matcher-name)
|
|
(multiple-value-bind (function existsp)
|
|
(gethash matcher-name *syntax-matchers*)
|
|
(unless existsp
|
|
(error "No such syntax matcher ~A" matcher-name))
|
|
(format *parser-debug* "Entering matcher ~A~%" matcher-name)
|
|
(prog1
|
|
(funcall function)
|
|
(format *parser-debug* "Exiting matcher ~A~%" matcher-name))))
|
|
|
|
(defmacro match-syntax (matcher-name)
|
|
`(%match-syntax ',matcher-name))
|
|
|
|
;;; Syntax patterns
|
|
|
|
(defmacro define-binary-expr-matcher (name next &rest operator-node-pairs)
|
|
`(define-syntax-matcher ,name (left operator right)
|
|
(setf left (match-syntax ,next))
|
|
(loop :while (or ,@(loop :for (operator node) :in operator-node-pairs
|
|
:collect `(match-token ',operator)))
|
|
:do (setf operator (previous-token))
|
|
:do (setf right (match-syntax ,next))
|
|
:do (setf left (make-instance (etypecase operator
|
|
,@(mapcar (lambda (pair)
|
|
`(,(car pair) ',(cadr pair)))
|
|
operator-node-pairs))
|
|
:operator-token operator
|
|
:source *syntax-source*
|
|
:operands (list left right))))
|
|
left))
|
|
|
|
(define-syntax-matcher expression ()
|
|
(match-syntax equality))
|
|
|
|
(define-binary-expr-matcher equality comparison
|
|
(token-equal-equal node-expr-test-equal)
|
|
(token-not-equal node-expr-test-not-equal))
|
|
|
|
(define-binary-expr-matcher comparison term
|
|
(token-less-than node-expr-test-less)
|
|
(token-less-or-equal node-expr-test-less-or-equal)
|
|
(token-greater-than node-expr-test-greater)
|
|
(token-greater-or-equal node-expr-test-greater-or-equal))
|
|
|
|
(define-binary-expr-matcher term factor
|
|
(token-plus node-expr-plus)
|
|
(token-minus node-expr-minus))
|
|
|
|
(define-binary-expr-matcher factor unary
|
|
(token-star node-expr-multiply)
|
|
(token-slash node-expr-divide))
|
|
|
|
(define-syntax-matcher unary ()
|
|
(let ((unary-tokens '(token-not token-minus)))
|
|
(if (some #'match-token unary-tokens)
|
|
(make-instance (etypecase (previous-token)
|
|
(token-not 'node-expr-not)
|
|
(token-minus 'node-expr-negate))
|
|
:source *syntax-source*
|
|
:operator-token (previous-token)
|
|
:operand (match-syntax primary))
|
|
(match-syntax call))))
|
|
|
|
(define-syntax-matcher arglist (arguments)
|
|
(setf arguments (list (match-syntax expression)))
|
|
(loop :while (match-token 'token-comma)
|
|
:do (push (match-syntax expression) arguments))
|
|
(nreverse arguments))
|
|
|
|
(define-syntax-matcher call (name arguments)
|
|
(setf name (match-syntax primary))
|
|
(cond ((and (typep name 'token-name)
|
|
(match-token 'token-open-paren))
|
|
(cond ((match-token 'token-close-paren)
|
|
(setf arguments '()))
|
|
(t
|
|
(setf arguments (match-syntax arglist))
|
|
(consume-token 'token-close-paren
|
|
"Close parenthesis ')' is required to end function call argument list,
|
|
comma ',' is required to separate arguments.")))
|
|
(make-instance 'node-call
|
|
:source *syntax-source*
|
|
:callee (transform name 'asm-function)
|
|
:arguments arguments))
|
|
(t name)))
|
|
|
|
(define-syntax-matcher primary ()
|
|
(cond ((or (match-token 'token-name)
|
|
(match-token 'token-number))
|
|
(previous-token))
|
|
((match-token 'token-open-paren)
|
|
(let ((expr (match-syntax expression)))
|
|
(consume-token 'token-close-paren
|
|
"Closing parenthesis ')' required after grouping expression.")
|
|
(make-instance 'node-expr-grouping
|
|
:source *syntax-source*
|
|
:expression expr)))
|
|
((match-token 'token-end-of-statement)
|
|
(error-parser (source (previous-token))
|
|
"End-of-statement in the middle of an expression."))
|
|
(t (error-parser (source (peek-token))
|
|
"Unexpected token ~S in unexpected situation."
|
|
(peek-token)))))
|
|
|
|
(define-syntax-matcher assignment (l-value r-value)
|
|
(setf l-value (match-syntax expression))
|
|
(if (and (typep l-value 'token-name)
|
|
(match-token 'token-equal))
|
|
(progn
|
|
(setf r-value (match-syntax expression))
|
|
(make-instance 'node-assignment
|
|
:source *syntax-source*
|
|
:variable (transform l-value 'reference-variable)
|
|
:value r-value))
|
|
l-value))
|
|
|
|
(define-syntax-matcher statement ()
|
|
(cond ((match-token :if)
|
|
(prog1
|
|
(match-syntax if)
|
|
(unless (or (match-token 'token-end-of-statement)
|
|
(null (peek-token)))
|
|
(error-parser (source (previous-token))
|
|
"End-of-statement required after IF's THEN, got ~A"
|
|
(peek-token)))))
|
|
((match-token :for)
|
|
(prog1
|
|
(match-syntax for-do-times)
|
|
(unless (or (match-token 'token-end-of-statement)
|
|
(null (peek-token)))
|
|
(error-parser (source (previous-token))
|
|
"End-of-statement required after FOR's THEN, got ~A"
|
|
(peek-token)))))
|
|
((match-token 'token-end-of-statement)
|
|
;; Empty statement
|
|
(make-instance 'node-nop :source *syntax-source*))
|
|
(t
|
|
(let ((expr (match-syntax assignment)))
|
|
(consume-token 'token-end-of-statement
|
|
"Couldn't find end of expression.")
|
|
expr))))
|
|
|
|
(defun wire-up-statements (statements)
|
|
(loop :for statement :in statements
|
|
:for rest := (rest statements) :then (rest rest)
|
|
:for next := (first rest))
|
|
statements)
|
|
|
|
(define-syntax-matcher block (statements)
|
|
(loop :for statement := (match-syntax statement)
|
|
:unless (typep statement 'node-nop)
|
|
:do (push statement statements)
|
|
:when (null (peek-token))
|
|
:do (error-parser (source (previous-token))
|
|
"Unterminated block, check for missing ENDs and ELSEs.")
|
|
:until (or (match-token :end)
|
|
(match-token :else)))
|
|
(setf statements (wire-up-statements (nreverse statements)))
|
|
(make-instance 'node-block
|
|
:source *syntax-source*
|
|
:statements statements))
|
|
|
|
(defmacro match-syntax-pattern (syntax-list error-messages)
|
|
(append
|
|
'(progn)
|
|
(loop :for syntax :in syntax-list
|
|
:collect (cond ((symbolp syntax)
|
|
;; Match token, ignore everything except its existence
|
|
`(consume-token ',syntax
|
|
,(cadr (assoc syntax error-messages))))
|
|
((and (listp syntax) (symbolp (second syntax))
|
|
(not (keywordp (second syntax))))
|
|
;; Match a variable via a matcher
|
|
`(setf ,(first syntax) (match-syntax ,(second syntax))))
|
|
((and (listp syntax) (listp (second syntax))
|
|
(eql (first (second syntax)) :token))
|
|
;; Match a variable to a token
|
|
(let ((var (first syntax))
|
|
(desired-token (second (second syntax))))
|
|
`(progn
|
|
(setf ,var (match-token ',desired-token))
|
|
(when (null ,var)
|
|
(error-parser (source (previous-token))
|
|
,(cadr (assoc var error-messages)))))))))))
|
|
|
|
(define-syntax-matcher for-do-times (var n code)
|
|
(match-syntax-pattern
|
|
((var (:token token-name)) :do (n expression) :times
|
|
token-end-of-statement
|
|
(code block))
|
|
((:do "FOR requires a DO.")
|
|
(:times "FOR loop count must be followed by TIMES.")
|
|
(var "A loop index variable must be supplied.")))
|
|
(unless (eql (name (previous-token)) :end)
|
|
(error-parser (source (previous-token))
|
|
"FOR DO TIMES loop may only end with END. Found ~A." (previous-token)))
|
|
(make-instance 'node-dotimes
|
|
:source *syntax-source*
|
|
:counter-ref (transform var 'reference-variable)
|
|
:stop-ref (transform n 'reference)
|
|
:loopee-node (transform code 'node)))
|
|
|
|
(define-syntax-matcher if (test then else)
|
|
(match-syntax-pattern
|
|
((test expression) :then token-end-of-statement (then block))
|
|
((token-end-of-statement "THEN must be followed by a line break")
|
|
(:then "IF condition must be followed by THEN")))
|
|
(cond ((eql (name (previous-token)) :end))
|
|
((eql (name (previous-token)) :else)
|
|
(setf else (match-syntax block)))
|
|
(t (error-parser (source (previous-token))
|
|
"IF block must be closed with END, found ~A."
|
|
(previous-token))))
|
|
(make-instance 'node-conditional
|
|
:source *syntax-source*
|
|
:test test
|
|
:then (transform then 'node)
|
|
:else (if (null else) nil
|
|
(transform else 'node))))
|
|
|
|
(define-syntax-matcher program (statements)
|
|
(loop :for statement := (match-syntax statement)
|
|
:unless (typep statement 'node-nop)
|
|
:do (push statement statements)
|
|
:until (null (peek-token)))
|
|
(setf statements (wire-up-statements (nreverse statements)))
|
|
(make-instance 'node-program
|
|
:source *syntax-source*
|
|
:statements statements))
|