Add user-side compiler's parser
It ingests tokens via a "token stream", and feeds out a node tree. It's a mostly handwritten recursive descent parser with the occasional Lisp macros for convenience.
This commit is contained in:
parent
f9f2694b96
commit
928cdfd318
3 changed files with 450 additions and 2 deletions
332
wip-duuqnd/user-side-compiler/parser.lisp
Normal file
332
wip-duuqnd/user-side-compiler/parser.lisp
Normal file
|
@ -0,0 +1,332 @@
|
|||
(in-package #:user-side-compiler)
|
||||
|
||||
(defvar *parser-debug* nil
|
||||
"Parser will try to print with FORMAT here. Bind it to a stream to see the
|
||||
parser's debug output.")
|
||||
|
||||
;;; Token stream
|
||||
|
||||
(defvar *token-stream*)
|
||||
(defvar *token-comment* nil)
|
||||
|
||||
(defun previous-token ()
|
||||
(cdr *token-stream*))
|
||||
|
||||
(defun peek-token ()
|
||||
(caar *token-stream*))
|
||||
|
||||
(defun match-token (token)
|
||||
(labels
|
||||
((pop-token ()
|
||||
(setf (cdr *token-stream*) (pop (car *token-stream*)))
|
||||
(let ((got (previous-token)))
|
||||
(if (comment-p (peek-token))
|
||||
(setf *token-comment* (pop (car *token-stream*)))
|
||||
(setf *token-comment* nil))
|
||||
got)))
|
||||
(cond ((and (keywordp token)
|
||||
(typep (peek-token) 'token-name)
|
||||
(eql token (name (peek-token))))
|
||||
(prog1
|
||||
(pop-token)
|
||||
(format *parser-debug* "Matched a keyword token ~A (Context: ~A)~%"
|
||||
token
|
||||
(if (boundp '*current-matcher*)
|
||||
*current-matcher*
|
||||
nil))))
|
||||
((and (not (keywordp token))
|
||||
(typep (peek-token) token))
|
||||
(prog1
|
||||
(pop-token)
|
||||
(format *parser-debug* "Matched a token ~A against type ~A (Context: ~A)~%"
|
||||
(previous-token) token
|
||||
(if (boundp '*current-matcher*)
|
||||
*current-matcher*
|
||||
nil))))
|
||||
(t nil))))
|
||||
|
||||
(defun consume-token (token &optional error-message)
|
||||
(unless (match-token token)
|
||||
(error (if (null error-message)
|
||||
(format nil "Expected token of type ~A, got a ~A.~%Source: ~A"
|
||||
token (type-of (peek-token))
|
||||
(source (previous-token)))
|
||||
(format nil "~A~%Got ~A.~%Source: ~A"
|
||||
error-message
|
||||
(previous-token)
|
||||
(source (previous-token)))))))
|
||||
|
||||
(defun make-token-stream (tokens)
|
||||
(cons tokens nil))
|
||||
|
||||
;;; Matcher
|
||||
|
||||
(defparameter *syntax-matchers* (make-hash-table))
|
||||
(defvar *syntax-source*)
|
||||
(defvar *current-matcher*)
|
||||
|
||||
(defmacro define-syntax-matcher (name (&rest vars) &body body)
|
||||
`(setf (gethash ',name *syntax-matchers*)
|
||||
(lambda ()
|
||||
(let (,@vars
|
||||
(*syntax-source* (source (peek-token)))
|
||||
(*current-matcher* ',name))
|
||||
,@body))))
|
||||
|
||||
(defun %match-syntax (matcher-name)
|
||||
(multiple-value-bind (function existsp)
|
||||
(gethash matcher-name *syntax-matchers*)
|
||||
(unless existsp
|
||||
(error "No such syntax matcher ~A" matcher-name))
|
||||
(format *parser-debug* "Entering matcher ~A~%" matcher-name)
|
||||
(prog1
|
||||
(funcall function)
|
||||
(format *parser-debug* "Exiting matcher ~A~%" matcher-name))))
|
||||
|
||||
(defmacro match-syntax (matcher-name)
|
||||
`(%match-syntax ',matcher-name))
|
||||
|
||||
;;; Syntax patterns
|
||||
|
||||
(defclass node-expr-equality (node-expr-binary)
|
||||
((%operator :accessor operator :initarg :operator)
|
||||
(%operands :accessor operands :initarg :operands)))
|
||||
|
||||
(defmacro define-binary-expr-matcher (name next &rest operator-node-pairs)
|
||||
`(define-syntax-matcher ,name (left operator right)
|
||||
(setf left (match-syntax ,next))
|
||||
(loop :while (or ,@(loop :for (operator node) :in operator-node-pairs
|
||||
:collect `(match-token ',operator)))
|
||||
:do (setf operator (previous-token))
|
||||
:do (setf right (match-syntax ,next))
|
||||
:do (setf left (make-instance (etypecase operator
|
||||
,@(mapcar (lambda (pair)
|
||||
`(,(car pair) ',(cadr pair)))
|
||||
operator-node-pairs))
|
||||
:operator-token operator
|
||||
:source *syntax-source*
|
||||
:operands (list left right))))
|
||||
left))
|
||||
|
||||
(define-syntax-matcher expression ()
|
||||
(match-syntax call))
|
||||
|
||||
(define-syntax-matcher arglist (arguments)
|
||||
(setf arguments (list (match-syntax expression)))
|
||||
(loop :while (match-token 'token-comma)
|
||||
:do (push (match-syntax expression) arguments))
|
||||
(nreverse arguments))
|
||||
|
||||
(define-syntax-matcher call (name arguments)
|
||||
(setf name (match-syntax equality))
|
||||
(cond ((and (typep name 'token-name)
|
||||
(match-token 'token-open-paren))
|
||||
(setf arguments (match-syntax arglist))
|
||||
(consume-token 'token-close-paren
|
||||
"Close parenthesis ')' is required to end function call argument list.")
|
||||
(make-instance 'node-call
|
||||
:source *syntax-source*
|
||||
:comment *token-comment*
|
||||
:callee (transform name 'asm-function)
|
||||
:arguments arguments))
|
||||
(t name)))
|
||||
|
||||
(define-binary-expr-matcher equality comparison
|
||||
(token-equal-equal node-expr-test-equal)
|
||||
(token-not-equal node-expr-test-not-equal))
|
||||
|
||||
(define-binary-expr-matcher comparison term
|
||||
;; TODO: The rest
|
||||
(token-less-than node-expr-test-equal)
|
||||
(token-greater-than node-expr-test-not-equal))
|
||||
|
||||
(define-binary-expr-matcher term factor
|
||||
(token-plus node-expr-plus)
|
||||
(token-minus node-expr-minus))
|
||||
|
||||
(define-binary-expr-matcher factor unary
|
||||
(token-star node-expr-multiply)
|
||||
(token-slash node-expr-divide))
|
||||
|
||||
(define-syntax-matcher unary ()
|
||||
(let ((unary-tokens '(token-not token-minus)))
|
||||
(if (some #'match-token unary-tokens)
|
||||
(make-instance (etypecase (previous-token)
|
||||
(token-not 'node-expr-not)
|
||||
(token-minus 'node-expr-negate))
|
||||
:source *syntax-source*
|
||||
:comment *token-comment*
|
||||
:operator-token (previous-token)
|
||||
:operand (match-syntax primary))
|
||||
(match-syntax primary))))
|
||||
|
||||
(define-syntax-matcher primary ()
|
||||
(cond ((or (match-token 'token-name)
|
||||
(match-token 'token-number))
|
||||
(previous-token))
|
||||
((match-token 'token-open-paren)
|
||||
(let ((expr (match-syntax expression)))
|
||||
(unless (match-token 'token-close-paren)
|
||||
;; TODO
|
||||
(error "Parser error: Closing parenthesis ')' required after expression."))
|
||||
(make-instance 'node-expr-grouping
|
||||
:source *syntax-source*
|
||||
:comment *token-comment*
|
||||
:expression expr)))
|
||||
((match-token 'token-end-of-statement)
|
||||
(error "End-of-statement in the middle of an expression"))
|
||||
(t (error "What token?! Is this?! (DAH DAH DAAAAH)~%~S~%" (peek-token)))))
|
||||
|
||||
(define-syntax-matcher assignment (l-value r-value)
|
||||
(setf l-value (match-syntax expression))
|
||||
(if (and (typep l-value 'token-name)
|
||||
(match-token 'token-equal))
|
||||
(progn
|
||||
(setf r-value (match-syntax expression))
|
||||
(make-instance 'node-assignment
|
||||
:source *syntax-source*
|
||||
:comment *token-comment*
|
||||
:variable (transform l-value 'reference-variable)
|
||||
:value r-value))
|
||||
l-value))
|
||||
|
||||
(define-syntax-matcher statement ()
|
||||
(cond ((match-token :if)
|
||||
(prog1
|
||||
(match-syntax if)
|
||||
(unless (or (match-token 'token-end-of-statement)
|
||||
(null (peek-token)))
|
||||
(error "EOS required after IF, got ~A"
|
||||
(peek-token)))))
|
||||
((match-token :for)
|
||||
(prog1
|
||||
(match-syntax for-do-times)
|
||||
(unless (or (match-token 'token-end-of-statement)
|
||||
(null (peek-token)))
|
||||
(error "EOS required after FOR, got ~A"
|
||||
(peek-token)))))
|
||||
((match-token 'token-end-of-statement)
|
||||
;; Empty statement, might contain comment
|
||||
(make-instance 'node-nop
|
||||
:source *syntax-source*
|
||||
:comment *token-comment*))
|
||||
(t
|
||||
(let ((expr (match-syntax assignment)))
|
||||
(setf (comment expr) *token-comment*)
|
||||
(consume-token 'token-end-of-statement
|
||||
(format nil "Couldn't find end of expression. ~A found instead."
|
||||
(peek-token)))
|
||||
expr))))
|
||||
|
||||
(defun wire-up-statements (statements)
|
||||
(loop :for statement :in statements
|
||||
:for rest := (rest statements) :then (rest rest)
|
||||
:for next := (first rest)
|
||||
:do (setf (normal-next statement) next))
|
||||
statements)
|
||||
|
||||
(define-syntax-matcher block (statements)
|
||||
(loop :for statement := (match-syntax statement)
|
||||
:unless (typep statement 'node-nop)
|
||||
:do (push statement statements)
|
||||
:until (or (match-token :end)
|
||||
(match-token :else)))
|
||||
(setf statements (wire-up-statements (nreverse statements)))
|
||||
(make-instance 'node-block
|
||||
:source *syntax-source*
|
||||
:statements statements
|
||||
:next (first statements)))
|
||||
|
||||
(defmacro match-syntax-pattern (syntax-list error-messages)
|
||||
(append
|
||||
'(progn)
|
||||
(loop :for syntax :in syntax-list
|
||||
:collect (cond ((symbolp syntax)
|
||||
;; Match token, ignore everything except its existence
|
||||
`(consume-token ',syntax
|
||||
,(cadr (assoc syntax error-messages))))
|
||||
((and (listp syntax) (symbolp (second syntax))
|
||||
(not (keywordp (second syntax))))
|
||||
;; Match a variable via a matcher
|
||||
`(setf ,(first syntax) (match-syntax ,(second syntax))))
|
||||
((and (listp syntax) (listp (second syntax))
|
||||
(eql (first (second syntax)) :token))
|
||||
;; Match a variable to a token
|
||||
(let ((var (first syntax))
|
||||
(desired-token (second (second syntax))))
|
||||
`(progn
|
||||
(setf ,var (match-token ',desired-token))
|
||||
(when (null ,var)
|
||||
(error ,(cadr (assoc var error-messages)))))))))))
|
||||
|
||||
(define-syntax-matcher for-do-times (var n code)
|
||||
(match-syntax-pattern
|
||||
((var (:token token-name)) :do (n expression) :times
|
||||
token-end-of-statement
|
||||
(code block))
|
||||
((:do "FOR requires a DO.")
|
||||
(:times "FOR loop count must be followed by TIMES.")
|
||||
(var "A loop index variable must be supplied.")))
|
||||
(unless (eql (name (previous-token)) :end)
|
||||
(error "FOR DO TIMES loop may only end with END. Found ~A." (previous-token)))
|
||||
(make-instance 'node-dotimes
|
||||
:source *syntax-source*
|
||||
:counter-ref (transform var 'reference-variable)
|
||||
:stop-ref (transform n 'reference)
|
||||
:loopee-node (transform code 'node)))
|
||||
|
||||
(defclass node-conditional (node)
|
||||
((%test :accessor test-node :initarg :test)
|
||||
(%then :accessor then-node :initarg :then)
|
||||
(%else :accessor else-node :initarg :else :initform nil)))
|
||||
|
||||
(defmethod node-nexts ((node node-conditional))
|
||||
(append (list (test-node node) (then-node node))
|
||||
(unless (null (else-node node))
|
||||
(list (else-node node)))
|
||||
(call-next-method)))
|
||||
|
||||
(define-syntax-matcher if (test then else)
|
||||
(match-syntax-pattern
|
||||
((test expression) :then token-end-of-statement (then block))
|
||||
((token-end-of-statement "THEN must be followed by a line break")
|
||||
(:then "IF condition must be followed by THEN")))
|
||||
(cond ((eql (name (previous-token)) :end))
|
||||
((eql (name (previous-token)) :else)
|
||||
(setf else (match-syntax block))
|
||||
#+(or)
|
||||
(unless (match-token :end)
|
||||
(error "An IF's ELSE block must be followed by END.~%Found ~A instead.~%ELSE IF is not yet supported."
|
||||
(peek-token))))
|
||||
(t (error "IF block ended very strangely, found ~A."
|
||||
(peek-token))))
|
||||
(make-instance 'node-conditional
|
||||
:source *syntax-source*
|
||||
:test (transform test 'node-expr)
|
||||
:then (transform then 'node)
|
||||
:else (if (null else) nil
|
||||
(transform else 'node))))
|
||||
|
||||
(define-syntax-matcher program (statements)
|
||||
(let ((*token-comment* nil))
|
||||
(loop :for statement := (match-syntax statement)
|
||||
:unless (typep statement 'node-nop)
|
||||
:do (push statement statements)
|
||||
:until (null (peek-token)))
|
||||
(setf statements (wire-up-statements (nreverse statements)))
|
||||
(make-instance 'node-program
|
||||
:source *syntax-source*
|
||||
:statements statements
|
||||
:next (first statements))))
|
||||
|
||||
;;; Testing jigs
|
||||
|
||||
(defmacro do-node-tree ((node start-node) &body body)
|
||||
(let ((stack (gensym))
|
||||
(current (gensym)))
|
||||
`(loop :with ,stack := (list (normal-next ,start-node))
|
||||
:for ,current := (pop ,stack)
|
||||
:until (null ,current)
|
||||
:do (setf ,stack (append (remove nil (node-nexts ,current)) ,stack))
|
||||
:do (let ((,node ,current))
|
||||
,@body))))
|
113
wip-duuqnd/user-side-compiler/s-print.lisp
Normal file
113
wip-duuqnd/user-side-compiler/s-print.lisp
Normal file
|
@ -0,0 +1,113 @@
|
|||
(in-package #:user-side-compiler)
|
||||
|
||||
;;; S-Printer (the S stood for S-expression, but that's no longer true).
|
||||
;;; It reprints a valid program out of a syntax tree. Start it by passing
|
||||
;;; S-PRINT a NODE-PROGRAM and a stream. Otherwise, be sure to properly bind
|
||||
;;; the dynamic variables.
|
||||
|
||||
(defparameter *s-print-indent-size* 4)
|
||||
(defvar *s-print-indent* 0)
|
||||
(defvar *s-print-indent-p* nil)
|
||||
|
||||
(defun s-indent (stream)
|
||||
(when *s-print-indent-p*
|
||||
(loop :repeat (* *s-print-indent* *s-print-indent-size*)
|
||||
:do (write-char #\Space stream))))
|
||||
|
||||
(defgeneric s-print (node stream))
|
||||
|
||||
(defmethod s-print ((node null) stream)
|
||||
(format stream "~%~%END OF FILE~%"))
|
||||
|
||||
(defmethod s-print ((node node) stream)
|
||||
(s-print (normal-next node)))
|
||||
|
||||
(defmethod s-print :around (node (stream symbol))
|
||||
(cond ((eql stream t) (s-print node *standard-output*))
|
||||
((eql stream nil) (with-output-to-string (s)
|
||||
(s-print node s)))
|
||||
(t (error "~A is not a valid stream. Must be T, NIL, or a STREAM."
|
||||
stream))))
|
||||
|
||||
(defmethod s-print ((node node-expr-grouping) stream)
|
||||
(format stream "(~A)" (s-print (expression node) nil)))
|
||||
|
||||
(defmethod s-print ((node node-expr) stream)
|
||||
(let ((operator (car (rassoc (class-name (class-of (operator-token node)))
|
||||
*operator-token-classes*
|
||||
:key #'car))))
|
||||
(assert (= (length (operands node)) 2))
|
||||
(format stream "~A ~A ~A"
|
||||
(s-print (first (operands node)) nil)
|
||||
operator
|
||||
(s-print (second (operands node)) nil))))
|
||||
|
||||
(defmethod s-print ((node node-assignment) stream)
|
||||
(s-indent stream)
|
||||
(format stream "~A = ~A~%"
|
||||
(s-print (dst-variable node) nil)
|
||||
(s-print (value node) nil)))
|
||||
|
||||
(defmethod s-print ((node node-conditional) stream)
|
||||
(s-indent stream)
|
||||
(format stream "IF ~A THEN~%~A"
|
||||
(s-print (test-node node) nil)
|
||||
(prog2
|
||||
(incf *s-print-indent*)
|
||||
(s-print (then-node node) nil)
|
||||
(decf *s-print-indent*)))
|
||||
(s-indent stream)
|
||||
(if (null (else-node node))
|
||||
(format stream "END~%")
|
||||
(progn
|
||||
(format stream "ELSE~%~A"
|
||||
(prog2
|
||||
(incf *s-print-indent*)
|
||||
(s-print (else-node node) nil)
|
||||
(decf *s-print-indent*)))
|
||||
(s-indent stream)
|
||||
(format stream "END~%"))))
|
||||
|
||||
(defmethod s-print ((node reference-variable) stream)
|
||||
(write-string (name node) stream))
|
||||
|
||||
(defmethod s-print ((node reference-constant) stream)
|
||||
(format stream "~D" (ref-value node)))
|
||||
|
||||
(defmethod s-print ((node node-dotimes) stream)
|
||||
(s-indent stream)
|
||||
(format stream "FOR ~A DO ~A TIMES~%"
|
||||
(s-print (counter-ref node) nil)
|
||||
(s-print (stop-ref node) nil))
|
||||
(incf *s-print-indent*)
|
||||
(s-print (loopee-node node) stream)
|
||||
(decf *s-print-indent*)
|
||||
(s-indent stream)
|
||||
(format stream "END~%"))
|
||||
|
||||
(defmethod s-print ((node token-name) stream)
|
||||
(format stream "~A" (name node)))
|
||||
|
||||
(defmethod s-print ((node token-number) stream)
|
||||
(format stream "~A" (value node)))
|
||||
|
||||
(defmethod s-print ((node node-block) stream)
|
||||
(dolist (s (statements node))
|
||||
(s-print s stream)))
|
||||
|
||||
(defmethod s-print ((node node-program) stream)
|
||||
(let ((*s-print-indent* 0)
|
||||
(*s-print-indent-p* t))
|
||||
(dolist (s (statements node))
|
||||
(s-print s stream))))
|
||||
|
||||
(defmethod s-print ((node node-call) stream)
|
||||
(s-indent stream)
|
||||
(format stream "~A(~A~{, ~A~})~%"
|
||||
(s-print (callee node) nil)
|
||||
(s-print (first (arguments node)) nil)
|
||||
(mapcar (lambda (a) (s-print a nil))
|
||||
(rest (arguments node)))))
|
||||
|
||||
(defmethod s-print ((node asm-function) stream)
|
||||
(format stream "~A" (string-upcase (name node))))
|
|
@ -2,13 +2,16 @@
|
|||
|
||||
(defsystem #:user-side-compiler
|
||||
:serial t
|
||||
:depends-on (#:closer-mop)
|
||||
:components
|
||||
((:file "package")
|
||||
(:file "transform")
|
||||
(:file "reference")
|
||||
(:file "symbol-table")
|
||||
(:file "asm-function")
|
||||
(:file "tokenizer")
|
||||
(:file "asm-function")
|
||||
(:file "parser")
|
||||
(:file "label")
|
||||
(:file "high-level")
|
||||
(:file "instruction")))
|
||||
(:file "instruction")
|
||||
(:file "s-print")))
|
||||
|
|
Loading…
Add table
Reference in a new issue