diff --git a/wip-duuqnd/user-side-compiler/parser.lisp b/wip-duuqnd/user-side-compiler/parser.lisp new file mode 100644 index 0000000..1e0ad73 --- /dev/null +++ b/wip-duuqnd/user-side-compiler/parser.lisp @@ -0,0 +1,332 @@ +(in-package #:user-side-compiler) + +(defvar *parser-debug* nil + "Parser will try to print with FORMAT here. Bind it to a stream to see the +parser's debug output.") + +;;; Token stream + +(defvar *token-stream*) +(defvar *token-comment* nil) + +(defun previous-token () + (cdr *token-stream*)) + +(defun peek-token () + (caar *token-stream*)) + +(defun match-token (token) + (labels + ((pop-token () + (setf (cdr *token-stream*) (pop (car *token-stream*))) + (let ((got (previous-token))) + (if (comment-p (peek-token)) + (setf *token-comment* (pop (car *token-stream*))) + (setf *token-comment* nil)) + got))) + (cond ((and (keywordp token) + (typep (peek-token) 'token-name) + (eql token (name (peek-token)))) + (prog1 + (pop-token) + (format *parser-debug* "Matched a keyword token ~A (Context: ~A)~%" + token + (if (boundp '*current-matcher*) + *current-matcher* + nil)))) + ((and (not (keywordp token)) + (typep (peek-token) token)) + (prog1 + (pop-token) + (format *parser-debug* "Matched a token ~A against type ~A (Context: ~A)~%" + (previous-token) token + (if (boundp '*current-matcher*) + *current-matcher* + nil)))) + (t nil)))) + +(defun consume-token (token &optional error-message) + (unless (match-token token) + (error (if (null error-message) + (format nil "Expected token of type ~A, got a ~A.~%Source: ~A" + token (type-of (peek-token)) + (source (previous-token))) + (format nil "~A~%Got ~A.~%Source: ~A" + error-message + (previous-token) + (source (previous-token))))))) + +(defun make-token-stream (tokens) + (cons tokens nil)) + +;;; Matcher + +(defparameter *syntax-matchers* (make-hash-table)) +(defvar *syntax-source*) +(defvar *current-matcher*) + +(defmacro define-syntax-matcher (name (&rest vars) &body body) + `(setf (gethash ',name *syntax-matchers*) + (lambda () + (let (,@vars + (*syntax-source* (source (peek-token))) + (*current-matcher* ',name)) + ,@body)))) + +(defun %match-syntax (matcher-name) + (multiple-value-bind (function existsp) + (gethash matcher-name *syntax-matchers*) + (unless existsp + (error "No such syntax matcher ~A" matcher-name)) + (format *parser-debug* "Entering matcher ~A~%" matcher-name) + (prog1 + (funcall function) + (format *parser-debug* "Exiting matcher ~A~%" matcher-name)))) + +(defmacro match-syntax (matcher-name) + `(%match-syntax ',matcher-name)) + +;;; Syntax patterns + +(defclass node-expr-equality (node-expr-binary) + ((%operator :accessor operator :initarg :operator) + (%operands :accessor operands :initarg :operands))) + +(defmacro define-binary-expr-matcher (name next &rest operator-node-pairs) + `(define-syntax-matcher ,name (left operator right) + (setf left (match-syntax ,next)) + (loop :while (or ,@(loop :for (operator node) :in operator-node-pairs + :collect `(match-token ',operator))) + :do (setf operator (previous-token)) + :do (setf right (match-syntax ,next)) + :do (setf left (make-instance (etypecase operator + ,@(mapcar (lambda (pair) + `(,(car pair) ',(cadr pair))) + operator-node-pairs)) + :operator-token operator + :source *syntax-source* + :operands (list left right)))) + left)) + +(define-syntax-matcher expression () + (match-syntax call)) + +(define-syntax-matcher arglist (arguments) + (setf arguments (list (match-syntax expression))) + (loop :while (match-token 'token-comma) + :do (push (match-syntax expression) arguments)) + (nreverse arguments)) + +(define-syntax-matcher call (name arguments) + (setf name (match-syntax equality)) + (cond ((and (typep name 'token-name) + (match-token 'token-open-paren)) + (setf arguments (match-syntax arglist)) + (consume-token 'token-close-paren + "Close parenthesis ')' is required to end function call argument list.") + (make-instance 'node-call + :source *syntax-source* + :comment *token-comment* + :callee (transform name 'asm-function) + :arguments arguments)) + (t name))) + +(define-binary-expr-matcher equality comparison + (token-equal-equal node-expr-test-equal) + (token-not-equal node-expr-test-not-equal)) + +(define-binary-expr-matcher comparison term + ;; TODO: The rest + (token-less-than node-expr-test-equal) + (token-greater-than node-expr-test-not-equal)) + +(define-binary-expr-matcher term factor + (token-plus node-expr-plus) + (token-minus node-expr-minus)) + +(define-binary-expr-matcher factor unary + (token-star node-expr-multiply) + (token-slash node-expr-divide)) + +(define-syntax-matcher unary () + (let ((unary-tokens '(token-not token-minus))) + (if (some #'match-token unary-tokens) + (make-instance (etypecase (previous-token) + (token-not 'node-expr-not) + (token-minus 'node-expr-negate)) + :source *syntax-source* + :comment *token-comment* + :operator-token (previous-token) + :operand (match-syntax primary)) + (match-syntax primary)))) + +(define-syntax-matcher primary () + (cond ((or (match-token 'token-name) + (match-token 'token-number)) + (previous-token)) + ((match-token 'token-open-paren) + (let ((expr (match-syntax expression))) + (unless (match-token 'token-close-paren) + ;; TODO + (error "Parser error: Closing parenthesis ')' required after expression.")) + (make-instance 'node-expr-grouping + :source *syntax-source* + :comment *token-comment* + :expression expr))) + ((match-token 'token-end-of-statement) + (error "End-of-statement in the middle of an expression")) + (t (error "What token?! Is this?! (DAH DAH DAAAAH)~%~S~%" (peek-token))))) + +(define-syntax-matcher assignment (l-value r-value) + (setf l-value (match-syntax expression)) + (if (and (typep l-value 'token-name) + (match-token 'token-equal)) + (progn + (setf r-value (match-syntax expression)) + (make-instance 'node-assignment + :source *syntax-source* + :comment *token-comment* + :variable (transform l-value 'reference-variable) + :value r-value)) + l-value)) + +(define-syntax-matcher statement () + (cond ((match-token :if) + (prog1 + (match-syntax if) + (unless (or (match-token 'token-end-of-statement) + (null (peek-token))) + (error "EOS required after IF, got ~A" + (peek-token))))) + ((match-token :for) + (prog1 + (match-syntax for-do-times) + (unless (or (match-token 'token-end-of-statement) + (null (peek-token))) + (error "EOS required after FOR, got ~A" + (peek-token))))) + ((match-token 'token-end-of-statement) + ;; Empty statement, might contain comment + (make-instance 'node-nop + :source *syntax-source* + :comment *token-comment*)) + (t + (let ((expr (match-syntax assignment))) + (setf (comment expr) *token-comment*) + (consume-token 'token-end-of-statement + (format nil "Couldn't find end of expression. ~A found instead." + (peek-token))) + expr)))) + +(defun wire-up-statements (statements) + (loop :for statement :in statements + :for rest := (rest statements) :then (rest rest) + :for next := (first rest) + :do (setf (normal-next statement) next)) + statements) + +(define-syntax-matcher block (statements) + (loop :for statement := (match-syntax statement) + :unless (typep statement 'node-nop) + :do (push statement statements) + :until (or (match-token :end) + (match-token :else))) + (setf statements (wire-up-statements (nreverse statements))) + (make-instance 'node-block + :source *syntax-source* + :statements statements + :next (first statements))) + +(defmacro match-syntax-pattern (syntax-list error-messages) + (append + '(progn) + (loop :for syntax :in syntax-list + :collect (cond ((symbolp syntax) + ;; Match token, ignore everything except its existence + `(consume-token ',syntax + ,(cadr (assoc syntax error-messages)))) + ((and (listp syntax) (symbolp (second syntax)) + (not (keywordp (second syntax)))) + ;; Match a variable via a matcher + `(setf ,(first syntax) (match-syntax ,(second syntax)))) + ((and (listp syntax) (listp (second syntax)) + (eql (first (second syntax)) :token)) + ;; Match a variable to a token + (let ((var (first syntax)) + (desired-token (second (second syntax)))) + `(progn + (setf ,var (match-token ',desired-token)) + (when (null ,var) + (error ,(cadr (assoc var error-messages))))))))))) + +(define-syntax-matcher for-do-times (var n code) + (match-syntax-pattern + ((var (:token token-name)) :do (n expression) :times + token-end-of-statement + (code block)) + ((:do "FOR requires a DO.") + (:times "FOR loop count must be followed by TIMES.") + (var "A loop index variable must be supplied."))) + (unless (eql (name (previous-token)) :end) + (error "FOR DO TIMES loop may only end with END. Found ~A." (previous-token))) + (make-instance 'node-dotimes + :source *syntax-source* + :counter-ref (transform var 'reference-variable) + :stop-ref (transform n 'reference) + :loopee-node (transform code 'node))) + +(defclass node-conditional (node) + ((%test :accessor test-node :initarg :test) + (%then :accessor then-node :initarg :then) + (%else :accessor else-node :initarg :else :initform nil))) + +(defmethod node-nexts ((node node-conditional)) + (append (list (test-node node) (then-node node)) + (unless (null (else-node node)) + (list (else-node node))) + (call-next-method))) + +(define-syntax-matcher if (test then else) + (match-syntax-pattern + ((test expression) :then token-end-of-statement (then block)) + ((token-end-of-statement "THEN must be followed by a line break") + (:then "IF condition must be followed by THEN"))) + (cond ((eql (name (previous-token)) :end)) + ((eql (name (previous-token)) :else) + (setf else (match-syntax block)) + #+(or) + (unless (match-token :end) + (error "An IF's ELSE block must be followed by END.~%Found ~A instead.~%ELSE IF is not yet supported." + (peek-token)))) + (t (error "IF block ended very strangely, found ~A." + (peek-token)))) + (make-instance 'node-conditional + :source *syntax-source* + :test (transform test 'node-expr) + :then (transform then 'node) + :else (if (null else) nil + (transform else 'node)))) + +(define-syntax-matcher program (statements) + (let ((*token-comment* nil)) + (loop :for statement := (match-syntax statement) + :unless (typep statement 'node-nop) + :do (push statement statements) + :until (null (peek-token))) + (setf statements (wire-up-statements (nreverse statements))) + (make-instance 'node-program + :source *syntax-source* + :statements statements + :next (first statements)))) + +;;; Testing jigs + +(defmacro do-node-tree ((node start-node) &body body) + (let ((stack (gensym)) + (current (gensym))) + `(loop :with ,stack := (list (normal-next ,start-node)) + :for ,current := (pop ,stack) + :until (null ,current) + :do (setf ,stack (append (remove nil (node-nexts ,current)) ,stack)) + :do (let ((,node ,current)) + ,@body)))) diff --git a/wip-duuqnd/user-side-compiler/s-print.lisp b/wip-duuqnd/user-side-compiler/s-print.lisp new file mode 100644 index 0000000..ae21009 --- /dev/null +++ b/wip-duuqnd/user-side-compiler/s-print.lisp @@ -0,0 +1,113 @@ +(in-package #:user-side-compiler) + +;;; S-Printer (the S stood for S-expression, but that's no longer true). +;;; It reprints a valid program out of a syntax tree. Start it by passing +;;; S-PRINT a NODE-PROGRAM and a stream. Otherwise, be sure to properly bind +;;; the dynamic variables. + +(defparameter *s-print-indent-size* 4) +(defvar *s-print-indent* 0) +(defvar *s-print-indent-p* nil) + +(defun s-indent (stream) + (when *s-print-indent-p* + (loop :repeat (* *s-print-indent* *s-print-indent-size*) + :do (write-char #\Space stream)))) + +(defgeneric s-print (node stream)) + +(defmethod s-print ((node null) stream) + (format stream "~%~%END OF FILE~%")) + +(defmethod s-print ((node node) stream) + (s-print (normal-next node))) + +(defmethod s-print :around (node (stream symbol)) + (cond ((eql stream t) (s-print node *standard-output*)) + ((eql stream nil) (with-output-to-string (s) + (s-print node s))) + (t (error "~A is not a valid stream. Must be T, NIL, or a STREAM." + stream)))) + +(defmethod s-print ((node node-expr-grouping) stream) + (format stream "(~A)" (s-print (expression node) nil))) + +(defmethod s-print ((node node-expr) stream) + (let ((operator (car (rassoc (class-name (class-of (operator-token node))) + *operator-token-classes* + :key #'car)))) + (assert (= (length (operands node)) 2)) + (format stream "~A ~A ~A" + (s-print (first (operands node)) nil) + operator + (s-print (second (operands node)) nil)))) + +(defmethod s-print ((node node-assignment) stream) + (s-indent stream) + (format stream "~A = ~A~%" + (s-print (dst-variable node) nil) + (s-print (value node) nil))) + +(defmethod s-print ((node node-conditional) stream) + (s-indent stream) + (format stream "IF ~A THEN~%~A" + (s-print (test-node node) nil) + (prog2 + (incf *s-print-indent*) + (s-print (then-node node) nil) + (decf *s-print-indent*))) + (s-indent stream) + (if (null (else-node node)) + (format stream "END~%") + (progn + (format stream "ELSE~%~A" + (prog2 + (incf *s-print-indent*) + (s-print (else-node node) nil) + (decf *s-print-indent*))) + (s-indent stream) + (format stream "END~%")))) + +(defmethod s-print ((node reference-variable) stream) + (write-string (name node) stream)) + +(defmethod s-print ((node reference-constant) stream) + (format stream "~D" (ref-value node))) + +(defmethod s-print ((node node-dotimes) stream) + (s-indent stream) + (format stream "FOR ~A DO ~A TIMES~%" + (s-print (counter-ref node) nil) + (s-print (stop-ref node) nil)) + (incf *s-print-indent*) + (s-print (loopee-node node) stream) + (decf *s-print-indent*) + (s-indent stream) + (format stream "END~%")) + +(defmethod s-print ((node token-name) stream) + (format stream "~A" (name node))) + +(defmethod s-print ((node token-number) stream) + (format stream "~A" (value node))) + +(defmethod s-print ((node node-block) stream) + (dolist (s (statements node)) + (s-print s stream))) + +(defmethod s-print ((node node-program) stream) + (let ((*s-print-indent* 0) + (*s-print-indent-p* t)) + (dolist (s (statements node)) + (s-print s stream)))) + +(defmethod s-print ((node node-call) stream) + (s-indent stream) + (format stream "~A(~A~{, ~A~})~%" + (s-print (callee node) nil) + (s-print (first (arguments node)) nil) + (mapcar (lambda (a) (s-print a nil)) + (rest (arguments node))))) + +(defmethod s-print ((node asm-function) stream) + (format stream "~A" (string-upcase (name node)))) diff --git a/wip-duuqnd/user-side-compiler/user-side-compiler.asd b/wip-duuqnd/user-side-compiler/user-side-compiler.asd index 1a65611..06ec631 100644 --- a/wip-duuqnd/user-side-compiler/user-side-compiler.asd +++ b/wip-duuqnd/user-side-compiler/user-side-compiler.asd @@ -2,13 +2,16 @@ (defsystem #:user-side-compiler :serial t + :depends-on (#:closer-mop) :components ((:file "package") (:file "transform") (:file "reference") (:file "symbol-table") - (:file "asm-function") (:file "tokenizer") + (:file "asm-function") + (:file "parser") (:file "label") (:file "high-level") - (:file "instruction"))) + (:file "instruction") + (:file "s-print")))