From 4d6ad30eae7e8db438dd2e4d27c268e07689a848 Mon Sep 17 00:00:00 2001 From: John Lorentzson Date: Mon, 7 Jul 2025 19:44:57 +0200 Subject: [PATCH] Add some proper error handling to user-side compiler --- .../backend/code-generator.lisp | 2 +- .../user-side-compiler/error-handling.lisp | 47 +++++++++++ wip-duuqnd/user-side-compiler/parser.lisp | 78 ++++++++++++------- wip-duuqnd/user-side-compiler/tokenizer.lisp | 17 +++- .../user-side-compiler/user-side-compiler.asd | 1 + 5 files changed, 112 insertions(+), 33 deletions(-) create mode 100644 wip-duuqnd/user-side-compiler/error-handling.lisp diff --git a/wip-duuqnd/user-side-compiler/backend/code-generator.lisp b/wip-duuqnd/user-side-compiler/backend/code-generator.lisp index b6c4d3f..97e9f95 100644 --- a/wip-duuqnd/user-side-compiler/backend/code-generator.lisp +++ b/wip-duuqnd/user-side-compiler/backend/code-generator.lisp @@ -368,7 +368,7 @@ is the responsibility of the pre-assembly compilation step." (defun quick-and-dirty-test-compile (text &key print-ir-p print-alloc-p make-asm-p) (with-input-from-string (source-stream text) - (let ((*token-stream* (make-token-stream (tokenize source-stream)))) + (let ((*token-stream* (make-token-stream (tokenize source-stream text)))) (let ((rb (with-compilation-setup (root-block builder) (compile-node (match-syntax program) builder) root-block)) diff --git a/wip-duuqnd/user-side-compiler/error-handling.lisp b/wip-duuqnd/user-side-compiler/error-handling.lisp new file mode 100644 index 0000000..cc41c17 --- /dev/null +++ b/wip-duuqnd/user-side-compiler/error-handling.lisp @@ -0,0 +1,47 @@ +(in-package #:user-side-compiler) + +(defun string-nth-line (string line) + (assert (> line 0)) + (let ((start (loop :repeat line + :for start := 0 :then (1+ (position #\Newline string + :start start)) + :finally (return start)))) + (subseq string + start + (position #\Newline string :start (1+ start))))) + +(defun point-out-source (source) + (if (null source) + "No source info." + (destructuring-bind (text line . column) + source + (cond ((null text) + (format nil "Line ~D, column ~D" line column)) + ((stringp text) + (with-output-to-string (output) + (format output "~%Found on line ~D, column ~D:~%~A~%" + line column + (string-nth-line text line)) + (loop :repeat column :do (write-char #\Space output)) + (write-char #\^ output))))))) + +(define-condition usc-error (error) + ((%source :accessor source :initarg :source :initform nil) + (%format-control :accessor format-control :initarg :format-control) + (%format-arguments :accessor format-arguments :initarg :format-arguments :initform '()) + (%context-string :reader context-string :initform "User-side compiler error at unknown stage:")) + (:report (lambda (c s) + (format s "~A~%~A~%~A" (context-string c) + (apply #'format nil (format-control c) (format-arguments c)) + (point-out-source (source c)))))) + +(define-condition tokenizer-error (usc-error) + ((%context-string :initform "Error in tokenizer:"))) + +(define-condition parser-error (usc-error) + ()) + + +(defun error-parser (source format-control &rest format-arguments) + (error 'parser-error :source source :format-control format-control + :format-arguments format-arguments)) diff --git a/wip-duuqnd/user-side-compiler/parser.lisp b/wip-duuqnd/user-side-compiler/parser.lisp index f9583b0..28e6b89 100644 --- a/wip-duuqnd/user-side-compiler/parser.lisp +++ b/wip-duuqnd/user-side-compiler/parser.lisp @@ -25,7 +25,7 @@ parser's debug output.") (setf *token-comment* nil)) got))) (cond ((and (keywordp token) - (typep (peek-token) 'token-name) + (typep (peek-token) 'token-keyword) (eql token (name (peek-token)))) (prog1 (pop-token) @@ -43,20 +43,33 @@ parser's debug output.") (if (boundp '*current-matcher*) *current-matcher* nil)))) + ((null (peek-token)) + (error-parser (source (previous-token)) + "Ran out of tokens while parsing.")) (t nil)))) (defun consume-token (token &optional error-message) (unless (match-token token) - (error (if (null error-message) - (format nil "Expected token of type ~A, got a ~A.~%Source: ~A" - token (type-of (peek-token)) - (source (previous-token))) - (format nil "~A~%Got ~A.~%Source: ~A" - error-message - (previous-token) - (source (previous-token))))))) + (if (null error-message) + (error-parser (source (previous-token)) + "Expected token of type ~A, got a ~A." + token (type-of (peek-token))) + (error-parser (source (previous-token)) + "~A~%Got token ~A instead." + error-message (previous-token) + (source (previous-token)))))) (defun make-token-stream (tokens) + ;; Appending an end-of-statement if one is missing so that we always + ;; have something to latch onto when dealing with syntax errors at the end + ;; of the program's text. It has the same source location as the actual + ;; last token. + (let ((last (last tokens))) + (unless (typep (car last) 'token-end-of-statement) + (setf (cdr last) (cons (make-instance 'token-end-of-statement + :source (source (car last))) + nil)))) + (assert (every (complement #'null) tokens)) (cons tokens nil)) ;;; Matcher @@ -152,7 +165,8 @@ parser's debug output.") (t (setf arguments (match-syntax arglist)) (consume-token 'token-close-paren - "Close parenthesis ')' is required to end function call argument list."))) + "Close parenthesis ')' is required to end function call argument list, +comma ',' is required to separate arguments."))) (make-instance 'node-call :source *syntax-source* :comment *token-comment* @@ -166,16 +180,18 @@ parser's debug output.") (previous-token)) ((match-token 'token-open-paren) (let ((expr (match-syntax expression))) - (unless (match-token 'token-close-paren) - ;; TODO - (error "Parser error: Closing parenthesis ')' required after expression.")) + (consume-token 'token-close-paren + "Closing parenthesis ')' required after grouping expression.") (make-instance 'node-expr-grouping :source *syntax-source* :comment *token-comment* :expression expr))) ((match-token 'token-end-of-statement) - (error "End-of-statement in the middle of an expression")) - (t (error "What token?! Is this?! (DAH DAH DAAAAH)~%~S~%" (peek-token))))) + (error-parser (source (previous-token)) + "End-of-statement in the middle of an expression.")) + (t (error-parser (source (peek-token)) + "Unexpected token ~S in unexpected situation." + (peek-token))))) (define-syntax-matcher assignment (l-value r-value) (setf l-value (match-syntax expression)) @@ -196,15 +212,17 @@ parser's debug output.") (match-syntax if) (unless (or (match-token 'token-end-of-statement) (null (peek-token))) - (error "EOS required after IF, got ~A" - (peek-token))))) + (error-parser (source (previous-token)) + "End-of-statement required after IF's THEN, got ~A" + (peek-token))))) ((match-token :for) (prog1 (match-syntax for-do-times) (unless (or (match-token 'token-end-of-statement) (null (peek-token))) - (error "EOS required after FOR, got ~A" - (peek-token))))) + (error-parser (source (previous-token)) + "End-of-statement required after FOR's THEN, got ~A" + (peek-token))))) ((match-token 'token-end-of-statement) ;; Empty statement, might contain comment (make-instance 'node-nop @@ -212,7 +230,7 @@ parser's debug output.") :comment *token-comment*)) (t (let ((expr (match-syntax assignment))) - (setf (comment expr) *token-comment*) + ;;(setf (comment expr) *token-comment*) (consume-token 'token-end-of-statement (format nil "Couldn't find end of expression. ~A found instead." (peek-token))) @@ -229,6 +247,9 @@ parser's debug output.") (loop :for statement := (match-syntax statement) :unless (typep statement 'node-nop) :do (push statement statements) + :when (null (peek-token)) + :do (error-parser (source (previous-token)) + "Unterminated block, check for missing ENDs and ELSEs.") :until (or (match-token :end) (match-token :else))) (setf statements (wire-up-statements (nreverse statements))) @@ -257,7 +278,8 @@ parser's debug output.") `(progn (setf ,var (match-token ',desired-token)) (when (null ,var) - (error ,(cadr (assoc var error-messages))))))))))) + (error-parser (source (previous-token)) + ,(cadr (assoc var error-messages))))))))))) (define-syntax-matcher for-do-times (var n code) (match-syntax-pattern @@ -268,7 +290,8 @@ parser's debug output.") (:times "FOR loop count must be followed by TIMES.") (var "A loop index variable must be supplied."))) (unless (eql (name (previous-token)) :end) - (error "FOR DO TIMES loop may only end with END. Found ~A." (previous-token))) + (error-parser (source (previous-token)) + "FOR DO TIMES loop may only end with END. Found ~A." (previous-token))) (make-instance 'node-dotimes :source *syntax-source* :counter-ref (transform var 'reference-variable) @@ -288,13 +311,10 @@ parser's debug output.") (:then "IF condition must be followed by THEN"))) (cond ((eql (name (previous-token)) :end)) ((eql (name (previous-token)) :else) - (setf else (match-syntax block)) - #+(or) - (unless (match-token :end) - (error "An IF's ELSE block must be followed by END.~%Found ~A instead.~%ELSE IF is not yet supported." - (peek-token)))) - (t (error "IF block ended very strangely, found ~A." - (peek-token)))) + (setf else (match-syntax block))) + (t (error-parser (source (previous-token)) + "IF block must be closed with END, found ~A." + (previous-token)))) (make-instance 'node-conditional :source *syntax-source* :test test diff --git a/wip-duuqnd/user-side-compiler/tokenizer.lisp b/wip-duuqnd/user-side-compiler/tokenizer.lisp index df65635..dc9b651 100644 --- a/wip-duuqnd/user-side-compiler/tokenizer.lisp +++ b/wip-duuqnd/user-side-compiler/tokenizer.lisp @@ -26,6 +26,14 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.") (defclass token-name (token) ((%name :accessor name :initarg :name))) +(defun valid-name-p (string) + (assert (stringp string)) + (and (not (zerop (length string))) + (alpha-char-p (char string 0)) + (loop :for char :across (subseq string 1) + :always (or (alphanumericp char) + (char= char #\_))))) + (defmethod print-object ((object token-name) stream) (print-unreadable-object (object stream :type t :identity t) (when (slot-boundp object '%name) @@ -113,7 +121,7 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.") ;;; Keyword tokens -(defclass token-keyword (token-name) +(defclass token-keyword (token) ((%name :accessor name :initarg :name))) (defparameter *syntax-keywords* @@ -140,7 +148,10 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.") :source source :text (if (null start) "" (subseq text start))))) - (t (make-instance 'token-name :source source :name text)))))) + ((valid-name-p text) + (make-instance 'token-name :source source :name text)) + (t (error 'tokenizer-error :source source :format-arguments (list text) + :format-control "Invalid name or unknown token \"~A\".")))))) (defun tokenize (stream &optional source) (let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t)) @@ -155,7 +166,7 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.") (push (text-to-token (copy-seq token-text-buffer) token-source) tokens) (setf (fill-pointer token-text-buffer) 0 new-token-p t - token-source (cons source (file-position stream))))) + token-source (cons source (cons line column))))) (loop :for char := (read-char stream) :for next := (peek-char nil stream nil :eof) :do (incf column) diff --git a/wip-duuqnd/user-side-compiler/user-side-compiler.asd b/wip-duuqnd/user-side-compiler/user-side-compiler.asd index 05f3cf0..b82e539 100644 --- a/wip-duuqnd/user-side-compiler/user-side-compiler.asd +++ b/wip-duuqnd/user-side-compiler/user-side-compiler.asd @@ -5,6 +5,7 @@ :depends-on (#:closer-mop #:alexandria) :components ((:file "package") + (:file "error-handling") (:file "toolkit") (:file "transform") (:file "reference")