Add some proper error handling to user-side compiler

This commit is contained in:
John Lorentzson 2025-07-07 19:44:57 +02:00
parent b5fa71c710
commit 4d6ad30eae
5 changed files with 112 additions and 33 deletions

View file

@ -368,7 +368,7 @@ is the responsibility of the pre-assembly compilation step."
(defun quick-and-dirty-test-compile (text &key print-ir-p print-alloc-p make-asm-p) (defun quick-and-dirty-test-compile (text &key print-ir-p print-alloc-p make-asm-p)
(with-input-from-string (source-stream text) (with-input-from-string (source-stream text)
(let ((*token-stream* (make-token-stream (tokenize source-stream)))) (let ((*token-stream* (make-token-stream (tokenize source-stream text))))
(let ((rb (with-compilation-setup (root-block builder) (let ((rb (with-compilation-setup (root-block builder)
(compile-node (match-syntax program) builder) (compile-node (match-syntax program) builder)
root-block)) root-block))

View file

@ -0,0 +1,47 @@
(in-package #:user-side-compiler)
(defun string-nth-line (string line)
(assert (> line 0))
(let ((start (loop :repeat line
:for start := 0 :then (1+ (position #\Newline string
:start start))
:finally (return start))))
(subseq string
start
(position #\Newline string :start (1+ start)))))
(defun point-out-source (source)
(if (null source)
"No source info."
(destructuring-bind (text line . column)
source
(cond ((null text)
(format nil "Line ~D, column ~D" line column))
((stringp text)
(with-output-to-string (output)
(format output "~%Found on line ~D, column ~D:~%~A~%"
line column
(string-nth-line text line))
(loop :repeat column :do (write-char #\Space output))
(write-char #\^ output)))))))
(define-condition usc-error (error)
((%source :accessor source :initarg :source :initform nil)
(%format-control :accessor format-control :initarg :format-control)
(%format-arguments :accessor format-arguments :initarg :format-arguments :initform '())
(%context-string :reader context-string :initform "User-side compiler error at unknown stage:"))
(:report (lambda (c s)
(format s "~A~%~A~%~A" (context-string c)
(apply #'format nil (format-control c) (format-arguments c))
(point-out-source (source c))))))
(define-condition tokenizer-error (usc-error)
((%context-string :initform "Error in tokenizer:")))
(define-condition parser-error (usc-error)
())
(defun error-parser (source format-control &rest format-arguments)
(error 'parser-error :source source :format-control format-control
:format-arguments format-arguments))

View file

@ -25,7 +25,7 @@ parser's debug output.")
(setf *token-comment* nil)) (setf *token-comment* nil))
got))) got)))
(cond ((and (keywordp token) (cond ((and (keywordp token)
(typep (peek-token) 'token-name) (typep (peek-token) 'token-keyword)
(eql token (name (peek-token)))) (eql token (name (peek-token))))
(prog1 (prog1
(pop-token) (pop-token)
@ -43,20 +43,33 @@ parser's debug output.")
(if (boundp '*current-matcher*) (if (boundp '*current-matcher*)
*current-matcher* *current-matcher*
nil)))) nil))))
((null (peek-token))
(error-parser (source (previous-token))
"Ran out of tokens while parsing."))
(t nil)))) (t nil))))
(defun consume-token (token &optional error-message) (defun consume-token (token &optional error-message)
(unless (match-token token) (unless (match-token token)
(error (if (null error-message) (if (null error-message)
(format nil "Expected token of type ~A, got a ~A.~%Source: ~A" (error-parser (source (previous-token))
token (type-of (peek-token)) "Expected token of type ~A, got a ~A."
(source (previous-token))) token (type-of (peek-token)))
(format nil "~A~%Got ~A.~%Source: ~A" (error-parser (source (previous-token))
error-message "~A~%Got token ~A instead."
(previous-token) error-message (previous-token)
(source (previous-token))))))) (source (previous-token))))))
(defun make-token-stream (tokens) (defun make-token-stream (tokens)
;; Appending an end-of-statement if one is missing so that we always
;; have something to latch onto when dealing with syntax errors at the end
;; of the program's text. It has the same source location as the actual
;; last token.
(let ((last (last tokens)))
(unless (typep (car last) 'token-end-of-statement)
(setf (cdr last) (cons (make-instance 'token-end-of-statement
:source (source (car last)))
nil))))
(assert (every (complement #'null) tokens))
(cons tokens nil)) (cons tokens nil))
;;; Matcher ;;; Matcher
@ -152,7 +165,8 @@ parser's debug output.")
(t (t
(setf arguments (match-syntax arglist)) (setf arguments (match-syntax arglist))
(consume-token 'token-close-paren (consume-token 'token-close-paren
"Close parenthesis ')' is required to end function call argument list."))) "Close parenthesis ')' is required to end function call argument list,
comma ',' is required to separate arguments.")))
(make-instance 'node-call (make-instance 'node-call
:source *syntax-source* :source *syntax-source*
:comment *token-comment* :comment *token-comment*
@ -166,16 +180,18 @@ parser's debug output.")
(previous-token)) (previous-token))
((match-token 'token-open-paren) ((match-token 'token-open-paren)
(let ((expr (match-syntax expression))) (let ((expr (match-syntax expression)))
(unless (match-token 'token-close-paren) (consume-token 'token-close-paren
;; TODO "Closing parenthesis ')' required after grouping expression.")
(error "Parser error: Closing parenthesis ')' required after expression."))
(make-instance 'node-expr-grouping (make-instance 'node-expr-grouping
:source *syntax-source* :source *syntax-source*
:comment *token-comment* :comment *token-comment*
:expression expr))) :expression expr)))
((match-token 'token-end-of-statement) ((match-token 'token-end-of-statement)
(error "End-of-statement in the middle of an expression")) (error-parser (source (previous-token))
(t (error "What token?! Is this?! (DAH DAH DAAAAH)~%~S~%" (peek-token))))) "End-of-statement in the middle of an expression."))
(t (error-parser (source (peek-token))
"Unexpected token ~S in unexpected situation."
(peek-token)))))
(define-syntax-matcher assignment (l-value r-value) (define-syntax-matcher assignment (l-value r-value)
(setf l-value (match-syntax expression)) (setf l-value (match-syntax expression))
@ -196,14 +212,16 @@ parser's debug output.")
(match-syntax if) (match-syntax if)
(unless (or (match-token 'token-end-of-statement) (unless (or (match-token 'token-end-of-statement)
(null (peek-token))) (null (peek-token)))
(error "EOS required after IF, got ~A" (error-parser (source (previous-token))
"End-of-statement required after IF's THEN, got ~A"
(peek-token))))) (peek-token)))))
((match-token :for) ((match-token :for)
(prog1 (prog1
(match-syntax for-do-times) (match-syntax for-do-times)
(unless (or (match-token 'token-end-of-statement) (unless (or (match-token 'token-end-of-statement)
(null (peek-token))) (null (peek-token)))
(error "EOS required after FOR, got ~A" (error-parser (source (previous-token))
"End-of-statement required after FOR's THEN, got ~A"
(peek-token))))) (peek-token)))))
((match-token 'token-end-of-statement) ((match-token 'token-end-of-statement)
;; Empty statement, might contain comment ;; Empty statement, might contain comment
@ -212,7 +230,7 @@ parser's debug output.")
:comment *token-comment*)) :comment *token-comment*))
(t (t
(let ((expr (match-syntax assignment))) (let ((expr (match-syntax assignment)))
(setf (comment expr) *token-comment*) ;;(setf (comment expr) *token-comment*)
(consume-token 'token-end-of-statement (consume-token 'token-end-of-statement
(format nil "Couldn't find end of expression. ~A found instead." (format nil "Couldn't find end of expression. ~A found instead."
(peek-token))) (peek-token)))
@ -229,6 +247,9 @@ parser's debug output.")
(loop :for statement := (match-syntax statement) (loop :for statement := (match-syntax statement)
:unless (typep statement 'node-nop) :unless (typep statement 'node-nop)
:do (push statement statements) :do (push statement statements)
:when (null (peek-token))
:do (error-parser (source (previous-token))
"Unterminated block, check for missing ENDs and ELSEs.")
:until (or (match-token :end) :until (or (match-token :end)
(match-token :else))) (match-token :else)))
(setf statements (wire-up-statements (nreverse statements))) (setf statements (wire-up-statements (nreverse statements)))
@ -257,7 +278,8 @@ parser's debug output.")
`(progn `(progn
(setf ,var (match-token ',desired-token)) (setf ,var (match-token ',desired-token))
(when (null ,var) (when (null ,var)
(error ,(cadr (assoc var error-messages))))))))))) (error-parser (source (previous-token))
,(cadr (assoc var error-messages)))))))))))
(define-syntax-matcher for-do-times (var n code) (define-syntax-matcher for-do-times (var n code)
(match-syntax-pattern (match-syntax-pattern
@ -268,7 +290,8 @@ parser's debug output.")
(:times "FOR loop count must be followed by TIMES.") (:times "FOR loop count must be followed by TIMES.")
(var "A loop index variable must be supplied."))) (var "A loop index variable must be supplied.")))
(unless (eql (name (previous-token)) :end) (unless (eql (name (previous-token)) :end)
(error "FOR DO TIMES loop may only end with END. Found ~A." (previous-token))) (error-parser (source (previous-token))
"FOR DO TIMES loop may only end with END. Found ~A." (previous-token)))
(make-instance 'node-dotimes (make-instance 'node-dotimes
:source *syntax-source* :source *syntax-source*
:counter-ref (transform var 'reference-variable) :counter-ref (transform var 'reference-variable)
@ -288,13 +311,10 @@ parser's debug output.")
(:then "IF condition must be followed by THEN"))) (:then "IF condition must be followed by THEN")))
(cond ((eql (name (previous-token)) :end)) (cond ((eql (name (previous-token)) :end))
((eql (name (previous-token)) :else) ((eql (name (previous-token)) :else)
(setf else (match-syntax block)) (setf else (match-syntax block)))
#+(or) (t (error-parser (source (previous-token))
(unless (match-token :end) "IF block must be closed with END, found ~A."
(error "An IF's ELSE block must be followed by END.~%Found ~A instead.~%ELSE IF is not yet supported." (previous-token))))
(peek-token))))
(t (error "IF block ended very strangely, found ~A."
(peek-token))))
(make-instance 'node-conditional (make-instance 'node-conditional
:source *syntax-source* :source *syntax-source*
:test test :test test

View file

@ -26,6 +26,14 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
(defclass token-name (token) (defclass token-name (token)
((%name :accessor name :initarg :name))) ((%name :accessor name :initarg :name)))
(defun valid-name-p (string)
(assert (stringp string))
(and (not (zerop (length string)))
(alpha-char-p (char string 0))
(loop :for char :across (subseq string 1)
:always (or (alphanumericp char)
(char= char #\_)))))
(defmethod print-object ((object token-name) stream) (defmethod print-object ((object token-name) stream)
(print-unreadable-object (object stream :type t :identity t) (print-unreadable-object (object stream :type t :identity t)
(when (slot-boundp object '%name) (when (slot-boundp object '%name)
@ -113,7 +121,7 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
;;; Keyword tokens ;;; Keyword tokens
(defclass token-keyword (token-name) (defclass token-keyword (token)
((%name :accessor name :initarg :name))) ((%name :accessor name :initarg :name)))
(defparameter *syntax-keywords* (defparameter *syntax-keywords*
@ -140,7 +148,10 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
:source source :source source
:text (if (null start) "" :text (if (null start) ""
(subseq text start))))) (subseq text start)))))
(t (make-instance 'token-name :source source :name text)))))) ((valid-name-p text)
(make-instance 'token-name :source source :name text))
(t (error 'tokenizer-error :source source :format-arguments (list text)
:format-control "Invalid name or unknown token \"~A\"."))))))
(defun tokenize (stream &optional source) (defun tokenize (stream &optional source)
(let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t)) (let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
@ -155,7 +166,7 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
(push (text-to-token (copy-seq token-text-buffer) token-source) tokens) (push (text-to-token (copy-seq token-text-buffer) token-source) tokens)
(setf (fill-pointer token-text-buffer) 0 (setf (fill-pointer token-text-buffer) 0
new-token-p t new-token-p t
token-source (cons source (file-position stream))))) token-source (cons source (cons line column)))))
(loop :for char := (read-char stream) (loop :for char := (read-char stream)
:for next := (peek-char nil stream nil :eof) :for next := (peek-char nil stream nil :eof)
:do (incf column) :do (incf column)

View file

@ -5,6 +5,7 @@
:depends-on (#:closer-mop #:alexandria) :depends-on (#:closer-mop #:alexandria)
:components :components
((:file "package") ((:file "package")
(:file "error-handling")
(:file "toolkit") (:file "toolkit")
(:file "transform") (:file "transform")
(:file "reference") (:file "reference")