Add some proper error handling to user-side compiler

This commit is contained in:
John Lorentzson 2025-07-07 19:44:57 +02:00
parent b5fa71c710
commit 4d6ad30eae
5 changed files with 112 additions and 33 deletions

View file

@ -368,7 +368,7 @@ is the responsibility of the pre-assembly compilation step."
(defun quick-and-dirty-test-compile (text &key print-ir-p print-alloc-p make-asm-p)
(with-input-from-string (source-stream text)
(let ((*token-stream* (make-token-stream (tokenize source-stream))))
(let ((*token-stream* (make-token-stream (tokenize source-stream text))))
(let ((rb (with-compilation-setup (root-block builder)
(compile-node (match-syntax program) builder)
root-block))

View file

@ -0,0 +1,47 @@
(in-package #:user-side-compiler)
(defun string-nth-line (string line)
(assert (> line 0))
(let ((start (loop :repeat line
:for start := 0 :then (1+ (position #\Newline string
:start start))
:finally (return start))))
(subseq string
start
(position #\Newline string :start (1+ start)))))
(defun point-out-source (source)
(if (null source)
"No source info."
(destructuring-bind (text line . column)
source
(cond ((null text)
(format nil "Line ~D, column ~D" line column))
((stringp text)
(with-output-to-string (output)
(format output "~%Found on line ~D, column ~D:~%~A~%"
line column
(string-nth-line text line))
(loop :repeat column :do (write-char #\Space output))
(write-char #\^ output)))))))
(define-condition usc-error (error)
((%source :accessor source :initarg :source :initform nil)
(%format-control :accessor format-control :initarg :format-control)
(%format-arguments :accessor format-arguments :initarg :format-arguments :initform '())
(%context-string :reader context-string :initform "User-side compiler error at unknown stage:"))
(:report (lambda (c s)
(format s "~A~%~A~%~A" (context-string c)
(apply #'format nil (format-control c) (format-arguments c))
(point-out-source (source c))))))
(define-condition tokenizer-error (usc-error)
((%context-string :initform "Error in tokenizer:")))
(define-condition parser-error (usc-error)
())
(defun error-parser (source format-control &rest format-arguments)
(error 'parser-error :source source :format-control format-control
:format-arguments format-arguments))

View file

@ -25,7 +25,7 @@ parser's debug output.")
(setf *token-comment* nil))
got)))
(cond ((and (keywordp token)
(typep (peek-token) 'token-name)
(typep (peek-token) 'token-keyword)
(eql token (name (peek-token))))
(prog1
(pop-token)
@ -43,20 +43,33 @@ parser's debug output.")
(if (boundp '*current-matcher*)
*current-matcher*
nil))))
((null (peek-token))
(error-parser (source (previous-token))
"Ran out of tokens while parsing."))
(t nil))))
(defun consume-token (token &optional error-message)
(unless (match-token token)
(error (if (null error-message)
(format nil "Expected token of type ~A, got a ~A.~%Source: ~A"
token (type-of (peek-token))
(source (previous-token)))
(format nil "~A~%Got ~A.~%Source: ~A"
error-message
(previous-token)
(source (previous-token)))))))
(if (null error-message)
(error-parser (source (previous-token))
"Expected token of type ~A, got a ~A."
token (type-of (peek-token)))
(error-parser (source (previous-token))
"~A~%Got token ~A instead."
error-message (previous-token)
(source (previous-token))))))
(defun make-token-stream (tokens)
;; Appending an end-of-statement if one is missing so that we always
;; have something to latch onto when dealing with syntax errors at the end
;; of the program's text. It has the same source location as the actual
;; last token.
(let ((last (last tokens)))
(unless (typep (car last) 'token-end-of-statement)
(setf (cdr last) (cons (make-instance 'token-end-of-statement
:source (source (car last)))
nil))))
(assert (every (complement #'null) tokens))
(cons tokens nil))
;;; Matcher
@ -152,7 +165,8 @@ parser's debug output.")
(t
(setf arguments (match-syntax arglist))
(consume-token 'token-close-paren
"Close parenthesis ')' is required to end function call argument list.")))
"Close parenthesis ')' is required to end function call argument list,
comma ',' is required to separate arguments.")))
(make-instance 'node-call
:source *syntax-source*
:comment *token-comment*
@ -166,16 +180,18 @@ parser's debug output.")
(previous-token))
((match-token 'token-open-paren)
(let ((expr (match-syntax expression)))
(unless (match-token 'token-close-paren)
;; TODO
(error "Parser error: Closing parenthesis ')' required after expression."))
(consume-token 'token-close-paren
"Closing parenthesis ')' required after grouping expression.")
(make-instance 'node-expr-grouping
:source *syntax-source*
:comment *token-comment*
:expression expr)))
((match-token 'token-end-of-statement)
(error "End-of-statement in the middle of an expression"))
(t (error "What token?! Is this?! (DAH DAH DAAAAH)~%~S~%" (peek-token)))))
(error-parser (source (previous-token))
"End-of-statement in the middle of an expression."))
(t (error-parser (source (peek-token))
"Unexpected token ~S in unexpected situation."
(peek-token)))))
(define-syntax-matcher assignment (l-value r-value)
(setf l-value (match-syntax expression))
@ -196,15 +212,17 @@ parser's debug output.")
(match-syntax if)
(unless (or (match-token 'token-end-of-statement)
(null (peek-token)))
(error "EOS required after IF, got ~A"
(peek-token)))))
(error-parser (source (previous-token))
"End-of-statement required after IF's THEN, got ~A"
(peek-token)))))
((match-token :for)
(prog1
(match-syntax for-do-times)
(unless (or (match-token 'token-end-of-statement)
(null (peek-token)))
(error "EOS required after FOR, got ~A"
(peek-token)))))
(error-parser (source (previous-token))
"End-of-statement required after FOR's THEN, got ~A"
(peek-token)))))
((match-token 'token-end-of-statement)
;; Empty statement, might contain comment
(make-instance 'node-nop
@ -212,7 +230,7 @@ parser's debug output.")
:comment *token-comment*))
(t
(let ((expr (match-syntax assignment)))
(setf (comment expr) *token-comment*)
;;(setf (comment expr) *token-comment*)
(consume-token 'token-end-of-statement
(format nil "Couldn't find end of expression. ~A found instead."
(peek-token)))
@ -229,6 +247,9 @@ parser's debug output.")
(loop :for statement := (match-syntax statement)
:unless (typep statement 'node-nop)
:do (push statement statements)
:when (null (peek-token))
:do (error-parser (source (previous-token))
"Unterminated block, check for missing ENDs and ELSEs.")
:until (or (match-token :end)
(match-token :else)))
(setf statements (wire-up-statements (nreverse statements)))
@ -257,7 +278,8 @@ parser's debug output.")
`(progn
(setf ,var (match-token ',desired-token))
(when (null ,var)
(error ,(cadr (assoc var error-messages)))))))))))
(error-parser (source (previous-token))
,(cadr (assoc var error-messages)))))))))))
(define-syntax-matcher for-do-times (var n code)
(match-syntax-pattern
@ -268,7 +290,8 @@ parser's debug output.")
(:times "FOR loop count must be followed by TIMES.")
(var "A loop index variable must be supplied.")))
(unless (eql (name (previous-token)) :end)
(error "FOR DO TIMES loop may only end with END. Found ~A." (previous-token)))
(error-parser (source (previous-token))
"FOR DO TIMES loop may only end with END. Found ~A." (previous-token)))
(make-instance 'node-dotimes
:source *syntax-source*
:counter-ref (transform var 'reference-variable)
@ -288,13 +311,10 @@ parser's debug output.")
(:then "IF condition must be followed by THEN")))
(cond ((eql (name (previous-token)) :end))
((eql (name (previous-token)) :else)
(setf else (match-syntax block))
#+(or)
(unless (match-token :end)
(error "An IF's ELSE block must be followed by END.~%Found ~A instead.~%ELSE IF is not yet supported."
(peek-token))))
(t (error "IF block ended very strangely, found ~A."
(peek-token))))
(setf else (match-syntax block)))
(t (error-parser (source (previous-token))
"IF block must be closed with END, found ~A."
(previous-token))))
(make-instance 'node-conditional
:source *syntax-source*
:test test

View file

@ -26,6 +26,14 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
(defclass token-name (token)
((%name :accessor name :initarg :name)))
(defun valid-name-p (string)
(assert (stringp string))
(and (not (zerop (length string)))
(alpha-char-p (char string 0))
(loop :for char :across (subseq string 1)
:always (or (alphanumericp char)
(char= char #\_)))))
(defmethod print-object ((object token-name) stream)
(print-unreadable-object (object stream :type t :identity t)
(when (slot-boundp object '%name)
@ -113,7 +121,7 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
;;; Keyword tokens
(defclass token-keyword (token-name)
(defclass token-keyword (token)
((%name :accessor name :initarg :name)))
(defparameter *syntax-keywords*
@ -140,7 +148,10 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
:source source
:text (if (null start) ""
(subseq text start)))))
(t (make-instance 'token-name :source source :name text))))))
((valid-name-p text)
(make-instance 'token-name :source source :name text))
(t (error 'tokenizer-error :source source :format-arguments (list text)
:format-control "Invalid name or unknown token \"~A\"."))))))
(defun tokenize (stream &optional source)
(let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
@ -155,7 +166,7 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
(push (text-to-token (copy-seq token-text-buffer) token-source) tokens)
(setf (fill-pointer token-text-buffer) 0
new-token-p t
token-source (cons source (file-position stream)))))
token-source (cons source (cons line column)))))
(loop :for char := (read-char stream)
:for next := (peek-char nil stream nil :eof)
:do (incf column)

View file

@ -5,6 +5,7 @@
:depends-on (#:closer-mop #:alexandria)
:components
((:file "package")
(:file "error-handling")
(:file "toolkit")
(:file "transform")
(:file "reference")