Better handling of operators, source, and comments in tokenizer
...among other minor changes to accomodate the parser.
This commit is contained in:
parent
8acbf2caec
commit
1307e31268
1 changed files with 68 additions and 22 deletions
|
@ -15,13 +15,16 @@ the processing of a new token.")
|
||||||
"Characters that are ALWAYS single-character tokens and will end their own
|
"Characters that are ALWAYS single-character tokens and will end their own
|
||||||
reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
||||||
|
|
||||||
|
(defparameter *line-comment-chars*
|
||||||
|
'(#\# #\;))
|
||||||
|
|
||||||
(defclass token ()
|
(defclass token ()
|
||||||
((%source :accessor source :initarg :source :initform nil)))
|
((%source :accessor source :initarg :source :initform nil)))
|
||||||
|
|
||||||
;; Tokens containing user data
|
;; Tokens containing user data
|
||||||
|
|
||||||
(defclass token-name (token)
|
(defclass token-name (token)
|
||||||
((%name :accessor name :initarg :text)))
|
((%name :accessor name :initarg :name)))
|
||||||
|
|
||||||
(defmethod print-object ((object token-name) stream)
|
(defmethod print-object ((object token-name) stream)
|
||||||
(print-unreadable-object (object stream :type t :identity t)
|
(print-unreadable-object (object stream :type t :identity t)
|
||||||
|
@ -31,6 +34,15 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
||||||
(defclass token-number (token)
|
(defclass token-number (token)
|
||||||
((%value :accessor value :initarg :value)))
|
((%value :accessor value :initarg :value)))
|
||||||
|
|
||||||
|
(defclass token-comment (token)
|
||||||
|
((%text :accessor text :initarg :text)))
|
||||||
|
|
||||||
|
(defmethod comment-p (obj)
|
||||||
|
nil)
|
||||||
|
|
||||||
|
(defmethod comment-p ((obj token-comment))
|
||||||
|
t)
|
||||||
|
|
||||||
;; Special syntax tokens
|
;; Special syntax tokens
|
||||||
|
|
||||||
(defmacro define-atomic-token (name)
|
(defmacro define-atomic-token (name)
|
||||||
|
@ -77,7 +89,7 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
||||||
|
|
||||||
;;; Keyword tokens
|
;;; Keyword tokens
|
||||||
|
|
||||||
(defclass token-keyword (token)
|
(defclass token-keyword (token-name)
|
||||||
((%name :accessor name :initarg :name)))
|
((%name :accessor name :initarg :name)))
|
||||||
|
|
||||||
(defparameter *syntax-keywords*
|
(defparameter *syntax-keywords*
|
||||||
|
@ -98,12 +110,21 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
||||||
((member text *syntax-keywords* :test #'equalp)
|
((member text *syntax-keywords* :test #'equalp)
|
||||||
(make-instance 'token-keyword :source source
|
(make-instance 'token-keyword :source source
|
||||||
:name (intern (string-upcase text) (find-package '#:keyword))))
|
:name (intern (string-upcase text) (find-package '#:keyword))))
|
||||||
(t (make-instance 'token-name :source source :text text))))))
|
((member (aref text 0) *line-comment-chars*)
|
||||||
|
(let ((start (position-if-not #'whitespacep (subseq text 1))))
|
||||||
|
(make-instance 'token-comment
|
||||||
|
:source source
|
||||||
|
:text (if (null start) ""
|
||||||
|
(subseq text start)))))
|
||||||
|
(t (make-instance 'token-name :source source :name text))))))
|
||||||
|
|
||||||
(defun tokenize (stream &optional source)
|
(defun tokenize (stream &optional source)
|
||||||
(let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
|
(let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
|
||||||
(tokens '())
|
(tokens '())
|
||||||
(new-token-p t)
|
(new-token-p t)
|
||||||
|
(in-comment-p nil)
|
||||||
|
(line 1)
|
||||||
|
(column 0)
|
||||||
(token-source (cons source 0)))
|
(token-source (cons source 0)))
|
||||||
(labels
|
(labels
|
||||||
((next-token ()
|
((next-token ()
|
||||||
|
@ -113,26 +134,51 @@ reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
||||||
token-source (cons source (file-position stream)))))
|
token-source (cons source (file-position stream)))))
|
||||||
(loop :for char := (read-char stream)
|
(loop :for char := (read-char stream)
|
||||||
:for next := (peek-char nil stream nil :eof)
|
:for next := (peek-char nil stream nil :eof)
|
||||||
:until (eql :eof next)
|
:do (incf column)
|
||||||
:do
|
:do
|
||||||
(cond ((and (not new-token-p) (whitespacep char))
|
(cond
|
||||||
|
;; Break for next token at whitespace
|
||||||
|
((and (not new-token-p) (whitespacep char) (not in-comment-p))
|
||||||
(next-token))
|
(next-token))
|
||||||
|
;; Process and add a newline
|
||||||
((char= char #\Newline)
|
((char= char #\Newline)
|
||||||
;; this will be different later
|
(setf in-comment-p nil
|
||||||
|
line (1+ line)
|
||||||
|
column 0)
|
||||||
(unless (zerop (length token-text-buffer))
|
(unless (zerop (length token-text-buffer))
|
||||||
(next-token))
|
(next-token))
|
||||||
(vector-push #\Newline token-text-buffer)
|
(vector-push #\Newline token-text-buffer)
|
||||||
(next-token))
|
(next-token))
|
||||||
((not (whitespacep char))
|
;; Starting a comment
|
||||||
|
((and (member char *line-comment-chars* :test #'char=) (not in-comment-p))
|
||||||
|
(unless (zerop (length token-text-buffer))
|
||||||
|
(next-token))
|
||||||
(vector-push char token-text-buffer)
|
(vector-push char token-text-buffer)
|
||||||
(setf new-token-p nil)
|
(setf in-comment-p t))
|
||||||
;; Check if we should end here based on what's next
|
;; Comment character (next-token'd by newline)
|
||||||
(when (member next *special-token-chars*)
|
(in-comment-p
|
||||||
|
(vector-push char token-text-buffer))
|
||||||
|
;; Non-whitespace non-comment characters
|
||||||
|
((and (not (whitespacep char)) (not in-comment-p))
|
||||||
|
(vector-push char token-text-buffer)
|
||||||
|
(when new-token-p
|
||||||
|
(setf token-source (cons source (cons line column)))
|
||||||
|
(setf new-token-p nil))
|
||||||
|
(cond
|
||||||
|
;; End if the next is a non-alphanumeric token
|
||||||
|
((and (not (member char *special-token-chars*))
|
||||||
|
(member next *special-token-chars*))
|
||||||
|
(next-token))
|
||||||
|
;; End if the next is an alphanumeric token
|
||||||
|
((and (member char *special-token-chars*)
|
||||||
|
(not (member next *special-token-chars*)))
|
||||||
(next-token))
|
(next-token))
|
||||||
;; Or if we *are* a special one-off character (that's a different set)
|
;; Or if we *are* a special one-off character (that's a different set)
|
||||||
(when (member char *single-token-chars*)
|
((member char *single-token-chars*)
|
||||||
(next-token))))
|
(next-token))
|
||||||
:finally (next-token)))
|
((eql :eof next)
|
||||||
|
(next-token)))))
|
||||||
|
:until (eql :eof next)))
|
||||||
(nreverse tokens)))
|
(nreverse tokens)))
|
||||||
|
|
||||||
;;; Jigs
|
;;; Jigs
|
||||||
|
|
Loading…
Add table
Reference in a new issue