c64-livecoding/wip-duuqnd/user-side-compiler/tokenizer.lisp

255 lines
9.3 KiB
Common Lisp

(in-package #:user-side-compiler)
(defparameter *special-token-chars*
'(#\+ #\-
#\< #\>
#\( #\)
#\, #\.
#\= #\!)
"Characters that when encountered will finish up textual tokens and begin
the processing of a new token.")
(defparameter *single-token-chars*
'(#\, #\.
#\( #\))
"Characters that are ALWAYS single-character tokens and will end their own
reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
(defparameter *line-comment-chars*
'(#\# #\;))
(defclass token ()
((%source :accessor source :initarg :source :initform nil)))
;; Tokens containing user data
(defclass token-name (token)
((%name :accessor name :initarg :name)))
(defmethod print-object ((object token-name) stream)
(print-unreadable-object (object stream :type t :identity t)
(when (slot-boundp object '%name)
(format stream "\"~A\"" (name object)))))
(define-transformation (token (token-name reference-variable))
(multiple-value-bind (value existsp)
(find-reference-by-symbol (name token))
(if existsp
value
(add-reference-symbol (name token)
(make-instance 'reference-variable
:name (name token))))))
(defclass token-number (token)
((%value :accessor value :initarg :value)))
(defmethod print-object ((object token-number) stream)
(print-unreadable-object (object stream :type t :identity t)
(when (slot-boundp object '%value)
(format stream "~D" (value object)))))
(define-transformation (token (token-number reference-constant))
(make-instance 'reference-constant :value (value token)))
(define-transformation (token (token-number integer))
(value token))
(defclass token-comment (token)
((%text :accessor text :initarg :text)))
(defmethod comment-p (obj)
nil)
(defmethod comment-p ((obj token-comment))
t)
;; Special syntax tokens
(defmacro define-atomic-token (name)
`(defclass ,name (token) ()))
(define-atomic-token token-end-of-statement)
(define-atomic-token token-open-paren)
(define-atomic-token token-close-paren)
(define-atomic-token token-comma)
(define-atomic-token token-plus)
(define-atomic-token token-minus)
(define-atomic-token token-star)
(define-atomic-token token-slash)
(define-atomic-token token-less-than)
(define-atomic-token token-greater-than)
(define-atomic-token token-equal-equal)
(define-atomic-token token-not-equal)
(define-atomic-token token-not)
(define-atomic-token token-negate)
(define-atomic-token token-equal)
;;; String->class-name mappings for operator tokens
(defparameter *operator-token-classes*
`((,(string #\Newline) token-end-of-statement)
(";" token-end-of-statement)
("," token-comma)
("+" token-plus)
("-" token-minus)
("*" token-star)
("/" token-slash)
("(" token-open-paren)
(")" token-close-paren)
("<" token-less-than)
(">" token-greater-than)
("=" token-equal)
("==" token-equal-equal)
("!=" token-not-equal)))
;;; Keyword tokens
(defclass token-keyword (token-name)
((%name :accessor name :initarg :name)))
(defparameter *syntax-keywords*
'("end" "for" "do" "times" "if" "then" "else"))
(defun whitespacep (char)
(declare (type character char)
(optimize (speed 3)))
(not (null (member char '(#\Space #\Tab)))))
(defun text-to-token (text &optional source)
(let ((operator-token (cadr (find text *operator-token-classes* :test #'equalp :key #'car))))
(if (not (null operator-token))
(make-instance operator-token :source source)
(cond ((every #'digit-char-p text)
(make-instance 'token-number :source source
:value (parse-integer text)))
((member text *syntax-keywords* :test #'equalp)
(make-instance 'token-keyword :source source
:name (intern (string-upcase text) (find-package '#:keyword))))
((member (aref text 0) *line-comment-chars*)
(let ((start (position-if-not #'whitespacep (subseq text 1))))
(make-instance 'token-comment
:source source
:text (if (null start) ""
(subseq text start)))))
(t (make-instance 'token-name :source source :name text))))))
(defun tokenize (stream &optional source)
(let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
(tokens '())
(new-token-p t)
(in-comment-p nil)
(line 1)
(column 0)
(token-source (cons source 0)))
(labels
((next-token ()
(push (text-to-token (copy-seq token-text-buffer) token-source) tokens)
(setf (fill-pointer token-text-buffer) 0
new-token-p t
token-source (cons source (file-position stream)))))
(loop :for char := (read-char stream)
:for next := (peek-char nil stream nil :eof)
:do (incf column)
:do
(cond
;; Break for next token at whitespace
((and (not new-token-p) (whitespacep char) (not in-comment-p))
(next-token))
;; Process and add a newline
((char= char #\Newline)
(setf in-comment-p nil
line (1+ line)
column 0)
(unless (zerop (length token-text-buffer))
(next-token))
(vector-push #\Newline token-text-buffer)
(next-token))
;; Starting a comment
((and (member char *line-comment-chars* :test #'char=) (not in-comment-p))
(unless (zerop (length token-text-buffer))
(next-token))
(vector-push char token-text-buffer)
(setf in-comment-p t))
;; Comment character (next-token'd by newline)
(in-comment-p
(vector-push char token-text-buffer))
;; Non-whitespace non-comment characters
((and (not (whitespacep char)) (not in-comment-p))
(vector-push char token-text-buffer)
(when new-token-p
(setf token-source (cons source (cons line column)))
(setf new-token-p nil))
(cond
;; End if the next is a non-alphanumeric token
((and (not (member char *special-token-chars*))
(member next *special-token-chars*))
(next-token))
;; End if the next is an alphanumeric token
((and (member char *special-token-chars*)
(not (member next *special-token-chars*)))
(next-token))
;; Or if we *are* a special one-off character (that's a different set)
((member char *single-token-chars*)
(next-token))
((eql :eof next)
(next-token)))))
:until (eql :eof next)))
(nreverse tokens)))
;;; Jigs
(defparameter *tokens-no-space-before*
'(token-comma
token-open-paren
token-close-paren
token-end-of-statement)
"Token classes which do not allow a space before being reprinted.")
(defparameter *tokens-no-space-after*
'(token-end-of-statement
token-open-paren)
"Token classes which do not add a space after being reprinted.")
(defparameter *reprint-indent-size* 4
"The amount of spaces that an indentation level adds to its line.")
(defun reprint-from-tokens (tokens)
(let ((indent-size 4))
(loop :with indent := 0
:with line-empty-p := t
:for token :in tokens
:for tokens-left := tokens :then (cdr tokens-left)
:for next-token := (second tokens-left)
;; Pre-token style actions
:do (when (and (typep token 'token-keyword)
(member (name token) '(:end :else)))
(decf indent indent-size))
;; Printing action
:do (when line-empty-p
(loop :repeat indent :do (write-char #\Space))
;;(format t "(~D)" indent)
(setf line-empty-p nil))
:do (let ((token-string
(cond ((typep token 'token-keyword)
(format nil "~A" (name token)))
((typep token 'token-name)
(name token))
((typep token 'token-number)
(format nil "~D" (value token)))
(t (car (find (type-of token) *operator-token-classes* :key #'cadr))))))
(format t "~A~A" token-string
(if (or (typep token `(or ,@*tokens-no-space-after*))
(typep next-token `(or ,@*tokens-no-space-before*)))
""
" "))
(when (typep token 'token-end-of-statement)
(setf line-empty-p t)))
;; Post-token style actions
:do (when (and (typep token 'token-keyword)
(member (name token) '(:times :then :else)))
(incf indent indent-size)))))