255 lines
9.3 KiB
Common Lisp
255 lines
9.3 KiB
Common Lisp
(in-package #:user-side-compiler)
|
|
|
|
(defparameter *special-token-chars*
|
|
'(#\+ #\-
|
|
#\< #\>
|
|
#\( #\)
|
|
#\, #\.
|
|
#\= #\!)
|
|
"Characters that when encountered will finish up textual tokens and begin
|
|
the processing of a new token.")
|
|
|
|
(defparameter *single-token-chars*
|
|
'(#\, #\.
|
|
#\( #\))
|
|
"Characters that are ALWAYS single-character tokens and will end their own
|
|
reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
|
|
|
(defparameter *line-comment-chars*
|
|
'(#\# #\;))
|
|
|
|
(defclass token ()
|
|
((%source :accessor source :initarg :source :initform nil)))
|
|
|
|
;; Tokens containing user data
|
|
|
|
(defclass token-name (token)
|
|
((%name :accessor name :initarg :name)))
|
|
|
|
(defmethod print-object ((object token-name) stream)
|
|
(print-unreadable-object (object stream :type t :identity t)
|
|
(when (slot-boundp object '%name)
|
|
(format stream "\"~A\"" (name object)))))
|
|
|
|
(define-transformation (token (token-name reference-variable))
|
|
(multiple-value-bind (value existsp)
|
|
(find-reference-by-symbol (name token))
|
|
(if existsp
|
|
value
|
|
(add-reference-symbol (name token)
|
|
(make-instance 'reference-variable
|
|
:name (name token))))))
|
|
|
|
(defclass token-number (token)
|
|
((%value :accessor value :initarg :value)))
|
|
|
|
(defmethod print-object ((object token-number) stream)
|
|
(print-unreadable-object (object stream :type t :identity t)
|
|
(when (slot-boundp object '%value)
|
|
(format stream "~D" (value object)))))
|
|
|
|
(define-transformation (token (token-number reference-constant))
|
|
(make-instance 'reference-constant :value (value token)))
|
|
|
|
(define-transformation (token (token-number integer))
|
|
(value token))
|
|
|
|
(defclass token-comment (token)
|
|
((%text :accessor text :initarg :text)))
|
|
|
|
(defmethod comment-p (obj)
|
|
nil)
|
|
|
|
(defmethod comment-p ((obj token-comment))
|
|
t)
|
|
|
|
;; Special syntax tokens
|
|
|
|
(defmacro define-atomic-token (name)
|
|
`(defclass ,name (token) ()))
|
|
|
|
(define-atomic-token token-end-of-statement)
|
|
|
|
(define-atomic-token token-open-paren)
|
|
(define-atomic-token token-close-paren)
|
|
(define-atomic-token token-comma)
|
|
|
|
(define-atomic-token token-plus)
|
|
(define-atomic-token token-minus)
|
|
(define-atomic-token token-star)
|
|
(define-atomic-token token-slash)
|
|
|
|
(define-atomic-token token-less-than)
|
|
(define-atomic-token token-greater-than)
|
|
(define-atomic-token token-equal-equal)
|
|
(define-atomic-token token-not-equal)
|
|
|
|
(define-atomic-token token-not)
|
|
(define-atomic-token token-negate)
|
|
|
|
(define-atomic-token token-equal)
|
|
|
|
;;; String->class-name mappings for operator tokens
|
|
|
|
(defparameter *operator-token-classes*
|
|
`((,(string #\Newline) token-end-of-statement)
|
|
(";" token-end-of-statement)
|
|
("," token-comma)
|
|
("+" token-plus)
|
|
("-" token-minus)
|
|
("*" token-star)
|
|
("/" token-slash)
|
|
("(" token-open-paren)
|
|
(")" token-close-paren)
|
|
("<" token-less-than)
|
|
(">" token-greater-than)
|
|
("=" token-equal)
|
|
("==" token-equal-equal)
|
|
("!=" token-not-equal)))
|
|
|
|
;;; Keyword tokens
|
|
|
|
(defclass token-keyword (token-name)
|
|
((%name :accessor name :initarg :name)))
|
|
|
|
(defparameter *syntax-keywords*
|
|
'("end" "for" "do" "times" "if" "then" "else"))
|
|
|
|
(defun whitespacep (char)
|
|
(declare (type character char)
|
|
(optimize (speed 3)))
|
|
(not (null (member char '(#\Space #\Tab)))))
|
|
|
|
(defun text-to-token (text &optional source)
|
|
(let ((operator-token (cadr (find text *operator-token-classes* :test #'equalp :key #'car))))
|
|
(if (not (null operator-token))
|
|
(make-instance operator-token :source source)
|
|
(cond ((every #'digit-char-p text)
|
|
(make-instance 'token-number :source source
|
|
:value (parse-integer text)))
|
|
((member text *syntax-keywords* :test #'equalp)
|
|
(make-instance 'token-keyword :source source
|
|
:name (intern (string-upcase text) (find-package '#:keyword))))
|
|
((member (aref text 0) *line-comment-chars*)
|
|
(let ((start (position-if-not #'whitespacep (subseq text 1))))
|
|
(make-instance 'token-comment
|
|
:source source
|
|
:text (if (null start) ""
|
|
(subseq text start)))))
|
|
(t (make-instance 'token-name :source source :name text))))))
|
|
|
|
(defun tokenize (stream &optional source)
|
|
(let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
|
|
(tokens '())
|
|
(new-token-p t)
|
|
(in-comment-p nil)
|
|
(line 1)
|
|
(column 0)
|
|
(token-source (cons source 0)))
|
|
(labels
|
|
((next-token ()
|
|
(push (text-to-token (copy-seq token-text-buffer) token-source) tokens)
|
|
(setf (fill-pointer token-text-buffer) 0
|
|
new-token-p t
|
|
token-source (cons source (file-position stream)))))
|
|
(loop :for char := (read-char stream)
|
|
:for next := (peek-char nil stream nil :eof)
|
|
:do (incf column)
|
|
:do
|
|
(cond
|
|
;; Break for next token at whitespace
|
|
((and (not new-token-p) (whitespacep char) (not in-comment-p))
|
|
(next-token))
|
|
;; Process and add a newline
|
|
((char= char #\Newline)
|
|
(setf in-comment-p nil
|
|
line (1+ line)
|
|
column 0)
|
|
(unless (zerop (length token-text-buffer))
|
|
(next-token))
|
|
(vector-push #\Newline token-text-buffer)
|
|
(next-token))
|
|
;; Starting a comment
|
|
((and (member char *line-comment-chars* :test #'char=) (not in-comment-p))
|
|
(unless (zerop (length token-text-buffer))
|
|
(next-token))
|
|
(vector-push char token-text-buffer)
|
|
(setf in-comment-p t))
|
|
;; Comment character (next-token'd by newline)
|
|
(in-comment-p
|
|
(vector-push char token-text-buffer))
|
|
;; Non-whitespace non-comment characters
|
|
((and (not (whitespacep char)) (not in-comment-p))
|
|
(vector-push char token-text-buffer)
|
|
(when new-token-p
|
|
(setf token-source (cons source (cons line column)))
|
|
(setf new-token-p nil))
|
|
(cond
|
|
;; End if the next is a non-alphanumeric token
|
|
((and (not (member char *special-token-chars*))
|
|
(member next *special-token-chars*))
|
|
(next-token))
|
|
;; End if the next is an alphanumeric token
|
|
((and (member char *special-token-chars*)
|
|
(not (member next *special-token-chars*)))
|
|
(next-token))
|
|
;; Or if we *are* a special one-off character (that's a different set)
|
|
((member char *single-token-chars*)
|
|
(next-token))
|
|
((eql :eof next)
|
|
(next-token)))))
|
|
:until (eql :eof next)))
|
|
(nreverse tokens)))
|
|
|
|
;;; Jigs
|
|
|
|
(defparameter *tokens-no-space-before*
|
|
'(token-comma
|
|
token-open-paren
|
|
token-close-paren
|
|
token-end-of-statement)
|
|
"Token classes which do not allow a space before being reprinted.")
|
|
|
|
(defparameter *tokens-no-space-after*
|
|
'(token-end-of-statement
|
|
token-open-paren)
|
|
"Token classes which do not add a space after being reprinted.")
|
|
|
|
(defparameter *reprint-indent-size* 4
|
|
"The amount of spaces that an indentation level adds to its line.")
|
|
|
|
(defun reprint-from-tokens (tokens)
|
|
(let ((indent-size 4))
|
|
(loop :with indent := 0
|
|
:with line-empty-p := t
|
|
:for token :in tokens
|
|
:for tokens-left := tokens :then (cdr tokens-left)
|
|
:for next-token := (second tokens-left)
|
|
;; Pre-token style actions
|
|
:do (when (and (typep token 'token-keyword)
|
|
(member (name token) '(:end :else)))
|
|
(decf indent indent-size))
|
|
;; Printing action
|
|
:do (when line-empty-p
|
|
(loop :repeat indent :do (write-char #\Space))
|
|
;;(format t "(~D)" indent)
|
|
(setf line-empty-p nil))
|
|
:do (let ((token-string
|
|
(cond ((typep token 'token-keyword)
|
|
(format nil "~A" (name token)))
|
|
((typep token 'token-name)
|
|
(name token))
|
|
((typep token 'token-number)
|
|
(format nil "~D" (value token)))
|
|
(t (car (find (type-of token) *operator-token-classes* :key #'cadr))))))
|
|
(format t "~A~A" token-string
|
|
(if (or (typep token `(or ,@*tokens-no-space-after*))
|
|
(typep next-token `(or ,@*tokens-no-space-before*)))
|
|
""
|
|
" "))
|
|
(when (typep token 'token-end-of-statement)
|
|
(setf line-empty-p t)))
|
|
;; Post-token style actions
|
|
:do (when (and (typep token 'token-keyword)
|
|
(member (name token) '(:times :then :else)))
|
|
(incf indent indent-size)))))
|