162 lines
6 KiB
Common Lisp
162 lines
6 KiB
Common Lisp
(in-package #:user-side-compiler)
|
|
|
|
(defparameter *special-token-chars*
|
|
'(#\+ #\-
|
|
#\< #\>
|
|
#\( #\)
|
|
#\, #\.
|
|
#\= #\!)
|
|
"Characters that when encountered will finish up textual tokens and begin
|
|
the processing of a new token.")
|
|
|
|
(defparameter *single-token-chars*
|
|
'(#\, #\.
|
|
#\( #\))
|
|
"Characters that are ALWAYS single-character tokens and will end their own
|
|
reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")
|
|
|
|
(defclass token ()
|
|
((%source :accessor source :initarg :source)))
|
|
|
|
;; Tokens containing user data
|
|
|
|
(defclass token-name (token)
|
|
((%text :accessor text :initarg :text)))
|
|
|
|
(defclass token-number (token)
|
|
((%value :accessor value :initarg :value)))
|
|
|
|
;; Special syntax tokens
|
|
|
|
(defclass token-end-of-statement (token) ())
|
|
(defclass token-comma (token) ())
|
|
(defclass token-plus (token) ())
|
|
(defclass token-open-paren (token) ())
|
|
(defclass token-close-paren (token) ())
|
|
(defclass token-less-than (token) ())
|
|
(defclass token-greater-than (token) ())
|
|
|
|
;;; String->class-name mappings for operator tokens
|
|
|
|
(defparameter *operator-token-classes*
|
|
`((,(string #\Newline) token-end-of-statement)
|
|
(";" token-end-of-statement)
|
|
("," token-comma)
|
|
("+" token-plus)
|
|
("(" token-open-paren)
|
|
(")" token-close-paren)
|
|
("<" token-less-than)
|
|
(">" token-greater-than)))
|
|
|
|
;;; Keyword tokens
|
|
|
|
(defclass token-keyword (token)
|
|
((%name :accessor name :initarg :name)))
|
|
|
|
(defparameter *syntax-keywords*
|
|
'("end" "for" "do" "times" "if" "then" "else"))
|
|
|
|
(defun whitespacep (char)
|
|
(declare (type character char)
|
|
(optimize (speed 3)))
|
|
(not (null (member char '(#\Space #\Tab)))))
|
|
|
|
(defun text-to-token (text &optional source)
|
|
(let ((operator-token (cadr (find text *operator-token-classes* :test #'equalp :key #'car))))
|
|
(if (not (null operator-token))
|
|
(make-instance operator-token :source source)
|
|
(cond ((every #'digit-char-p text)
|
|
(make-instance 'token-number :source source
|
|
:value (parse-integer text)))
|
|
((member text *syntax-keywords* :test #'equalp)
|
|
(make-instance 'token-keyword :source source
|
|
:name (intern (string-upcase text) (find-package '#:keyword))))
|
|
(t (make-instance 'token-name :source source :text text))))))
|
|
|
|
(defun tokenize (stream &optional source)
|
|
(let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
|
|
(tokens '())
|
|
(new-token-p t)
|
|
(token-source (cons source 0)))
|
|
(labels
|
|
((next-token ()
|
|
(push (text-to-token (copy-seq token-text-buffer) token-source) tokens)
|
|
(setf (fill-pointer token-text-buffer) 0
|
|
new-token-p t
|
|
token-source (cons source (file-position stream)))))
|
|
(loop :for char := (read-char stream)
|
|
:for next := (peek-char nil stream nil :eof)
|
|
:until (eql :eof next)
|
|
:do
|
|
(cond ((and (not new-token-p) (whitespacep char))
|
|
(next-token))
|
|
((char= char #\Newline)
|
|
;; this will be different later
|
|
(unless (zerop (length token-text-buffer))
|
|
(next-token))
|
|
(vector-push #\Newline token-text-buffer)
|
|
(next-token))
|
|
((not (whitespacep char))
|
|
(vector-push char token-text-buffer)
|
|
(setf new-token-p nil)
|
|
;; Check if we should end here based on what's next
|
|
(when (member next *special-token-chars*)
|
|
(next-token))
|
|
;; Or if we *are* a special one-off character (that's a different set)
|
|
(when (member char *single-token-chars*)
|
|
(next-token))))
|
|
:finally (next-token)))
|
|
(nreverse tokens)))
|
|
|
|
;;; Jigs
|
|
|
|
(defparameter *tokens-no-space-before*
|
|
'(token-comma
|
|
token-open-paren
|
|
token-close-paren
|
|
token-end-of-statement)
|
|
"Token classes which do not allow a space before being reprinted.")
|
|
|
|
(defparameter *tokens-no-space-after*
|
|
'(token-end-of-statement
|
|
token-open-paren)
|
|
"Token classes which do not add a space after being reprinted.")
|
|
|
|
(defparameter *reprint-indent-size* 4
|
|
"The amount of spaces that an indentation level adds to its line.")
|
|
|
|
(defun reprint-from-tokens (tokens)
|
|
(let ((indent-size 4))
|
|
(loop :with indent := 0
|
|
:with line-empty-p := t
|
|
:for token :in tokens
|
|
:for tokens-left := tokens :then (cdr tokens-left)
|
|
:for next-token := (second tokens-left)
|
|
;; Pre-token style actions
|
|
:do (when (and (typep token 'token-keyword)
|
|
(member (name token) '(:end :else)))
|
|
(decf indent indent-size))
|
|
;; Printing action
|
|
:do (when line-empty-p
|
|
(loop :repeat indent :do (write-char #\Space))
|
|
;;(format t "(~D)" indent)
|
|
(setf line-empty-p nil))
|
|
:do (let ((token-string
|
|
(cond ((typep token 'token-keyword)
|
|
(format nil "~A" (name token)))
|
|
((typep token 'token-name)
|
|
(text token))
|
|
((typep token 'token-number)
|
|
(format nil "~D" (value token)))
|
|
(t (car (find (type-of token) *operator-token-classes* :key #'cadr))))))
|
|
(format t "~A~A" token-string
|
|
(if (or (typep token `(or ,@*tokens-no-space-after*))
|
|
(typep next-token `(or ,@*tokens-no-space-before*)))
|
|
""
|
|
" "))
|
|
(when (typep token 'token-end-of-statement)
|
|
(setf line-empty-p t)))
|
|
;; Post-token style actions
|
|
:do (when (and (typep token 'token-keyword)
|
|
(member (name token) '(:times :then :else)))
|
|
(incf indent indent-size)))))
|