(in-package #:user-side-compiler) (defparameter *special-token-chars* '(#\+ #\- #\< #\> #\( #\) #\, #\. #\= #\!) "Characters that when encountered will finish up textual tokens and begin the processing of a new token.") (defparameter *single-token-chars* '(#\, #\. #\( #\)) "Characters that are ALWAYS single-character tokens and will end their own reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.") (defclass token () ((%source :accessor source :initarg :source))) ;; Tokens containing user data (defclass token-name (token) ((%text :accessor text :initarg :text))) (defclass token-number (token) ((%value :accessor value :initarg :value))) ;; Special syntax tokens (defclass token-end-of-statement (token) ()) (defclass token-comma (token) ()) (defclass token-plus (token) ()) (defclass token-open-paren (token) ()) (defclass token-close-paren (token) ()) (defclass token-less-than (token) ()) (defclass token-greater-than (token) ()) ;;; String->class-name mappings for operator tokens (defparameter *operator-token-classes* `((,(string #\Newline) token-end-of-statement) (";" token-end-of-statement) ("," token-comma) ("+" token-plus) ("(" token-open-paren) (")" token-close-paren) ("<" token-less-than) (">" token-greater-than))) ;;; Keyword tokens (defclass token-keyword (token) ((%name :accessor name :initarg :name))) (defparameter *syntax-keywords* '("end" "for" "do" "times" "if" "then" "else")) (defun whitespacep (char) (declare (type character char) (optimize (speed 3))) (not (null (member char '(#\Space #\Tab))))) (defun text-to-token (text &optional source) (let ((operator-token (cadr (find text *operator-token-classes* :test #'equalp :key #'car)))) (if (not (null operator-token)) (make-instance operator-token :source source) (cond ((every #'digit-char-p text) (make-instance 'token-number :source source :value (parse-integer text))) ((member text *syntax-keywords* :test #'equalp) (make-instance 'token-keyword :source source :name (intern (string-upcase text) (find-package '#:keyword)))) (t (make-instance 'token-name :source source :text text)))))) (defun tokenize (stream &optional source) (let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t)) (tokens '()) (new-token-p t) (token-source (cons source 0))) (labels ((next-token () (push (text-to-token (copy-seq token-text-buffer) token-source) tokens) (setf (fill-pointer token-text-buffer) 0 new-token-p t token-source (cons source (file-position stream))))) (loop :for char := (read-char stream) :for next := (peek-char nil stream nil :eof) :until (eql :eof next) :do (cond ((and (not new-token-p) (whitespacep char)) (next-token)) ((char= char #\Newline) ;; this will be different later (unless (zerop (length token-text-buffer)) (next-token)) (vector-push #\Newline token-text-buffer) (next-token)) ((not (whitespacep char)) (vector-push char token-text-buffer) (setf new-token-p nil) ;; Check if we should end here based on what's next (when (member next *special-token-chars*) (next-token)) ;; Or if we *are* a special one-off character (that's a different set) (when (member char *single-token-chars*) (next-token)))) :finally (next-token))) (nreverse tokens))) ;;; Jigs (defparameter *tokens-no-space-before* '(token-comma token-open-paren token-close-paren token-end-of-statement) "Token classes which do not allow a space before being reprinted.") (defparameter *tokens-no-space-after* '(token-end-of-statement token-open-paren) "Token classes which do not add a space after being reprinted.") (defparameter *reprint-indent-size* 4 "The amount of spaces that an indentation level adds to its line.") (defun reprint-from-tokens (tokens) (let ((indent-size 4)) (loop :with indent := 0 :with line-empty-p := t :for token :in tokens :for tokens-left := tokens :then (cdr tokens-left) :for next-token := (second tokens-left) ;; Pre-token style actions :do (when (and (typep token 'token-keyword) (member (name token) '(:end :else))) (decf indent indent-size)) ;; Printing action :do (when line-empty-p (loop :repeat indent :do (write-char #\Space)) ;;(format t "(~D)" indent) (setf line-empty-p nil)) :do (let ((token-string (cond ((typep token 'token-keyword) (format nil "~A" (name token))) ((typep token 'token-name) (text token)) ((typep token 'token-number) (format nil "~D" (value token))) (t (car (find (type-of token) *operator-token-classes* :key #'cadr)))))) (format t "~A~A" token-string (if (or (typep token `(or ,@*tokens-no-space-after*)) (typep next-token `(or ,@*tokens-no-space-before*))) "" " ")) (when (typep token 'token-end-of-statement) (setf line-empty-p t))) ;; Post-token style actions :do (when (and (typep token 'token-keyword) (member (name token) '(:times :then :else))) (incf indent indent-size)))))