(in-package #:user-side-compiler)

(defparameter *special-token-chars*
  '(#\+ #\-
    #\< #\>
    #\( #\)
    #\, #\.
    #\= #\!)
  "Characters that when encountered will finish up textual tokens and begin
the processing of a new token.")

(defparameter *single-token-chars*
  '(#\, #\.
    #\( #\))
  "Characters that are ALWAYS single-character tokens and will end their own
reading immediately. Should be a subset of *SPECIAL-TOKEN-CHARS*.")

(defclass token ()
  ((%source :accessor source :initarg :source)))

;; Tokens containing user data

(defclass token-name (token)
  ((%text :accessor text :initarg :text)))

(defclass token-number (token)
  ((%value :accessor value :initarg :value)))

;; Special syntax tokens

(defclass token-end-of-statement (token) ())
(defclass token-comma (token) ())
(defclass token-plus (token) ())
(defclass token-open-paren (token) ())
(defclass token-close-paren (token) ())
(defclass token-less-than (token) ())
(defclass token-greater-than (token) ())

;;; String->class-name mappings for operator tokens

(defparameter *operator-token-classes*
  `((,(string #\Newline) token-end-of-statement)
    (";" token-end-of-statement)
    ("," token-comma)
    ("+" token-plus)
    ("(" token-open-paren)
    (")" token-close-paren)
    ("<" token-less-than)
    (">" token-greater-than)))

;;; Keyword tokens

(defclass token-keyword (token)
  ((%name :accessor name :initarg :name)))

(defparameter *syntax-keywords*
  '("end" "for" "do" "times" "if" "then" "else"))

(defun whitespacep (char)
  (declare (type character char)
           (optimize (speed 3)))
  (not (null (member char '(#\Space #\Tab)))))

(defun text-to-token (text &optional source)
  (let ((operator-token (cadr (find text *operator-token-classes* :test #'equalp :key #'car))))
    (if (not (null operator-token))
        (make-instance operator-token :source source)
        (cond ((every #'digit-char-p text)
               (make-instance 'token-number :source source
                                            :value (parse-integer text)))
              ((member text *syntax-keywords* :test #'equalp)
               (make-instance 'token-keyword :source source
                              :name (intern (string-upcase text) (find-package '#:keyword))))
              (t (make-instance 'token-name :source source :text text))))))

(defun tokenize (stream &optional source)
  (let ((token-text-buffer (make-array 32 :element-type 'character :fill-pointer 0 :adjustable t))
        (tokens '())
        (new-token-p t)
        (token-source (cons source 0)))
    (labels
        ((next-token ()
           (push (text-to-token (copy-seq token-text-buffer) token-source) tokens)
           (setf (fill-pointer token-text-buffer) 0
                 new-token-p t
                 token-source (cons source (file-position stream)))))
      (loop :for char := (read-char stream)
            :for next := (peek-char nil stream nil :eof)
            :until (eql :eof next)
            :do
               (cond ((and (not new-token-p) (whitespacep char))
                      (next-token))
                     ((char= char #\Newline)
                      ;; this will be different later
                      (unless (zerop (length token-text-buffer))
                        (next-token))
                      (vector-push #\Newline token-text-buffer)
                      (next-token))
                     ((not (whitespacep char))
                      (vector-push char token-text-buffer)
                      (setf new-token-p nil)
                      ;; Check if we should end here based on what's next
                      (when (member next *special-token-chars*)
                        (next-token))
                      ;; Or if we *are* a special one-off character (that's a different set)
                      (when (member char *single-token-chars*)
                        (next-token))))
            :finally (next-token)))
    (nreverse tokens)))

;;; Jigs

(defparameter *tokens-no-space-before*
  '(token-comma
    token-open-paren
    token-close-paren
    token-end-of-statement)
  "Token classes which do not allow a space before being reprinted.")

(defparameter *tokens-no-space-after*
  '(token-end-of-statement
    token-open-paren)
  "Token classes which do not add a space after being reprinted.")

(defparameter *reprint-indent-size* 4
  "The amount of spaces that an indentation level adds to its line.")

(defun reprint-from-tokens (tokens)
  (let ((indent-size 4))
    (loop :with indent := 0
          :with line-empty-p := t
          :for token :in tokens
          :for tokens-left := tokens :then (cdr tokens-left)
          :for next-token := (second tokens-left)
          ;; Pre-token style actions
          :do (when (and (typep token 'token-keyword)
                         (member (name token) '(:end :else)))
                (decf indent indent-size))
              ;; Printing action
          :do (when line-empty-p
                (loop :repeat indent :do (write-char #\Space))
                ;;(format t "(~D)" indent)
                (setf line-empty-p nil))
          :do (let ((token-string
                      (cond ((typep token 'token-keyword)
                             (format nil "~A" (name token)))
                            ((typep token 'token-name)
                             (text token))
                            ((typep token 'token-number)
                             (format nil "~D" (value token)))
                            (t (car (find (type-of token) *operator-token-classes* :key #'cadr))))))
                (format t "~A~A" token-string
                        (if (or (typep token `(or ,@*tokens-no-space-after*))
                                (typep next-token `(or ,@*tokens-no-space-before*)))
                            ""
                            " "))
                (when (typep token 'token-end-of-statement)
                  (setf line-empty-p t)))
              ;; Post-token style actions
          :do (when (and (typep token 'token-keyword)
                         (member (name token) '(:times :then :else)))
                (incf indent indent-size)))))