;;; Wintus
;;; 3/15/2011 - 8:11 AM
;;;
;;; H Tokenizer

(use srfi-13)

;; Construct a token: a pair whose car is the token TYPE (a symbol such
;; as 'NUMBER or 'LPAREN) and whose cdr is the associated DATA.
(define token
  (lambda (type data)
    (cons type data)))

;; Remove empty lines from SOURCE.
;;
;; SOURCE is a string.  It is split on newline characters, every empty
;; line is dropped, and the remaining lines are joined back together
;; with a single newline between them (no trailing newline is added).
;; Returns a new string.
(define (normalize source)
  (let loop ([kept  '()]                              ; non-empty lines, reversed
             [lines (string-split source #\newline)]) ; lines still to examine
    (cond [(null? lines)
           ;; SRFI-13 string-join takes a *string* delimiter, not a character.
           (string-join (reverse kept) "\n")]
          [(string-null? (car lines))
           (loop kept (cdr lines))]
          [else
           (loop (cons (car lines) kept) (cdr lines))])))

;; Convert SOURCE, a list of characters, into a list of tokens.
;;
;; Each token is built with `token` as (type . data).  The returned list
;; always ends with an (EOF) token.  A run of decimal digits is handed to
;; `tokenize-number`, which produces one NUMBER token and then resumes
;; tokenizing.  Every other character becomes a single-character token
;; whose data is the character itself:
;;
;;   s, l, r            -> NATIVE   (tested before the lower-case rule)
;;   other lower-case   -> PARAMETER
;;   upper-case         -> PROCNAME
;;   ( ) : , - +        -> LPAREN RPAREN COLON COMMA MINUS PLUS
;;   newline            -> ENDLINE
;;   anything else      -> UNKNOWN
(define (tokenize source)
  (cond [(null? source)
         (list (token 'EOF '()))]
        [(char-numeric? (car source))
         (tokenize-number source)]
        [else
         (let* ([c    (car source)]
                [type (cond [(char-set-contains? #[slr] c) 'NATIVE]
                            [(char-lower-case? c) 'PARAMETER]
                            [(char-upper-case? c) 'PROCNAME]
                            [(char=? c #\() 'LPAREN]
                            [(char=? c #\)) 'RPAREN]
                            [(char=? c #\:) 'COLON]
                            [(char=? c #\,) 'COMMA]
                            [(char=? c #\-) 'MINUS]
                            [(char=? c #\+) 'PLUS]
                            [(char=? c #\newline) 'ENDLINE]
                            ;; Without this clause the type of an
                            ;; unrecognised character is an unspecified
                            ;; value; tag it explicitly so a consumer can
                            ;; detect and report it.
                            [else 'UNKNOWN])])
           (cons (token type c) (tokenize (cdr source))))]))

;; Read a run of decimal digits from the front of SOURCE (a list of
;; characters), emit a single (NUMBER . value) token holding their
;; base-10 value, and continue tokenizing the rest with `tokenize`.
;;
;; Returns the resulting token list.  If SOURCE does not start with a
;; digit the NUMBER token carries 0.
(define (tokenize-number source)
  (let loop ([acc 0]
             [source source])
    ;; Check for end of input before inspecting the head: a number may be
    ;; the very last thing in the source, and (car '()) is an error.
    (if (and (pair? source)
             (char-numeric? (car source)))
        (loop (+ (* acc 10) (digit->integer (car source)))
              (cdr source))
        (cons (token 'NUMBER acc) (tokenize source)))))

;;; Tokenizer test
;(define s (string->list (normalize "a(X,Y):Ya(X-1,Y)\na(45,s)")))
;(for-each print (tokenize s))