# peddamat
# 5/9/2013 - 1:06 PM

# step1.py

#****************************************************************************
 # ECE468 - Introduction to Compilers and Translation Engineering [Fall 2005]
 # 
 # A simple compiler for the LITTLE programming language.  (Step 1)
 # 
 # Dependencies:
 #   - Python Lex-Yacc : http://www.dabeaz.com/ply/
 #
 # Released as public domain code.
 # 
 # Sumanth Peddamatham  [peddamat@gmail.com]
 # bafoontecha.  2008
 #
 ##

import sys
import lex

# Keywords of the LITTLE language.  The upper-case spelling of each keyword
# is also used as its token type.
reserved = (
    'PROGRAM', 'TO', 'FLOAT', 'FOR',
    'FROM', 'READ', 'WRITE', 'INT',
    'RETURN', 'FUNCTION', 'WHILE', 'IF',
    'ELSE', 'VOID', 'BY', 'CALL',
)

# Every token type the lexer can produce: the keywords first, then the
# remaining categories below.
tokens = reserved + (
    # Identifiers and numeric literals
    'ID', 'INTLITERAL', 'FLOATLITERAL',
    # Punctuation
    'SEMI', 'COMMA', 'LPAREN', 'RPAREN', 'RCURLY', 'LCURLY',
    # Assignment and comparison operators
    'ASSIGN_OP', 'LT', 'GT', 'EQ',
    # Arithmetic operators
    'ADD_OP', 'MINUS_OP', 'DIV_OP', 'MULT_OP',
    # A single space or tab
    'BLANK',
)

# Tokens that need no action code: each t_<NAME> string is the regular
# expression for token type <NAME>.
# NOTE(review): per the PLY documentation, string rules are tried
# longest-pattern-first, which is what lets '==' win over '=' and the float
# pattern win over the integer one -- confirm for the PLY version in use
# before changing the text of any pattern.

# Punctuation
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURLY = r'\{'
t_RCURLY = r'\}'
t_SEMI = r';'
t_COMMA = r','

# Arithmetic
t_ADD_OP = r'\+'
t_MINUS_OP = r'-'
t_MULT_OP = r'\*'
t_DIV_OP = r'/'

# Comparison
t_EQ = r'=='
t_LT = r'<'
t_GT = r'>'

# Assignment
t_ASSIGN_OP = r'='

# Numeric literals: a float needs a decimal point with digits on at least
# one side of it; an integer is a plain run of digits.
t_FLOATLITERAL = r'[0-9]+\.[0-9]*|\.[0-9]+'
t_INTLITERAL = r'[0-9]+'

# Spaces and tabs are not skipped: each one is reported as its own BLANK
# token (no t_ignore is defined).
t_BLANK = r'[ \t]'

def t_ID(t):
    r'[A-Za-z_][A-Za-z_0-9]*'
    # The raw string above is the rule's regular expression, not
    # documentation; keep it as the first statement of the function.
    #
    # A word that is a key of reserved_map (the lower-case keyword
    # spellings) takes the matching keyword token type; any other word is a
    # plain identifier.  The lookup uses the matched text exactly as written.
    keyword_type = reserved_map.get(t.value)
    if keyword_type is None:
        t.type = "ID"
    else:
        t.type = keyword_type
    return t

def t_newline(t):
    r'\n+'
    # The raw string above is the rule's regular expression.
    #
    # A run of one or more newlines: advance the line counter by the number
    # of newline characters matched.  Nothing is returned, so the match does
    # not become a token.
    newline_count = len(t.value)
    t.lineno = t.lineno + newline_count

def t_error(t):
    # Error hook: lex calls this when no rule matches at the current
    # position.  t.value then holds the whole unconsumed input, not just the
    # bad character, so only its first character is reported; printing
    # t.value itself would dump the rest of the file once per bad character.
    #
    # print(...) with a single parenthesised argument produces the same
    # output under Python 2 and Python 3.
    print("%s<-UNRECOGNIZED" % (t.value[0],))
    # Step over the offending character so scanning resumes right after it.
    t.skip(1)

# Comments
def t_comment(t):
    r'--[^\n]*'
    # The raw string above is the rule's regular expression: a comment starts
    # with two dashes and runs to the end of the line.  The earlier pattern
    # required literal double-quote characters around the dashes and stopped
    # at the first '.', so ordinary comments were never recognised.
    #
    # The pattern cannot match a newline, so there is no line count to
    # update here.  Returning nothing means the comment produces no token.
    pass

# Lookup table from the lower-case spelling of each reserved word to its
# token type (the upper-case name listed in `reserved`).
reserved_map = dict((keyword.lower(), keyword) for keyword in reserved)

# Build the lexer from the token rules defined in this module.
lex.lex()

# The file to tokenize is the first command-line argument; without one,
# print a usage line instead of failing with a bare IndexError.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s <source-file>\n" % sys.argv[0])
    sys.exit(1)
filename = sys.argv[1]

# Read the whole file, making sure the handle is closed even if the read
# itself raises.
f = open(filename)
try:
    data = f.read()
finally:
    f.close()

lex.input(data)

# Print one "<text><-<TYPE>" line per token until the lexer has no more.
# print(...) with a single parenthesised argument produces the same output
# under Python 2 and Python 3.
while 1:
    tok = lex.token()
    if not tok:
        break
    print("%s<-%s" % (tok.value, tok.type))