lnicola
6/27/2014 - 9:25 AM

Parser.cs

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace Parser
{
    enum TokenType
    {
        Add,
        Sub,
        Mul,
        Div,
        Identifier,
        Number,
        LBracket,
        RBracket,
        Eof
    }

    class Token
    {
        public TokenType TokenType { get; private set; }
        public string Identifier { get; private set; }

        public Token(TokenType tokenType, string identifier)
        {
            TokenType = tokenType;
            Identifier = identifier;
        }
    }

    abstract class Node
    {
    }

    class BinOpNode : Node
    {
        public TokenType TokenType { get; private set; }
        public Node Left { get; private set; }
        public Node Right { get; private set; }

        public BinOpNode(TokenType tokenType, Node left, Node right)
        {
            TokenType = tokenType;
            Left = left;
            Right = right;
        }
    }

    class IdentifierNode : Node
    {
        public string Identifier { get; private set; }

        public IdentifierNode(string identifier)
        {
            Identifier = identifier;
        }
    }

    class ConstantNode : Node
    {
        public string Constant { get; private set; }

        public ConstantNode(string constant)
        {
            Constant = constant;
        }
    }

    class Scanner
    {
        private TextReader textReader_;
        private Token token_;

        public Scanner(TextReader textReader)
        {
            textReader_ = textReader;

            GetToken();
        }

        public Token PeekToken()
        {
            return token_;
        }

        public Token GetToken()
        {
            var token = token_;
            token_ = ReadToken();
            return token;
        }

        private Token ReadToken()
        {
            var c = textReader_.Peek();
            if (c == -1)
                return new Token(TokenType.Eof, null);
            
            var ch = (char)c;
            while (Char.IsWhiteSpace(ch))
            {
                textReader_.Read();
                ch = (char)textReader_.Read();
            }

            switch (ch)
            {
                case '+':
                    textReader_.Read();
                    return new Token(TokenType.Add, ch.ToString());
                case '-':
                    textReader_.Read();
                    return new Token(TokenType.Sub, ch.ToString());
                case '*':
                    textReader_.Read();
                    return new Token(TokenType.Mul, ch.ToString());
                case '/':
                    textReader_.Read();
                    return new Token(TokenType.Div, ch.ToString());
                case '(':
                    textReader_.Read();
                    return new Token(TokenType.LBracket, "(");
                case ')':
                    textReader_.Read();
                    return new Token(TokenType.RBracket, ")");
            }

            var sb = new StringBuilder();
            if (Char.IsDigit(ch))
            {
                while (Char.IsDigit(ch))
                {
                    sb.Append(ch);
                    textReader_.Read();
                    ch = (char)textReader_.Peek();
                }

                return new Token(TokenType.Number, sb.ToString());
            }

            if (Char.IsLetter(ch))
            {
                while (Char.IsLetter(ch))
                {
                    sb.Append(ch);
                    textReader_.Read();
                    ch = (char)textReader_.Peek();
                }

                return new Token(TokenType.Identifier, sb.ToString());
            }

            throw new Exception(String.Format("Unexpected character: {0}", ch));
        }
    }

    class Parser
    {
        private Scanner scanner_;
        private Token token_;

        public Parser(TextReader textReader)
        {
            scanner_ = new Scanner(textReader);
        }

        // F = E | N | I
        public Node ParseFactor()
        {
            var token = scanner_.GetToken();
            if (token.TokenType == TokenType.LBracket)
            {
                var expression = ParseExpression();

                token = scanner_.GetToken();
                if (token.TokenType != TokenType.RBracket)
                    throw new Exception(String.Format("Expected ')', found {1}", token.TokenType));

                return expression;
            }

            if (token.TokenType == TokenType.Number)
                return new ConstantNode(token.Identifier);
            
            if (token.TokenType == TokenType.Identifier)
                return new IdentifierNode(token.Identifier);

            throw new Exception(String.Format("Unexpected token: {0}", token.TokenType));
        }

        // T = F [*|/ T]
        public Node ParseTerm()
        {
            var factor = ParseFactor();
            var token = scanner_.PeekToken();
            if (token.TokenType == TokenType.Mul || token.TokenType == TokenType.Div)
            {
                scanner_.GetToken();
                return new BinOpNode(token.TokenType, factor, ParseTerm());
            }

            return factor;
        }

        // E = T [+|- E]
        public Node ParseExpression()
        {
            var term = ParseTerm();
            var token = scanner_.PeekToken();
            if (token.TokenType == TokenType.Add || token.TokenType == TokenType.Sub)
            {
                scanner_.GetToken();
                return new BinOpNode(token.TokenType, term, ParseTerm());
            }
            
            return term;
        }
    }

    class Program
    {
        static void Main(string[] args)
        {
            using (var textReader = new StringReader("100 * 2 + 4 + 5"))
            {
                var parser = new Parser(textReader);
                var expression = parser.ParseExpression();
            }
        }
    }
}