mingliangguo
5/21/2013 - 9:53 AM

HTML语法分析器模型

HTML语法分析器模型

    /**
     * Tree node representing an element.
     *
     * Every enumerable property of `token` (own or inherited) is copied onto
     * the new node, after which `childNodes` is (re)initialised to an empty
     * array, so a `childNodes` property on the token is never kept.
     *
     * @param {Object} token - source of the properties to copy (e.g. a tag token).
     */
    function Element(token) {
        var self = this;
        var key;

        for (key in token) { self[key] = token[key]; }

        // Assigned last on purpose: it wins over any copied `childNodes`.
        self.childNodes = [];
    }
    /**
     * Tree node holding character data.
     *
     * @param {string} [value] - initial text; any falsy argument becomes "".
     */
    function Text(value) {
        this.value = value ? value : "";
    }
    /**
     * Tree builder: consumes tokens one at a time and assembles a node tree
     * under a synthetic `document` element.
     *
     * Token kinds are recognised by `token.constructor.name`:
     *   - "String"        : a piece of character data; consecutive strings are
     *                       merged into a single Text node.
     *   - "StartTagToken" : opens a new Element as child of the current node.
     *   - "EndTagToken"   : closes the current element. Tag names are not
     *                       compared, the innermost open element is closed.
     * Any other kind of token only terminates the current run of text.
     */
    function HTMLSyntaticalParser(){
        // stack[0] is the document root; the last entry is the innermost open node.
        var stack = [new Element({name:"document"})];

        // Innermost open node.
        function current() {
            return stack[stack.length - 1];
        }

        // Attach `node` to the innermost open node and make it the new innermost one.
        function open(node) {
            current().childNodes.push(node);
            stack.push(node);
        }

        /**
         * Feed one token into the tree builder.
         *
         * @param {*} token - a string (character data) or a tag token object.
         */
        this.receiveInput = function(token) {
            var kind = token.constructor.name;

            if (kind === "String") {
                if (current().constructor.name === "Text") {
                    current().value += token;
                } else {
                    open(new Text(token));
                }
                return;
            }

            // Any non-character token ends the text run that is being collected.
            if (current().constructor.name === "Text") {
                stack.pop();
            }

            if (kind === "StartTagToken") {
                open(new Element(token));
            } else if (kind === "EndTagToken") {
                // Never pop the document root: an unmatched end tag used to
                // empty the stack, which made the next token throw and
                // getOutput() return undefined.
                if (stack.length > 1) {
                    stack.pop();
                }
            }
        };

        /**
         * @returns {Element} the synthetic `document` root of the tree built so far.
         */
        this.getOutput = function(){
            return stack[0];
        };
    }
    /**
     * Token type for a start tag. The constructor itself sets nothing;
     * whoever creates the token assigns its properties afterwards.
     * The function name matters: consumers in this file identify the token
     * through `constructor.name`.
     */
    function StartTagToken() {
        // intentionally empty
    }

    /**
     * Token type for an end tag. The constructor itself sets nothing;
     * whoever creates the token assigns its properties afterwards.
     * The function name matters: consumers in this file identify the token
     * through `constructor.name`.
     */
    function EndTagToken() {
        // intentionally empty
    }

    /**
     * Scratch record for the attribute currently being read. The constructor
     * sets nothing; the tokenizer in this file assigns `name` and `value`
     * after creating an instance.
     */
    function Attribute() {
        // intentionally empty
    }


    /**
     * Character-driven HTML tokenizer implemented as a state machine.
     *
     * Each state is a function that takes one character and returns the state
     * to use for the next character. Finished tokens are handed to
     * `syntaxer.receiveInput(token)`:
     *   - character data is emitted one character at a time as plain strings;
     *   - tags are emitted as StartTagToken / EndTagToken objects whose `name`
     *     is the lower-cased tag name and whose attributes are stored as
     *     plain properties (`token[attributeName] = attributeValue`);
     *   - a self-closing start tag (`<br/>`) is emitted as a start tag
     *     immediately followed by a synthetic end tag of the same name.
     *
     * Every state returns a state for every input character, so malformed
     * markup is reported through error() instead of breaking the machine.
     *
     * NOTE(review): because attributes live directly on the token, an
     * attribute literally called `name` overwrites the tag name. Fixing that
     * would change the token shape consumers rely on, so it is left as is.
     *
     * @param {{receiveInput: function(*): void}} syntaxer - token consumer.
     */
    function HTMLLexicalParser(syntaxer){

        var WHITESPACE = /[\t \f\n]/;
        var LETTER = /[A-Za-z]/;

        var token;      // tag token currently being built
        var attribute;  // attribute currently being built for `token`

        function emitToken(t){
            syntaxer.receiveInput(t);
        }

        function error(){
            console.log("error");
        }

        // Start a tag token of the given type whose name begins with `c`.
        function beginTag(TokenType, c) {
            token = new TokenType();
            token.name = c.toLowerCase();
            return tagNameState;
        }

        // Start a fresh attribute whose name begins with `c`.
        function beginAttribute(c) {
            attribute = new Attribute();
            attribute.name = c.toLowerCase();
            attribute.value = "";
            return attributeNameState;
        }

        // Store the attribute being built on the current tag token.
        function commitAttribute() {
            token[attribute.name] = attribute.value;
        }

        // Emit the current tag token and go back to reading character data.
        function finishTag() {
            emitToken(token);
            return dataState;
        }

        // State functions. They are function declarations, so each keeps its
        // own `name` (useful for tracing) and may refer to states defined
        // further down.

        function dataState(c){
            if (c === "<") {
                return tagOpenState;
            }
            emitToken(c);
            return dataState;
        }

        function tagOpenState(c){
            if (c === "/") {
                return endTagOpenState;
            }
            if (LETTER.test(c)) {
                return beginTag(StartTagToken, c);
            }
            if (c === "?" || c === "!") {
                // Approximation: processing instructions, comments and
                // doctypes are all skipped up to the next ">".
                return bogusCommentState;
            }
            // Not a tag after all: the "<" was ordinary text, and `c` has to
            // be looked at again as data instead of being dropped.
            error();
            emitToken("<");
            return dataState(c);
        }

        function endTagOpenState(c){
            if (LETTER.test(c)) {
                return beginTag(EndTagToken, c);
            }
            error();
            return c === ">" ? dataState : bogusCommentState;
        }

        function tagNameState(c) {
            if (WHITESPACE.test(c)) {
                return beforeAttributeNameState;
            }
            if (c === "/") {
                return selfClosingStartTagState;
            }
            if (c === ">") {
                return finishTag();
            }
            // Any other character (letters in either case, digits, "-", ...)
            // belongs to the name, e.g. "h1" or "DIV".
            token.name += c.toLowerCase();
            return tagNameState;
        }

        function beforeAttributeNameState(c) {
            if (WHITESPACE.test(c)) {
                return beforeAttributeNameState;
            }
            if (c === "/") {
                return selfClosingStartTagState;
            }
            if (c === ">") {
                return finishTag();
            }
            if (c === "\"" || c === "'" || c === "<" || c === "=") {
                // Reported, but the character still starts an attribute name.
                error();
            }
            return beginAttribute(c);
        }

        function attributeNameState(c) {
            if (c === "/") {
                commitAttribute();
                return selfClosingStartTagState;
            }
            if (c === ">") {
                commitAttribute();
                return finishTag();
            }
            if (c === "=") {
                return beforeAttributeValueState;
            }
            if (WHITESPACE.test(c)) {
                return afterAttributeNameState;
            }
            attribute.name += c.toLowerCase();
            return attributeNameState;
        }

        function afterAttributeNameState(c) {
            if (WHITESPACE.test(c)) {
                return afterAttributeNameState;
            }
            if (c === "=") {
                return beforeAttributeValueState;
            }
            // From here on the pending attribute has no value; keep it (with
            // its empty value) before moving on.
            commitAttribute();
            if (c === "/") {
                return selfClosingStartTagState;
            }
            if (c === ">") {
                return finishTag();
            }
            return beginAttribute(c);
        }

        function beforeAttributeValueState(c) {
            if (WHITESPACE.test(c)) {
                return beforeAttributeValueState;
            }
            if (c === "\"") {
                return attributeValueDoubleQuotedState;
            }
            if (c === "'") {
                return attributeValueSingleQuotedState;
            }
            if (c === ">") {
                // "<a href=>": value is missing, the tag ends here.
                error();
                commitAttribute();
                return finishTag();
            }
            attribute.value += c;
            return attributeValueUnquotedState;
        }

        function attributeValueDoubleQuotedState(c) {
            if (c === "\"") {
                commitAttribute();
                return beforeAttributeNameState;
            }
            attribute.value += c;
            return attributeValueDoubleQuotedState;
        }

        function attributeValueSingleQuotedState(c) {
            if (c === "'") {
                commitAttribute();
                return beforeAttributeNameState;
            }
            attribute.value += c;
            return attributeValueSingleQuotedState;
        }

        function attributeValueUnquotedState(c) {
            if (WHITESPACE.test(c)) {
                commitAttribute();
                return beforeAttributeNameState;
            }
            if (c === ">") {
                commitAttribute();
                return finishTag();
            }
            attribute.value += c;
            return attributeValueUnquotedState;
        }

        function selfClosingStartTagState(c) {
            if (c === ">") {
                emitToken(token);
                // Only a start tag needs the synthetic end tag; an end tag
                // written as "</x/>" must not close two elements.
                if (!(token instanceof EndTagToken)) {
                    var endToken = new EndTagToken();
                    endToken.name = token.name;
                    emitToken(endToken);
                }
                return dataState;
            }
            // Stray "/" inside a tag: report it and treat `c` as the possible
            // start of an attribute.
            error();
            return beforeAttributeNameState(c);
        }

        function bogusCommentState(c) {
            return c === ">" ? dataState : bogusCommentState;
        }

        var state = dataState;

        /**
         * Feed the next input character to the tokenizer.
         *
         * @param {string} char - a single character.
         */
        this.receiveInput = function(char) {
            state = state(char);
        };

        /**
         * Return to the initial (data) state and discard any partially built
         * tag or attribute.
         */
        this.reset = function(){
            state = dataState;
            token = undefined;
            attribute = undefined;
        };
    }