实现一个Number,float的词法解析器

javaG

浏览: 557490 次
性别:
来自: 深圳

最近访客更多访客>>

sa360430

TYOUKAI_

jack1007

250516230

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

编译原理

Number

package compiler.lexer;

import java.text.CharacterIterator;
import java.text.StringCharacterIterator;

public class NumberFloat
{
    /** Literal kinds recognised by this lexer; not referenced inside this class. */
    static enum Type{
        Number,Float
    }

    /** Cursor over the text being scanned. */
    private final StringCharacterIterator iter;
    /** Character under the cursor; {@link CharacterIterator#DONE} once the input is exhausted. */
    private char currentChar;
    /** Characters consumed so far; becomes the token text when the scan succeeds. */
    private final StringBuilder currentToken = new StringBuilder();

    /**
     * Creates a lexer positioned on the first character of the given text.
     *
     * @param sourceCode text to scan; the whole text must form one literal for
     *                   {@link #parse()} to succeed
     */
    public NumberFloat(String sourceCode)
    {
        this.iter = new StringCharacterIterator(sourceCode);
        this.currentChar = this.iter.first();
    }

    /**
     * Appends the current character to the token text and advances the cursor.
     *
     * @return the new current character, or {@link CharacterIterator#DONE} at end of input
     */
    public char nextChar()
    {
        add(this.currentChar);
        this.currentChar = iter.next();

        return currentChar;
    }

    /**
     * Scans the whole input as one literal of the form
     * {@code [+-] digit+ [ '.' digit+ ]}.
     *
     * @return a {@code Token.Number} token for an integer, a {@code Token.Float} token
     *         for a decimal, or {@code null} (after printing a diagnostic to standard
     *         output) when the input does not match
     */
    public Token parse()
    {
        // DFA node 1: an optional leading sign is consumed first.
        if (isSign())
        {
            this.nextChar();
        }
        // DFA node 2 (after a sign) or node 1 (no sign): a digit must come next.
        if (isDigit())
        {
            Token token = parseUnsigned();
            if (token != null)
            {
                return token;
            }
        }
        System.out.println("error char at [" + this.currentChar + "].current parsed code:[" + this.currentToken+"]");
        return null;
    }

    /**
     * Scans {@code digit+ [ '.' digit+ ]} starting on a digit. This is the part of the
     * automaton shared by signed and unsigned literals.
     *
     * @return the recognised token, or {@code null} if the input stops matching
     */
    private Token parseUnsigned()
    {
        this.nextChar();
        // DFA node 3: remaining digits of the integer part.
        digitClosure();
        if (isDone())
        {
            return new Token(Token.Number, this.currentToken.toString());
        }
        if (!isPoint())
        {
            return null;
        }
        // DFA node 4: consume the decimal point; a digit is mandatory after it.
        this.nextChar();
        if (!isDigit())
        {
            return null;
        }
        // DFA node 5: digits of the fractional part, which must reach end of input.
        this.nextChar();
        digitClosure();
        if (isDone())
        {
            return new Token(Token.Float, this.currentToken.toString());
        }
        return null;
    }

    /** Consumes zero or more consecutive digits. */
    private void digitClosure()
    {
        while (isDigit())
        {
            this.nextChar();
        }
    }

    /** @return whether the current character is a leading sign */
    private boolean isSign()
    {
        return this.currentChar == '+' || this.currentChar == '-';
    }

    /** @return whether the current character is the decimal point */
    private boolean isPoint()
    {
        return this.currentChar == '.';
    }

    /** @return whether the current character is a digit according to {@link Character#isDigit(char)} */
    private boolean isDigit()
    {
        return Character.isDigit(this.currentChar);
    }

    /** @return whether the cursor has run past the end of the input */
    private boolean isDone()
    {
        return this.currentChar == CharacterIterator.DONE;
    }

    /** Appends one character to the token text. */
    private void add(char c)
    {
        this.currentToken.append(c);
    }

    /** Demo: prints the scan result for a few sample inputs; the last one is invalid. */
    public static void main(String[] args)
    {
        System.out.println(new NumberFloat("100").parse());
        System.out.println(new NumberFloat("+100").parse());
        System.out.println(new NumberFloat("-100").parse());
        System.out.println(new NumberFloat("100.1").parse());
        // Previously this line instantiated NumberFloatV2, so this class never
        // demonstrated its own error path.
        System.out.println(new NumberFloat("100.1.").parse());
    }
}

package compiler.lexer;

import java.text.CharacterIterator;
import java.text.StringCharacterIterator;

/**
 * Lexer for a single signed integer or decimal literal of the form
 * {@code [+-] digit+ [ '.' digit+ ]}.
 * <p>
 * Unlike NumberFloat, the control flow is driven by the number of the DFA node
 * to visit next, so the state diagram can be transcribed into code almost directly.
 */
public class NumberFloatV2
{
    /** Literal kinds recognised by this lexer; not referenced inside this class. */
    static enum Type{
        Number,Float
    }
    /** Cursor over the text being scanned. */
    private  StringCharacterIterator iter = null;
    /** Character under the cursor; {@link CharacterIterator#DONE} once the input is exhausted. */
    private char currentChar;
    /** Characters consumed so far; becomes the token text when the scan succeeds. */
    private String currentToken="";
    /** Number of the DFA node to process next; 1 is the start node. */
    private int nextNo4DFA = 1;
    /** Original input, kept only for the diagnostic message. */
    private String copySourceCode;
    
    /**
     * Creates a lexer for the given text. Scanning starts when {@link #parse()} is called.
     *
     * @param sourceCode text to scan; the whole text must form one literal for
     *                   {@link #parse()} to succeed
     */
    public NumberFloatV2(String sourceCode)
    {
        this.iter = new StringCharacterIterator(sourceCode);
        this.copySourceCode = sourceCode;
    }
    
    /**
     * Appends the current character to the token text and advances the cursor.
     *
     * @return the new current character, or {@link CharacterIterator#DONE} at end of input
     */
    public char nextChar() 
    {
        add(this.currentChar);
        this.currentChar = iter.next();
        
        return currentChar;
    }

    /**
     * Scans the whole input from its first character as one literal.
     * May be called repeatedly; every call starts from a clean state.
     *
     * @return a {@code Token.Number} token for an integer, a {@code Token.Float} token
     *         for a decimal, or {@code null} (after printing a diagnostic to standard
     *         output) when the input does not match
     */
    public Token parse()
    {
        // Each call rewinds the iterator, so the token text and the DFA node must be
        // rewound too; otherwise a second call appends to the previous result
        // (e.g. "100" would come back as "100100").
        clear();
        this.nextNo4DFA = 1;
        this.currentChar = iter.first();
        // 1: start node - a sign leads to node 2, a digit straight to node 3
        if (this.currentChar == '+' || this.currentChar == '-')
        {
           
           this.nextNo4DFA = 2;
        }
        else if(isDigit()) 
        {
            this.nextNo4DFA = 3;
        }
        
        // 2: consume the sign; a digit must follow
        if(this.nextNo4DFA == 2) 
        {
            this.nextChar();
            if(isDigit()) 
            {
                this.nextNo4DFA = 3;
            }
        }
        
        // 3: integer part; end of input accepts an integer, '.' leads to node 4
        if(this.nextNo4DFA == 3) 
        {
            this.nextChar();
            digitClosure();
            if(isDone()) 
            {
                return new Token(Token.Number, this.currentToken);
            }
            else if(isPoint()) 
            {
                this.nextNo4DFA = 4;
            }
        }
        
        // 4: consume the decimal point; a digit must follow
        if(this.nextNo4DFA == 4) 
        {
            this.nextChar();
            if(isDigit()) 
            {
                this.nextNo4DFA = 5;
            }
        }
        
        // 5: fractional part; only end of input accepts a decimal
        if(this.nextNo4DFA == 5) 
        {
            this.nextChar();
            digitClosure();
            if(isDone()) 
            {
                return new Token(Token.Float, this.currentToken);
            }
        }
        System.out.println("error char at [" + this.currentChar + "].current parsed code:[" + this.currentToken+"] sourceCode:["+this.copySourceCode+"]");
        return null;
    }

    /** Consumes zero or more consecutive digits. */
    private void digitClosure()
    {
        while (isDigit())
        {
            this.nextChar();
        }
    }
    
    /** @return whether the current character is the decimal point */
    private boolean isPoint()
    {
        return this.currentChar == '.';
    }

    /** @return whether the current character is a digit according to {@link Character#isDigit(char)} */
    private boolean isDigit()
    {
        return Character.isDigit(this.currentChar);
    }
    
    /** @return whether the cursor has run past the end of the input */
    private boolean isDone()
    {
        return this.currentChar == CharacterIterator.DONE;
    }
    
    /** Appends one character to the token text. */
    private void add(char c) 
    {
        this.currentToken += c;
    }
    
    /** Discards the token text collected so far. */
    private void clear() 
    {
        this.currentToken = "";
    }
    
    /** Demo: prints the scan result for a few sample inputs; the last one is invalid. */
    public static void main(String[] args)
    {
        System.out.println(new NumberFloatV2("100").parse());
        System.out.println(new NumberFloatV2("+100").parse());
        System.out.println(new NumberFloatV2("-100").parse());
        System.out.println(new NumberFloatV2("100.1").parse());
        System.out.println(new NumberFloatV2("100.1.").parse());
    }
}

package compiler.lexer;
public class Token
{
    /** Type code for an integer literal. */
    public static final int Number = 1;
    /** Type code for a decimal literal. */
    public static final int Float = 2;

    /** Type code of this token. */
    private int type;
    /** Text of this token. */
    private String name;

    /**
     * Creates a token.
     *
     * @param type type code, e.g. {@link #Number} or {@link #Float}
     * @param name text of the token
     */
    public Token(int type, String name)
    {
        this.type = type;
        this.name = name;
    }

    /**
     * @return the type code
     */
    public int getType()
    {
        return this.type;
    }

    /**
     * @param type the type code to set
     */
    public void setType(int type)
    {
        this.type = type;
    }

    /**
     * @return the token text
     */
    public String getName()
    {
        return this.name;
    }

    /**
     * @param name the token text to set
     */
    public void setName(String name)
    {
        this.name = name;
    }

    /**
     * Renders the token as {@code Token {Name=[<text>], type=[<code>]}}.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString()
    {
        StringBuilder text = new StringBuilder("Token {Name=[");
        text.append(this.name).append("], type=[").append(this.type).append("]}");
        return text.toString();
    }

}

package compiler.lexer;

import java.text.CharacterIterator;
import java.text.StringCharacterIterator;

/**
 * Lexer for a single signed integer or decimal literal of the form
 * {@code [+-] digit+ [ '.' digit+ ]}.
 * <p>
 * Unlike NumberFloatV2, the automaton runs inside a loop over a state variable.
 * The advantage is that transitions back to an earlier node (for example from
 * state 5 to state 2) could be handled as well.
 */
public class NumberFloatV3
{
    /** Literal kinds recognised by this lexer; not referenced inside this class. */
    static enum Type
    {
        Number, Float
    }

    /** Cursor over the text being scanned. */
    private StringCharacterIterator iter = null;
    /** Character under the cursor; {@link CharacterIterator#DONE} once the input is exhausted. */
    private char currentChar;
    /** Characters consumed so far; becomes the token text when the scan succeeds. */
    private String currentToken = "";
    /** Current DFA state; 1 is the start state. */
    private int state = 1;
    /** Original input as passed to the constructor. */
    private String copySourceCode;

    /**
     * Creates a lexer positioned on the first character of the given text.
     *
     * @param sourceCode text to scan
     */
    public NumberFloatV3(String sourceCode)
    {
        this.copySourceCode = sourceCode;
        this.iter = new StringCharacterIterator(sourceCode);
        this.currentChar = this.iter.first();
    }

    /**
     * Appends the current character to the token text and advances the cursor.
     *
     * @return the new current character, or {@link CharacterIterator#DONE} at end of input
     */
    public char nextChar()
    {
        add(this.currentChar);
        char following = this.iter.next();
        this.currentChar = following;
        return following;
    }

    /**
     * Runs the automaton until it accepts or rejects the input.
     *
     * @return a {@code Token.Number} token for an integer, a {@code Token.Float} token
     *         for a decimal, or {@code null} (after printing a diagnostic to standard
     *         output) when the input does not match
     */
    public Token parse()
    {
        boolean rejected = false;
        while (!rejected)
        {
            switch (this.state)
            {
            case 1:
                // Start: a sign goes to state 2, a digit directly to state 3.
                if (isSign())
                {
                    this.state = 2;
                }
                else if (isDigit())
                {
                    this.state = 3;
                }
                else
                {
                    rejected = true;
                }
                break;
            case 2:
            case 4:
                // After a sign (2) or a decimal point (4) a digit is mandatory;
                // it leads to the following digit-run state (3 or 5).
                if (isDigit())
                {
                    this.state = this.state + 1;
                }
                else
                {
                    rejected = true;
                }
                break;
            case 3:
                // Integer part: end of input accepts, '.' continues with state 4.
                digitStar();
                if (isDone())
                {
                    return new Token(Token.Number, this.currentToken);
                }
                if (isPoint())
                {
                    this.state = 4;
                }
                else
                {
                    rejected = true;
                }
                break;
            case 5:
                // Fractional part: only end of input accepts.
                digitStar();
                if (isDone())
                {
                    return new Token(Token.Float, this.currentToken);
                }
                rejected = true;
                break;
            default:
                break;
            }
            // The offending character is left unconsumed so the message can show it.
            if (!rejected)
            {
                this.nextChar();
            }
        }
        System.out.println("error char at [" + this.currentChar + "] in parsed code:[" + this.currentToken
                + this.currentChar + "]");
        return null;
    }

    /**
     * Consumes zero or more consecutive digits.
     *
     * @return whether at least one digit was consumed
     */
    private boolean digitStar()
    {
        boolean consumedAny = false;
        while (isDigit())
        {
            this.nextChar();
            consumedAny = true;
        }
        return consumedAny;
    }

    /** @return whether the current character is a leading sign */
    private boolean isSign()
    {
        return this.currentChar == '+' || this.currentChar == '-';
    }

    /** @return whether the current character is the decimal point */
    private boolean isPoint()
    {
        return '.' == this.currentChar;
    }

    /** @return whether the current character is a digit according to {@link Character#isDigit(char)} */
    private boolean isDigit()
    {
        return Character.isDigit(this.currentChar);
    }

    /** @return whether the cursor has run past the end of the input */
    private boolean isDone()
    {
        return CharacterIterator.DONE == this.currentChar;
    }

    /** Appends one character to the token text. */
    private void add(char c)
    {
        this.currentToken = this.currentToken + c;
    }

    /** Discards the token text collected so far. */
    private void clear()
    {
        this.currentToken = "";
    }

    /** Demo: prints the scan result for a few sample inputs; the last one is invalid. */
    public static void main(String[] args)
    {
        String[] samples = { "100", "+100", "-100", "100.1", "100.1." };
        for (String sample : samples)
        {
            System.out.println(new NumberFloatV3(sample).parse());
        }
    }
}

package compiler.lexer;

import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.List;

import org.eclipse.jdt.core.dom.ThisExpression;

/**
 * Lexer for a sequence of space-separated signed integer or decimal literals,
 * each of the form {@code [+-] digit+ [ '.' digit+ ]}.
 * <p>
 * Unlike NumberFloatV3, this version recognises several tokens in one input.
 */
public class NumberFloatV4
{
    /** Literal kinds recognised by this lexer; not referenced inside this class. */
    static enum Type
    {
        Number, Float
    }

    /** Cursor over the text being scanned. */
    private StringCharacterIterator iter = null;
    /** Character under the cursor; {@link CharacterIterator#DONE} once the input is exhausted. */
    private char currentChar;
    /** Non-blank characters consumed for the token currently being built. */
    private String currentToken = "";
    /** Current DFA state; 1 is the start state of every token. */
    private int state = 1;
    /** Original input as passed to the constructor. */
    private String copySourceCode;

    /**
     * Creates a lexer positioned on the first character of the given text.
     *
     * @param sourceCode text to scan
     */
    public NumberFloatV4(String sourceCode)
    {
        this.copySourceCode = sourceCode;
        this.iter = new StringCharacterIterator(sourceCode);
        this.currentChar = this.iter.first();
    }

    /**
     * Advances the cursor. The character being left is appended to the token
     * text unless it is a space.
     *
     * @return the new current character, or {@link CharacterIterator#DONE} at end of input
     */
    public char nextChar()
    {
        if (!isBlank())
        {
            add(this.currentChar);
        }
        char following = this.iter.next();
        this.currentChar = following;
        return following;
    }

    /**
     * Scans the input and collects every literal found.
     * On the first character that does not fit, a diagnostic is printed to standard
     * output and scanning stops.
     *
     * @return the tokens recognised before the end of input or the first error
     */
    public List<Token> parse()
    {
        List<Token> tokens = new ArrayList<Token>();
        boolean rejected = false;
        while (true)
        {
            // Between tokens: skip spaces first, then start recognising.
            if (this.currentToken.length() == 0)
            {
                blankStar();
                if (isEnd())
                {
                    break;
                }
            }
            switch (this.state)
            {
            case 1:
                // Start: a sign goes to state 2, a digit directly to state 3.
                if (isSign())
                {
                    this.state = 2;
                }
                else if (isDigit())
                {
                    this.state = 3;
                }
                else
                {
                    rejected = true;
                }
                break;
            case 2:
            case 4:
                // After a sign (2) or a decimal point (4) a digit is mandatory;
                // it leads to the following digit-run state (3 or 5).
                if (isDigit())
                {
                    this.state = this.state + 1;
                }
                else
                {
                    rejected = true;
                }
                break;
            case 3:
                // Integer part: a space or end of input completes an integer token.
                digitStar();
                if (isDone())
                {
                    addToken(tokens, Token.Number);
                }
                else if (isPoint())
                {
                    this.state = 4;
                }
                else
                {
                    rejected = true;
                }
                break;
            case 5:
                // Fractional part: a space or end of input completes a decimal token.
                digitStar();
                if (isDone())
                {
                    addToken(tokens, Token.Float);
                }
                else
                {
                    rejected = true;
                }
                break;
            default:
                break;
            }
            if (rejected || isEnd())
            {
                break;
            }
            this.nextChar();
        }
        if (rejected)
        {
            System.out.println("error char at [" + this.currentChar + "] in parsed code:[" + this.currentToken
                    + this.currentChar + "]");
        }
        return tokens;
    }

    /**
     * Emits the token text collected so far and resets the automaton for the next token.
     *
     * @param list destination for the new token
     * @param type type code of the new token
     */
    private void addToken(List<Token> list,int type)
    {
        Token finished = new Token(type, this.currentToken);
        list.add(finished);

        this.currentToken = "";
        this.state = 1;
    }

    /**
     * Consumes zero or more consecutive spaces.
     *
     * @return whether at least one space was consumed
     */
    private boolean blankStar()
    {
        boolean consumedAny = false;
        while (isBlank())
        {
            this.nextChar();
            consumedAny = true;
        }
        return consumedAny;
    }

    /**
     * Consumes zero or more consecutive digits.
     *
     * @return whether at least one digit was consumed
     */
    private boolean digitStar()
    {
        boolean consumedAny = false;
        while (isDigit())
        {
            this.nextChar();
            consumedAny = true;
        }
        return consumedAny;
    }

    /** @return whether the current character is a space */
    private boolean isBlank()
    {
        return ' ' == this.currentChar;
    }

    /** @return whether the current character is a leading sign */
    private boolean isSign()
    {
        return this.currentChar == '+' || this.currentChar == '-';
    }

    /** @return whether the current character is the decimal point */
    private boolean isPoint()
    {
        return '.' == this.currentChar;
    }

    /** @return whether the current character is a digit according to {@link Character#isDigit(char)} */
    private boolean isDigit()
    {
        return Character.isDigit(this.currentChar);
    }

    /** @return whether the current token is terminated, i.e. the cursor is at end of input or on a space */
    private boolean isDone()
    {
        return isEnd() || isBlank();
    }

    /** @return whether the cursor has run past the end of the input */
    private boolean isEnd()
    {
        return CharacterIterator.DONE == this.currentChar;
    }

    /** Appends one character to the token text. */
    private void add(char c)
    {
        this.currentToken = this.currentToken + c;
    }

    /** Discards the token text collected so far. */
    private void clear()
    {
        this.currentToken = "";
    }

    /** Demo: prints the scan result for a few sample inputs, including an invalid one. */
    public static void main(String[] args)
    {
        String[] samples = { "100", "+100", "-100", "100.1", "100.1.", "   100.1 100 +100 -1.100 " };
        for (String sample : samples)
        {
            System.out.println(new NumberFloatV4(sample).parse());
        }
    }
}