View Javadoc

1   package simpledb.parse;
2   
3   import java.util.*;
4   import java.io.*;
5   
6   /**
7    * The lexical analyzer.
8    * @author Edward Sciore
9    */
10  public class Lexer {
11     private Collection<String> keywords;
12     private StreamTokenizer tok;
13     
14     /**
15      * Creates a new lexical analyzer for SQL statement s.
16      * @param s the SQL statement
17      */
18     public Lexer(String s) {
19        initKeywords();
20        tok = new StreamTokenizer(new StringReader(s));
21        tok.ordinaryChar('.');
22        tok.lowerCaseMode(true); //ids and keywords are converted
23        nextToken();
24     }
25     
26  //Methods to check the status of the current token
27     
28     /**
29      * Returns true if the current token is
30      * the specified delimiter character.
31      * @param d a character denoting the delimiter
32      * @return true if the delimiter is the current token
33      */
34     public boolean matchDelim(char d) {
35        return d == (char)tok.ttype;
36     }
37     
38     /**
39      * Returns true if the current token is an integer.
40      * @return true if the current token is an integer
41      */
42     public boolean matchIntConstant() {
43        return tok.ttype == StreamTokenizer.TT_NUMBER;
44     }
45     
46     /**
47      * Returns true if the current token is a string.
48      * @return true if the current token is a string
49      */
50     public boolean matchStringConstant() {
51        return '\'' == (char)tok.ttype;
52     }
53     
54     /**
55      * Returns true if the current token is the specified keyword.
56      * @param w the keyword string
57      * @return true if that keyword is the current token
58      */
59     public boolean matchKeyword(String w) {
60        return tok.ttype == StreamTokenizer.TT_WORD && tok.sval.equals(w);
61     }
62     
63     /**
64      * Returns true if the current token is a legal identifier.
65      * @return true if the current token is an identifier
66      */
67     public boolean matchId() {
68        return  tok.ttype==StreamTokenizer.TT_WORD && !keywords.contains(tok.sval);
69     }
70     
71  //Methods to "eat" the current token
72     
73     /**
74      * Throws an exception if the current token is not the
75      * specified delimiter. 
76      * Otherwise, moves to the next token.
77      * @param d a character denoting the delimiter
78      */
79     public void eatDelim(char d) {
80        if (!matchDelim(d))
81           throw new BadSyntaxException();
82        nextToken();
83     }
84     
85     /**
86      * Throws an exception if the current token is not 
87      * an integer. 
88      * Otherwise, returns that integer and moves to the next token.
89      * @return the integer value of the current token
90      */
91     public int eatIntConstant() {
92        if (!matchIntConstant())
93           throw new BadSyntaxException();
94        int i = (int) tok.nval;
95        nextToken();
96        return i;
97     }
98     
99     /**
100     * Throws an exception if the current token is not 
101     * a string. 
102     * Otherwise, returns that string and moves to the next token.
103     * @return the string value of the current token
104     */
105    public String eatStringConstant() {
106       if (!matchStringConstant())
107          throw new BadSyntaxException();
108       String s = tok.sval; //constants are not converted to lower case
109       nextToken();
110       return s;
111    }
112    
113    /**
114     * Throws an exception if the current token is not the
115     * specified keyword. 
116     * Otherwise, moves to the next token.
117     * @param w the keyword string
118     */
119    public void eatKeyword(String w) {
120       if (!matchKeyword(w))
121          throw new BadSyntaxException();
122       nextToken();
123    }
124    
125    /**
126     * Throws an exception if the current token is not 
127     * an identifier. 
128     * Otherwise, returns the identifier string 
129     * and moves to the next token.
130     * @return the string value of the current token
131     */
132    public String eatId() {
133       if (!matchId())
134          throw new BadSyntaxException();
135       String s = tok.sval;
136       nextToken();
137       return s;
138    }
139    
140    private void nextToken() {
141       try {
142          tok.nextToken();
143       }
144       catch(IOException e) {
145          throw new BadSyntaxException();
146       }
147    }
148    
149    private void initKeywords() {
150       keywords = Arrays.asList("select", "from", "where", "and",
151                                "insert", "into", "values", "delete", "update", "set", 
152                                "create", "table", "int", "varchar", "view", "as", "index", "on");
153    }
154 }