Mercurial > hg > Members > kono > PLparser
changeset 9:29e309b2f624
Try several Tokenizer
author | one |
---|---|
date | Thu, 02 Sep 2010 10:00:23 +0900 |
parents | 8d0f9c1816f5 |
children | 0d74081c1309 |
files | src/plparser/PropertyListCharTokenizer.java src/plparser/PropertyListScanner.java src/plparser/PropertyListStreamScanner.java src/plparser/PropertyListStreamTokenizer.java src/plparser/TestScanner.java |
diffstat | 5 files changed, 442 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
// File: src/plparser/PropertyListCharTokenizer.java (new file in this changeset)
package plparser;

import java.io.FileNotFoundException;
import java.io.InputStream;
import java.nio.CharBuffer;

/**
 * Hand-written, character-at-a-time tokenizer for property lists.
 *
 * <p>Input is pulled from the inherited buffer {@code cb} through
 * {@link #nextChar()}. The tokenizer keeps a one-character lookahead in
 * {@link #ch}; the private flag {@code pending} records whether that character
 * has been read from the input but not yet consumed by a token. Tracking this
 * explicitly keeps the lookahead character from being lost when the
 * underlying input runs out right after it was read.
 *
 * <p>Recognised lexemes:
 * <ul>
 *   <li>identifiers (Java identifier rules), looked up in the dictionary;</li>
 *   <li>runs of digits, optionally introduced by {@code +} or {@code -},
 *       returned as {@code TokenID.NUMBER};</li>
 *   <li>{@code '...'} and {@code "..."} strings (quotes stripped), looked up
 *       in the dictionary;</li>
 *   <li>{@code //} line comments and slash-star block comments, skipped;</li>
 *   <li>any other single character, looked up in the dictionary.</li>
 * </ul>
 *
 * @param <T> payload type carried by the tokens
 */
public class PropertyListCharTokenizer<T> extends PropertyListScanner<T>
        implements PLScanner<T> {

    /** Current lookahead character; meaningful only while {@code pending} is true. */
    public char ch;

    /** True when {@link #ch} holds a character that no token has consumed yet. */
    private boolean pending;

    /**
     * Creates a tokenizer that shares the given dictionary.
     *
     * @param dict dictionary used to intern tokens
     */
    public PropertyListCharTokenizer(Dictionary<T> dict) {
        super(dict);
    }

    /**
     * Creates a nested tokenizer that shares the dictionary and null token of
     * an enclosing scanner.
     *
     * @param s         enclosing scanner (currently unused; {@code prev} is set to null)
     * @param dict      dictionary used to intern tokens
     * @param nullToken token returned at end of input
     */
    public PropertyListCharTokenizer(
            PLScanner<T> s,
            Dictionary<T> dict, Token<T> nullToken) {
        super(dict);
        this.nullToken = nullToken;
        this.prev = null;
    }

    /**
     * Returns the next token, or the null token when the input is exhausted or
     * a quoted string is not terminated.
     */
    @Override
    public Token<T> nextToken() {
        nextToken = nullToken;
        if (cb == null) {
            return nextToken;
        }

        // Skip whitespace and comments until a token start is in ch.
        for (;;) {
            if (!skipWhitespace()) {
                return nextToken;               // end of input
            }
            if (ch != '/') {
                break;
            }
            if (!advance()) {
                return lookupDict("/");         // a lone '/' at end of input
            }
            if (ch == '/') {
                // Line comment: drop everything up to the newline; the newline
                // itself is removed by skipWhitespace() on the next round.
                while (advance() && ch != '\n') {
                    // discard
                }
            } else if (ch == '*') {
                skipBlockComment();
            } else {
                return lookupDict("/");         // ch stays pending as lookahead
            }
        }

        StringBuilder word = new StringBuilder();
        if (Character.isJavaIdentifierStart(ch)) {
            do {
                word.append(ch);
            } while (advance() && Character.isJavaIdentifierPart(ch));
            return lookupDict(word.toString());
        }
        if (Character.isDigit(ch) || ch == '-' || ch == '+') {
            do {
                word.append(ch);
            } while (advance() && Character.isDigit(ch));
            return nextToken = new Token<T>(word.toString(), TokenID.NUMBER);
        }
        if (ch == '\'' || ch == '"') {
            final char quote = ch;
            boolean closed = false;
            while (advance()) {
                if (ch == quote) {
                    closed = true;
                    break;
                }
                word.append(ch);
            }
            if (!closed) {
                return nextToken;               // unterminated string
            }
            pending = false;                    // consume the closing quote
            return lookupDict(word.toString());
        }

        // Any other character is a one-character token.
        word.append(ch);
        pending = false;
        return lookupDict(word.toString());
    }

    /**
     * Makes sure {@link #ch} holds a non-whitespace character.
     *
     * @return false when the input ended before such a character was found
     */
    private boolean skipWhitespace() {
        if (!pending && !advance()) {
            return false;
        }
        // isWhitespace (unlike isSpaceChar) also covers tab, CR and LF.
        while (Character.isWhitespace(ch)) {
            if (!advance()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Skips the remainder of a block comment; on entry {@link #ch} is the
     * {@code *} of the opening delimiter. Leaves no pending character when the
     * comment was closed, or when the input ended first.
     */
    private void skipBlockComment() {
        boolean afterStar = false;
        while (advance()) {
            if (afterStar && ch == '/') {
                pending = false;                // consume the closing '/'
                return;
            }
            // Remembering the star lets "**/" close the comment as well.
            afterStar = (ch == '*');
        }
    }

    /**
     * Reads the next character into {@link #ch} if input remains.
     *
     * @return true when a character was read, false at end of input
     */
    private boolean advance() {
        pending = hasRemaining();
        if (pending) {
            ch = nextChar();
        }
        return pending;
    }

    /**
     * Returns the dictionary entry for {@code s}, creating a
     * {@code TokenID.Any} token when none exists, and records it as the
     * current token.
     */
    private Token<T> lookupDict(String s) {
        Token<T> t = dict.get(s);
        if (t == null) {
            t = new Token<T>(s, TokenID.Any);
            dict.put(s, t);
        }
        return nextToken = t;
    }

    /** Fetches one character from the buffer, asking for more input first if it is empty. */
    private char nextChar() {
        if (!cb.hasRemaining()) {
            extendInput();
        }
        return cb.get();
    }

    @Override
    public PLScanner<T> pushScannerFile(InputStream newfile, String prompt) {
        return new PropertyListCharTokenizer<T>(this, dict, nullToken).setFile(newfile, prompt);
    }

    @Override
    public PLScanner<T> pushScanner(String exp) {
        return new PropertyListCharTokenizer<T>(this, dict, nullToken).set(exp);
    }

    @Override
    public PLScanner<T> pushScannerFile(String newfile)
            throws FileNotFoundException {
        return new PropertyListCharTokenizer<T>(this, dict, nullToken).setFile(newfile);
    }
}
--- a/src/plparser/PropertyListScanner.java Wed Sep 01 18:43:06 2010 +0900 +++ b/src/plparser/PropertyListScanner.java Thu Sep 02 10:00:23 2010 +0900 @@ -31,7 +31,7 @@ */ public Matcher scan; - private CharBuffer cb; + public CharBuffer cb; public PropertyListScanner(Dictionary<Node> dict) { this.dict = dict; nullToken = new Token<Node>("",TokenID.NULL); @@ -118,7 +118,8 @@ } else if ((s=next(numPat))!=null) { return nextToken = new Token<Node>(s,TokenID.NUMBER); } else if ((s=next(commentPat))!=null) { - while(cb.hasRemaining()&&next(anyPat)!=null); // skip until eol (in case of buffer full) + cb.get(); scan.reset(); lineno++; + // while(cb.hasRemaining()&&next(anyPat)!=null); // skip until eol (in case of buffer full) continue; } else if ((s=next(commentPat1))!=null) { while(next(commentPat1End)==null) {
// File: src/plparser/PropertyListStreamScanner.java (new file in this changeset)
package plparser;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;

/**
 * Property-list tokenizer built on {@link java.util.Scanner}.
 *
 * <p>Original author's note (translated): "No matter what the delimiter is
 * set to, it does not work." The cause is that {@code Scanner.next(Pattern)}
 * works on delimiter-separated tokens and throws instead of returning
 * {@code null} when the next token does not match. This implementation
 * therefore ignores delimiters for lexing and uses {@link Scanner#skip(Pattern)},
 * which performs an anchored match at the current position, together with
 * {@link Scanner#match()} to obtain the captured text.
 *
 * <p>Note that {@code Scanner} keeps reading input while deciding that a
 * pattern does not match, so this class suits files and strings better than
 * interactive streams.
 *
 * @param <T> payload type carried by the tokens
 */
public class PropertyListStreamScanner<T> extends PLScannerImpl<T> implements
        PLScanner<T> {

    private Scanner scan;

    /**
     * Creates a nested scanner sharing the dictionary and null token of an
     * enclosing scanner.
     *
     * @param s         enclosing scanner (currently unused)
     * @param dict      dictionary used to intern tokens
     * @param nullToken token returned at end of input
     */
    public PropertyListStreamScanner(
            PLScanner<T> s,
            Dictionary<T> dict, Token<T> nullToken) {
        this.dict = dict;
        this.nullToken = nullToken;
    }

    /**
     * Creates a scanner with its own null token.
     *
     * @param dict dictionary used to intern tokens
     */
    public PropertyListStreamScanner(Dictionary<T> dict) {
        this.dict = dict;
        nullToken = new Token<T>("", TokenID.NULL);
    }

    /**
     * Configures the underlying scanner. Delimiters are only consulted by
     * {@link #hasRemaining()}, so they are set to runs of whitespace; using
     * {@code "."} as before would make every character a delimiter and leave
     * no tokens at all. Does nothing when no input has been set yet.
     */
    public void init() {
        if (scan != null) {
            scan.useDelimiter(delimiterPat);
        }
    }

    // Every lexeme pattern contains exactly one group: the token text.
    private static final Pattern delimiterPat = Pattern.compile("\\p{javaWhitespace}+");
    private static final Pattern spacePat = Pattern.compile("(\\s*)");
    private static final Pattern tokenPat = Pattern.compile("([={}(),;])");
    private static final Pattern namePat = Pattern.compile("([_a-zA-Z][\\@\\w]*)");
    private static final Pattern numPat = Pattern.compile("([0-9]+)");
    // [^"]* and [^']* also match line terminators, so quoted strings may span lines.
    private static final Pattern stringPat1 = Pattern.compile("\\\"([^\"]*)\\\"");
    private static final Pattern stringPat = Pattern.compile("\\'([^\\']*)\\'");
    private static final Pattern commentPat = Pattern.compile("(//.*)");
    private static final Pattern commentPat1 = Pattern.compile("(?s)(/\\*.*?\\*/)");
    private static final Pattern errorPat = Pattern.compile("([^\\s])");

    /**
     * Returns the next token, or the null token at end of input.
     *
     * <p>Punctuation is interned as {@code TokenID.Any}; names and quoted
     * strings are returned as {@code TokenID.VARIABLE}; digit runs as
     * {@code TokenID.NUMBER}. Comments are skipped. Any other non-blank
     * character (including an opening quote or comment marker that is never
     * closed) is reported through {@code error} and skipped.
     */
    @Override
    public Token<T> nextToken() {
        nextToken = nullToken;
        if (scan == null) {
            return nextToken;
        }
        String s;
        for (;;) {
            match(spacePat);                    // never fails: may match nothing
            if (!scan.hasNext()) {
                return nextToken;               // only whitespace (or nothing) left
            }
            if ((s = match(tokenPat)) != null) {
                return nextToken = intern(s, TokenID.Any);
            }
            if ((s = match(stringPat)) != null
                    || (s = match(stringPat1)) != null
                    || (s = match(namePat)) != null) {
                Token<T> t = intern(s, TokenID.VARIABLE);
                if (t.type != TokenID.VARIABLE) {
                    // The text collides with a non-variable dictionary entry.
                    t = new Token<T>(s, TokenID.VARIABLE);
                }
                return nextToken = t;
            }
            if ((s = match(numPat)) != null) {
                return nextToken = new Token<T>(s, TokenID.NUMBER);
            }
            if (match(commentPat) != null || match(commentPat1) != null) {
                continue;
            }
            if ((s = match(errorPat)) != null) {
                error("Don't understand '" + s + "'");
                continue;
            }
            return nextToken;                   // defensive: nothing could be consumed
        }
    }

    /**
     * Tries to match {@code p} exactly at the current input position.
     *
     * @return the text of group 1 on success (input is consumed and
     *         {@code lineno} advanced by the newlines consumed), or
     *         {@code null} when the pattern does not match here
     */
    private String match(Pattern p) {
        try {
            scan.skip(p);
        } catch (NoSuchElementException notHere) {
            // Scanner.skip signals "no anchored match" only by exception.
            return null;
        }
        MatchResult m = scan.match();
        String whole = m.group();
        for (int i = whole.indexOf('\n'); i >= 0; i = whole.indexOf('\n', i + 1)) {
            lineno++;
        }
        return m.group(1);
    }

    /** Returns the dictionary entry for {@code s}, creating one of the given type if absent. */
    private Token<T> intern(String s, TokenID type) {
        Token<T> t = dict.get(s);
        if (t == null) {
            t = new Token<T>(s, type);
            dict.put(s, t);
        }
        return t;
    }

    /** Returns true while at least one non-whitespace character remains in the input. */
    @Override
    public boolean hasRemaining() {
        return scan != null && scan.hasNext();
    }

    @Override
    public PLScanner<T> pushScannerFile(InputStream newfile,
            String prompt) {
        return new PropertyListStreamScanner<T>(this, dict, nullToken).setFile(newfile, prompt);
    }

    @Override
    public PLScanner<T> pushScanner(String exp) {
        return new PropertyListStreamScanner<T>(this, dict, nullToken).set(exp);
    }

    @Override
    public PLScanner<T> pushScannerFile(String file)
            throws FileNotFoundException {
        return new PropertyListStreamScanner<T>(this, dict, nullToken).setFile(file);
    }

    /** Starts scanning the given expression text. */
    @Override
    public PLScanner<T> set(String exp) {
        Reader reader = new StringReader(exp);
        scan = new Scanner(reader);
        return this;
    }

    /** Starts scanning the named file. */
    @Override
    public PLScanner<T> setFile(String file)
            throws FileNotFoundException {
        Reader reader = new FileReader(file);
        scan = new Scanner(reader);
        return this;
    }

    /** Starts scanning from the given reader. */
    @Override
    public PLScanner<T> set(InputStreamReader reader) {
        scan = new Scanner(reader);
        return this;
    }
}
// File: src/plparser/PropertyListStreamTokenizer.java (new file in this changeset)
package plparser;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;

/**
 * Property-list tokenizer built on {@link java.io.StreamTokenizer}.
 *
 * <p>Original author's note (translated): this tokenizer is simple to
 * implement, but apparently cannot handle {@code ""} or {@code ''} strings
 * that span multiple lines.
 *
 * @author kono
 *
 * @param <T> payload type carried by the tokens
 */
public class PropertyListStreamTokenizer<T> extends PLScannerImpl<T>
        implements PLScanner<T> {

    private StreamTokenizer tokenizer;

    public final static char QUOTE = '\'';
    public final static char DOUBLE_QUOTE = '"';

    /**
     * Creates a nested tokenizer sharing the dictionary and null token of an
     * enclosing tokenizer.
     *
     * @param propertyListStreamTokenizer enclosing tokenizer (currently unused)
     * @param dict                        dictionary used to intern tokens
     * @param nullToken                   token returned at end of input
     */
    public PropertyListStreamTokenizer(
            PropertyListStreamTokenizer<T> propertyListStreamTokenizer,
            Dictionary<T> dict, Token<T> nullToken) {
        this.dict = dict;
        this.nullToken = nullToken;
    }

    /**
     * Creates a tokenizer with its own null token.
     *
     * @param dict dictionary used to intern tokens
     */
    public PropertyListStreamTokenizer(Dictionary<T> dict) {
        this.dict = dict;
        nullToken = new Token<T>("", TokenID.NULL);
    }

    /**
     * Installs the property-list syntax table on the current tokenizer.
     * Does nothing when no input has been set yet. It is applied
     * automatically whenever a new input is attached, and calling it again
     * simply reinstalls the same table.
     */
    public void init() {
        if (tokenizer == null) {
            return;
        }
        tokenizer.resetSyntax();
        tokenizer.wordChars('0', '9');
        tokenizer.wordChars('a', 'z');
        tokenizer.wordChars('A', 'Z');
        tokenizer.wordChars('_', '_');
        tokenizer.ordinaryChar('=');
        tokenizer.ordinaryChar('{');
        tokenizer.ordinaryChar('}');
        tokenizer.ordinaryChar('(');
        tokenizer.ordinaryChar(')');
        tokenizer.ordinaryChar(';');
        tokenizer.ordinaryChar(',');
        tokenizer.whitespaceChars(' ', ' ');
        tokenizer.whitespaceChars('\t', '\t');
        tokenizer.whitespaceChars('\n', '\n');
        tokenizer.whitespaceChars('\r', '\r');
        tokenizer.quoteChar(QUOTE);
        tokenizer.quoteChar(DOUBLE_QUOTE);
        tokenizer.parseNumbers();
        tokenizer.eolIsSignificant(false);
        tokenizer.slashStarComments(true);
        tokenizer.slashSlashComments(true);
    }

    /**
     * Returns the next token, or the null token at end of input or when the
     * underlying reader fails.
     *
     * <p>Words and ordinary characters are interned as {@code TokenID.Any};
     * quoted strings become {@code TokenID.VARIABLE}; numbers become
     * {@code TokenID.NUMBER}.
     */
    @Override
    public Token<T> nextToken() {
        nextToken = nullToken;
        if (tokenizer == null) {
            return nextToken;
        }
        lineno = tokenizer.lineno();
        int token;
        try {
            token = tokenizer.nextToken();
        } catch (IOException e) {
            // Best effort, as before: an unreadable stream is treated as end of input.
            return nullToken;
        }
        switch (token) {
        case StreamTokenizer.TT_EOF:
            return nextToken;
        case StreamTokenizer.TT_NUMBER:
            // For numbers StreamTokenizer fills nval; sval is null.
            return nextToken = new Token<T>(numberText(tokenizer.nval), TokenID.NUMBER);
        case StreamTokenizer.TT_WORD:
            return nextToken = intern(tokenizer.sval);
        case QUOTE:
        case DOUBLE_QUOTE:
            return nextToken = new Token<T>(tokenizer.sval, TokenID.VARIABLE);
        case StreamTokenizer.TT_EOL:
            if (prompt != null) System.out.print(prompt);
            return nextToken();
        default:
            // For an ordinary character the character itself is the token
            // type; sval is null in this case.
            return nextToken = intern(String.valueOf((char) token));
        }
    }

    /** Returns the dictionary entry for {@code s}, creating a {@code TokenID.Any} token if absent. */
    private Token<T> intern(String s) {
        Token<T> t = dict.get(s);
        if (t == null) {
            t = new Token<T>(s, TokenID.Any);
            dict.put(s, t);
        }
        return t;
    }

    /** Renders a parsed number, dropping the fraction for integral values ("12", not "12.0"). */
    private static String numberText(double value) {
        if (value == Math.rint(value) && Math.abs(value) < 1e15) {
            return Long.toString((long) value);
        }
        return Double.toString(value);
    }

    /**
     * Reports whether another token is available. The token that is read to
     * find out is pushed back, so the following {@link #nextToken()} call
     * still sees it.
     */
    @Override
    public boolean hasRemaining() {
        if (tokenizer == null) {
            return false;
        }
        try {
            int type = tokenizer.nextToken();
            tokenizer.pushBack();
            return type != StreamTokenizer.TT_EOF;
        } catch (IOException e) {
            return false;
        }
    }

    @Override
    public PLScanner<T> pushScannerFile(InputStream newfile,
            String prompt) {
        return new PropertyListStreamTokenizer<T>(this, dict, nullToken).setFile(newfile, prompt);
    }

    @Override
    public PLScanner<T> pushScanner(String exp) {
        return new PropertyListStreamTokenizer<T>(this, dict, nullToken).set(exp);
    }

    @Override
    public PLScanner<T> pushScannerFile(String file)
            throws FileNotFoundException {
        return new PropertyListStreamTokenizer<T>(this, dict, nullToken).setFile(file);
    }

    /** Starts tokenizing the given expression text. */
    @Override
    public PLScanner<T> set(String exp) {
        return attach(new StringReader(exp));
    }

    /** Starts tokenizing the named file. */
    @Override
    public PLScanner<T> setFile(String file)
            throws FileNotFoundException {
        return attach(new FileReader(file));
    }

    /** Starts tokenizing from the given reader. */
    @Override
    public PLScanner<T> set(InputStreamReader reader) {
        return attach(reader);
    }

    /** Wraps the reader in a fresh tokenizer and installs the syntax table on it. */
    private PLScanner<T> attach(Reader reader) {
        tokenizer = new StreamTokenizer(reader);
        init();
        return this;
    }
}
--- a/src/plparser/TestScanner.java Wed Sep 01 18:43:06 2010 +0900 +++ b/src/plparser/TestScanner.java Thu Sep 02 10:00:23 2010 +0900 @@ -27,7 +27,8 @@ Dictionary<Property> dict = new Dictionary<Property>(); // scan = new PropertyListScanner<Property>(dict); // scan = new PropertyListStreamTokenizer<Property>(dict); - scan = new PropertyListStreamScanner<Property>(dict); + // scan = new PropertyListStreamScanner<Property>(dict); + scan = new PropertyListCharTokenizer<Property>(dict); }