view src/parser/LogicNodeScanner.java @ 111:f00740bd0feb

Java 6 patch (Matcher.group(1))
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Dec 2009 01:07:22 +0900
parents 31278b74094b
children 264d9178c01b
line wrap: on
line source

package parser;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.CharBuffer;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class LogicNodeScanner<Node> {

	/*
	 * Tokenizer for LogicNode Parser
	 *     Pattern/Matcher implementation working directly on a CharBuffer.
	 *     
	 *     A stack of scanners (linked through prev) is used for nested parsing.
	 *         scanner.push();
	 *         parser.parse(exp);
	 *         nextToken = scanner.pop();
	 *         
	 *     2007/12 Shinji Kono    
	 */
	
	// Number of line terminators consumed so far; reset to 0 by set(InputStreamReader).
	public int lineno;
	// Matcher bound to cb; its pattern is switched with usePattern() for every attempt.
	public Matcher scan;
	// Last token returned by nextToken(), or nullToken.
	public Token<Node> nextToken;
	// Symbol dictionary; operator and name tokens are looked up and stored here by text.
	public Dictionary<Node> dict;
	// Not used inside this class.
	public LogicNodeScanner<Node> next;
	// Input buffer. The position is the next unread character, the limit is the end of data.
	protected CharBuffer cb;
	// Reader used to refill cb; null for String input and after the end of the input.
	private InputStreamReader file;
	// True when this scanner opened the reader itself and therefore has to close it.
	private boolean ownsReader;
	// Name reported by error(); null for String input.
	private String filename;
	// Enclosing scanner, returned by popScanner().
	public LogicNodeScanner<Node> prev;
	// Token that marks the end of the input.
	public Token<Node> nullToken ;
	// When non-null, printed to System.out before every read from the reader.
	public String prompt;

	/*
	 * Create a top level scanner. Input must be supplied with set() or setFile().
	 */
	public LogicNodeScanner(Dictionary<Node> dict) {
		this.dict = dict;
		nullToken = new Token<Node>("",TokenID.NULL,0);
	}
	
	/*
	 * Scanner Container for Stack
	 *    prev is the scanner to go back to; dict and nullToken are shared with it.
	 */
	public LogicNodeScanner(LogicNodeScanner<Node> prev, Dictionary<Node> dict, Token<Node> nullToken) {
		this.prev = prev;
		this.dict = dict;
		this.nullToken = nullToken;
	}


	// Every pattern must contain exactly one capturing group: next() returns group 1.
	// NOTE(review): tokenPat and namePat are left non-final as before, because making
	// them final could break code that assigns them.
	public static Pattern tokenPat = Pattern.compile(
			"(\\&\\&|[*\\?\\:\\^.()+{},;~\\&@]|\\[\\]|\\:\\=|\\<\\=|\\<\\>|\\=\\>|\\=|" +
			"\\<\\-\\>|\\-\\>|\\<\\-|\\[|\\])"
			);
	public static Pattern namePat  = Pattern.compile("([a-zA-Z]\\w*)");
	public static final Pattern numPat  = Pattern.compile("([0-9]+)");
	public static final Pattern stringPat1  = Pattern.compile("\\\"([^\"]*)\\\"");
	public static final Pattern stringPat  = Pattern.compile("\\'([^\\']*)\\'");
	public static final Pattern commentPat  = Pattern.compile("(%.*)");
	public static final Pattern errorPat = Pattern.compile("([^\\s])");
	public static final Pattern anyPat = Pattern.compile("(.)");
	private static final int BufferSize = 4096;

	/*
	 * Get next token
	 * 
	 *    There is no look ahead and no put back. It never returns null but
	 *    may return nullToken, so nextToken.type is always valid.
	 *    nullToken means the end of the input.
	 *    
	 *    Token is a syntax element and it may have macro binding as
	 *    predicate, infix or prefix operator. To get the value, use
	 *    makeVariable(). Operator order for infix and prefix is in
	 *    Token.order. TokenID.order is default order for fix element and
	 *    currently never used.
	 *    
	 *    When the matcher hits the end of the input, next() and
	 *    hasRemaining() try to extend the input using extendInput().
	 *    
	 *    Patterns are tried in this order: operator, 'quoted' name or plain
	 *    name, number, "string", % comment, any other non-space character
	 *    (reported with error() and skipped), any other character (skipped).
	 *    What is left is a line terminator, which advances lineno.
	 */
	
	public Token<Node> nextToken() {
		String s;
		nextToken = nullToken;
		while(hasRemaining()) {
			scan.reset(); // tell the matcher that the CharBuffer has been modified
			if ((s=next(tokenPat))!=null) {
				Token<Node> t;
				if ((t = dict.get(s))==null) {
					dict.put(s, t = new Token<Node>(s,TokenID.Any));
				}
				return nextToken = t;
			} else if ((s=next(stringPat))!=null||(s=next(namePat))!=null) {
				Token<Node> t;
				if ((t = dict.get(s))==null) {
					dict.put(s, t = new Token<Node>(s,TokenID.VARIABLE));
				}
				return nextToken = t;
			} else if ((s=next(numPat))!=null) {
				return nextToken = new Token<Node>(s,TokenID.NUMBER);
			} else if ((s=next(stringPat1))!=null) {
				return nextToken = new Token<Node>(s,TokenID.STRING);
			} else if ((s=next(commentPat))!=null) {
				// skip until eol (the comment may have been cut by a full buffer)
				while(cb.hasRemaining()&&next(anyPat)!=null) {
					// nothing to do, next() already consumed the character
				}
				continue;
			} else if ((s=next(errorPat))!=null) {
				error("Don't understand '"+s+"'");
				continue;
			} else if ((s=next(anyPat))!=null) {
				// skip space
				continue;
			} else {
				// Only '\r' and '\n' get here: they are white space (so errorPat
				// fails) and line terminators (so anyPat fails).
				char eol = cb.get(); // discard one ( new line )
				// Count CR LF as a single line: a '\r' directly followed by '\n'
				// is not counted, the '\n' will be. hasRemaining() refills the
				// buffer when the '\r' happens to be its last character.
				if (eol!='\r' || !hasRemaining() || cb.get(cb.position())!='\n') {
					lineno++;
				}
			}
		}
		return nextToken;
	}

	/*
	 * Try to match pattern at the current position of the buffer.
	 * 
	 *    Returns the text of group 1 and moves the buffer position behind
	 *    the whole match, or returns null and leaves the position alone.
	 *    If the matcher ran into the end of the buffered data, the input is
	 *    extended and the match is repeated, so that a token is not cut at
	 *    a buffer boundary.
	 */
	protected String next(Pattern pattern) {
		String s = null;
		while(true) {
			boolean match = scan.usePattern(pattern).lookingAt();
			if (scan.hitEnd() && extendInput()) {
				// input is extended try again
				scan.reset();
				continue;
			}
			// no extension was possible, use what we have
			if (match) {
				// This won't work in Java 6
				// s = scan.group(1);
				s = cb.toString().substring(scan.start(1),scan.end(1));
				// fix position in CharBuffer
				// scan.end() is relative position
				cb.position(cb.position()+scan.end());
				// scan.reset(); will be done on top of nextToken()
			}
			if (scan.hitEnd()) {
				// previous extendInput may have failed because of Buffer full.
				// Now we may have a space. Try again
				extendInput();
			}
			return s;
		}
	}
	
	/*
	 * True if unread characters are buffered, or more could be read.
	 */
	public boolean hasRemaining() {
		return cb.hasRemaining()||extendInput();
	}

	/*
	 *    Extend Input data
	 *    
	 *    Unread data is moved to the top of the buffer and the free space
	 *    behind it is filled from the reader. Returns true if at least one
	 *    character was added. Returns false for String input, when the
	 *    buffer is completely filled with unread data, at the end of the
	 *    input and on a read error (which is reported with error()).
	 *    After the end of the input or an error the reader is dropped.
	 */
	protected boolean extendInput() {
		if (file==null) return false;
		if (cb.position()==0 && cb.limit()==cb.capacity()) {
			// Buffer is full of unread data, compact() cannot free anything.
			// A buffer with position 0 that is only partly filled (short first
			// read) still has room and must be extended like any other.
			return false;
		}
		// move remaining data to the top, set position for next read
		cb.compact();
		boolean extended = false;
		IOException failure = null;
		try {
			if (prompt!=null) System.out.print(prompt);
			extended = file.read(cb)>0;
		} catch (IOException e) {
			failure = e;
		}
		if (!extended) {
			releaseReader();
		}
		cb.flip();    // back to reading; the remaining data starts at position 0
		if (failure!=null) {
			error("Read failed: "+failure);
		}
		return extended;
	}

	/*
	 * Forget the current reader. It is closed only if this scanner opened it.
	 */
	private void releaseReader() {
		InputStreamReader old = file;
		boolean owned = ownsReader;
		file = null;
		ownsReader = false;
		if (owned && old!=null) {
			closeQuietly(old);
		}
	}

	/*
	 * Close reader, ignoring a failure of the close itself.
	 */
	private static void closeQuietly(InputStreamReader reader) {
		try {
			reader.close();
		} catch (IOException ignored) {
			// the reader is finished anyway, nothing useful can be done here
		}
	}

	protected LogicNodeScanner<Node> pushScanner(String exp) {
		// New scanner on top of this one for nested parsing of a String
		return new LogicNodeScanner<Node>(this,dict,nullToken).set(exp);
	}
	
	protected LogicNodeScanner<Node> pushScannerFile(String newfile) throws FileNotFoundException {
		// New scanner on top of this one for a nested file
		return new LogicNodeScanner<Node>(this,dict,nullToken).setFile(newfile);
	}

	public LogicNodeScanner<Node> pushScannerFile(InputStream newfile,String prompt) {
		// New scanner on top of this one for a nested stream with a prompt
		return new LogicNodeScanner<Node>(this,dict,nullToken).setFile(newfile,prompt);
	}

	protected LogicNodeScanner<Node> popScanner() {
		return prev;
	}

	/*
	 * Nearest scanner on the stack (starting with this one) that has a
	 * file name, or null if there is none.
	 */
	private LogicNodeScanner<Node> findFileName() {
		for(LogicNodeScanner<Node> s = this;s!=null ; s = s.prev) {
			if (s.filename!=null) return s;
		}
		return null;
	}
	/*
	 * Read From String
	 *    Any reader and file name set before are dropped. lineno is kept.
	 */
	
	public LogicNodeScanner<Node> set(String exp) {
		releaseReader();
		cb = CharBuffer.wrap(exp);
		scan = tokenPat.matcher(cb);
		filename = null;
		nextToken = nullToken;
		return this;
	}
	
	/*
	 * Read From File
	 *    We cannot read symbol bigger than Buffersize
	 *    The reader opened here is closed when its end is reached or when
	 *    other input is set.
	 */
	public LogicNodeScanner<Node> setFile(String file) throws FileNotFoundException {
		this.filename = file;
		nextToken = nullToken;
		FileReader reader = new FileReader(file);
		set(reader);
		if (this.file==reader) {
			ownsReader = true;
		} else if (this.file==null) {
			// nothing could be read, the reader is not kept
			closeQuietly(reader);
		}
		return this;
	}
	
	/*
	 * Read From Reader
	 *    The buffer is filled with the first read and lineno is reset to 0.
	 *    If nothing can be read (empty input or read error) the scanner is
	 *    set to the empty String instead, which also clears the file name.
	 *    The reader is not closed by this class.
	 */
	public LogicNodeScanner<Node> set(InputStreamReader file) {
		releaseReader();
		CharBuffer buffer = CharBuffer.allocate(BufferSize);
		boolean filled = false;
		IOException failure = null;
		try {
			if (prompt!=null) System.out.print(prompt);
			filled = file.read(buffer)>0;
		} catch (IOException e) {
			failure = e;
		}
		if (failure!=null) {
			error("Read failed: "+failure);
		}
		if (!filled) {
			return set("");
		}
		buffer.flip();    // prepare for reading what has been read
		this.file = file;
		cb = buffer;
		scan = tokenPat.matcher(cb);
		lineno = 0;
		return this;
	}
	
	/*
	 * Print an error message on System.err, prefixed with file name and
	 * line number of the nearest scanner that reads from a named input.
	 */
	public void error(String err) {
		LogicNodeScanner<Node> s = findFileName();
		if (s!=null) {
			System.err.print(s.filename+":"+s.lineno+": ");
		}
		System.err.println("error: "+err);
	}
	
	/*
	 * Iterator for Test Routine
	 *    for(Token<Node> t: scanner.scanToken(new FileReader(file))) { ... }
	 *    
	 *    hasNext() is hasRemaining(), so if only spaces or comments are left
	 *    the last call of next() returns nullToken.
	 */

	public Iterable<Token<Node>> scanToken(String exp) {
		set(exp);
		return tokenIterable();
	}
	
	public Iterable<Token<Node>> scanToken(FileReader file) {
		set(file);
		return tokenIterable();
	}

	private Iterable<Token<Node>> tokenIterable() {
		return new Iterable<Token<Node>>() {
			public Iterator<Token<Node>> iterator() {
				return new Iterator<Token<Node>>() {
					public boolean hasNext() {
						return hasRemaining();
					}
					public Token<Node> next() {
						return nextToken();
					}
					public void remove() {
						// tokens are consumed from a stream, there is nothing to remove
						throw new UnsupportedOperationException();
					}
				};
			}
		};
	}

	/*
	 * Read From Stream with a prompt
	 *    The stream's toString() is used as the file name in error messages.
	 *    The stream is not closed by this class.
	 */
	private LogicNodeScanner<Node> setFile(InputStream newfile,String prompt) {
		this.filename = newfile.toString();
		nextToken = nullToken;
		this.prompt = prompt;
		set(new InputStreamReader(newfile));
		return this;
	}


}