changeset 2:1744340f8be6 draft

add some java files
author one
date Wed, 05 Sep 2012 11:56:21 +0900
parents 08f01b5c4d4a
children b44abb9aa09f
files src/pagerank/LinkConvertGraph.java src/pagerank/LinkToVertex.java src/pagerank/WikiPage.java src/sample/CreateTinkerGraph.java src/xmlParser/CharReader.java src/xmlParser/TextTagParser.java src/xmlParser/WikiLinkParser.java src/xmlParser/XmlTagObject.java
diffstat 8 files changed, 790 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pagerank/LinkConvertGraph.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,158 @@
+package pagerank;
+
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.SAXException;
+
+import com.tinkerpop.blueprints.Graph;
+import com.tinkerpop.blueprints.Vertex;
+import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
+import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
+
+import xmlParser.TextTagParser;
+
+/**
+ * Parses a MediaWiki XML dump with {@link TextTagParser}, keeps the resulting
+ * "page title -> linked page titles" table and, from {@link #main(String[])},
+ * turns that table into a Blueprints graph that is written out as GraphML.
+ */
+public class LinkConvertGraph {
+
+	/** Dump that is read when no input path is passed on the command line. */
+	private static final String DEFAULT_INPUT = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
+	/** GraphML file that is written when no output path is passed on the command line. */
+	private static final String DEFAULT_GRAPH_OUTPUT = "./resource/tinkerpopDB";
+	/** Plain-text listing of the parsed link table. */
+	private static final String LINK_LOG = "./resource/wikiLink.log";
+
+	private String filename;
+	private FileInputStream fis;
+	private SAXParserFactory factory;
+	private SAXParser parser;
+	private TextTagParser xmlParser;
+
+	/** Page title mapped to the titles it links to; stays null until {@link #parseXml()} has run. */
+	private HashMap<String,HashSet<String>> hash;
+
+	/**
+	 * Creates a converter without an input file; call
+	 * {@link #setFilename(String)} before parsing.
+	 */
+	LinkConvertGraph() throws ParserConfigurationException, SAXException {
+		xmlParser = new TextTagParser();
+		factory = SAXParserFactory.newInstance();
+		parser = factory.newSAXParser();
+	}
+
+	/**
+	 * Creates a converter that reads the given dump.
+	 *
+	 * @param filename path of the XML dump to parse
+	 * @throws FileNotFoundException if the dump cannot be opened
+	 */
+	LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
+		// Build the parser first so that a parser failure cannot leak an open stream.
+		this();
+		this.filename = filename;
+		this.fis = new FileInputStream(filename);
+	}
+
+	/**
+	 * Switches the converter to another dump. The new file is opened before
+	 * the previous stream is released, so a missing file leaves the converter
+	 * unchanged.
+	 *
+	 * @param filename path of the XML dump to parse next
+	 * @throws FileNotFoundException if the dump cannot be opened
+	 */
+	public void setFilename(final String filename) throws FileNotFoundException {
+		FileInputStream next = new FileInputStream(filename);
+		if (this.fis != null) {
+			try {
+				this.fis.close();
+			} catch (IOException ignored) {
+				// The old stream is being discarded; failing to close it does not affect the new input.
+			}
+		}
+		this.filename = filename;
+		this.fis = next;
+	}
+
+	/**
+	 * Runs the SAX parser over the current input and stores the link table.
+	 * The input stream is closed afterwards whether or not parsing succeeded.
+	 *
+	 * @throws IllegalStateException if no input file has been set (or it was already parsed)
+	 */
+	private void parseXml() throws SAXException, IOException {
+		if (fis == null) {
+			throw new IllegalStateException("no input file to parse");
+		}
+		try {
+			parser.parse(this.fis, this.xmlParser);
+			hash = xmlParser.getHash();
+		} finally {
+			fis.close();
+			fis = null;
+		}
+	}
+
+	private HashMap<String,HashSet<String>> getHash() {
+		return hash;
+	}
+
+	/** Prints every page title followed by its tab-indented link titles to standard output. */
+	public void printHash() {
+		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
+			System.out.println("title: " + entry.getKey());
+			for (String link : entry.getValue()) {
+				System.out.println("\t"+link);
+			}
+			System.out.println();
+		}
+	}
+
+	/**
+	 * Writes the same listing as {@link #printHash()} to the given stream,
+	 * flushing after every page. Text is encoded with the platform default
+	 * charset. The stream is not closed here.
+	 */
+	private void printHash(FileOutputStream os) throws IOException {
+		for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
+			os.write( ("title: " + entry.getKey() + "\n").getBytes());
+			for (String link : entry.getValue()) {
+				os.write( ("\t"+link+"\n").getBytes());
+			}
+			os.write( ("\n").getBytes());
+			os.flush();
+		}
+	}
+
+	/**
+	 * Returns the vertex registered for the title, creating it with an
+	 * initial page rank of 0.0 when it does not exist yet. Every vertex goes
+	 * through this method, so whether a vertex carries a rank no longer
+	 * depends on the order in which titles are encountered.
+	 */
+	private static Vertex findOrCreateVertex(LinkToVertex ltn, String title) {
+		if (ltn.getId(title) != null) {
+			return ltn.getVertex(title);
+		}
+		Vertex created = ltn.createVertexWithPageTitle(title);
+		ltn.setPageRank(created, Double.valueOf(0.0));
+		return created;
+	}
+
+	/**
+	 * Converts a wiki dump into a GraphML link graph.
+	 *
+	 * @param args optional: args[0] is the dump to read, args[1] the GraphML
+	 *             file to write; the built-in defaults are used when absent
+	 */
+	public static void main(String[] args) {
+		final String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
+		final String graphOutput = args.length > 1 ? args[1] : DEFAULT_GRAPH_OUTPUT;
+
+		try {
+			LinkConvertGraph lcg = new LinkConvertGraph(input);
+			lcg.parseXml();
+
+			FileOutputStream fos = new FileOutputStream(LINK_LOG);
+			try {
+				lcg.printHash(fos);
+			} finally {
+				fos.close();
+			}
+
+			Graph graph = new TinkerGraph();
+			LinkToVertex ltn = new LinkToVertex(graph);
+			for (Map.Entry<String, HashSet<String>> entry : lcg.getHash().entrySet()) {
+				Vertex page = findOrCreateVertex(ltn, entry.getKey());
+				for (String linkPageTitle : entry.getValue()) {
+					ltn.setHasLink(page, findOrCreateVertex(ltn, linkPageTitle));
+				}
+			}
+
+			// The graph goes to its own output file. Opening the stream on the
+			// input path would truncate the dump that was just parsed.
+			FileOutputStream out = new FileOutputStream(new File(graphOutput));
+			try {
+				GraphMLWriter.outputGraph(graph, out);
+			} finally {
+				out.close();
+			}
+
+		} catch (FileNotFoundException e) {
+			e.printStackTrace();
+		} catch (SAXException e) {
+			e.printStackTrace();
+		} catch (ParserConfigurationException e) {
+			e.printStackTrace();
+		} catch (IOException e) {
+			System.err.println("Failed to parse xml");
+			e.printStackTrace();
+		}
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pagerank/LinkToVertex.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,231 @@
+package pagerank;
+
+import java.util.HashMap;
+
+import com.tinkerpop.blueprints.Direction;
+import com.tinkerpop.blueprints.Edge;
+import com.tinkerpop.blueprints.Graph;
+import com.tinkerpop.blueprints.Vertex;
+
+import pagerank.WikiPage;
+
+/**
+ * Helper around a Blueprints {@link Graph} in which wiki pages are vertices
+ * carrying the properties {@link #PAGE_TITLE} and {@link #PAGE_RANK}, and
+ * links between pages are edges labelled {@link #HAS_LINK}. It also keeps a
+ * title-to-id table and a title-to-{@link WikiPage} table for the registered
+ * pages and computes a PageRank value per vertex.
+ */
+public class LinkToVertex {
+
+	Graph graph;
+	public final static String PAGE_TITLE = "pageTitle";
+	public final static String PAGE_RANK = "pageRank";
+	public static final String HAS_LINK = "HasLink";
+
+	/** Weight of the rank received through inbound links. */
+	private static final double DAMPING = 0.85;
+	/** Weight of the share that every registered page receives equally. */
+	private static final double TELEPORT = 0.15;
+
+	private HashMap<String, Long> pageIdTable = new HashMap<String, Long>();
+	private HashMap<String, WikiPage> wikiPageHash = new HashMap<String, WikiPage>();
+	/** Number of vertices registered by searchAllNodes() / searchRegiNodes(). */
+	private long allNodeNumber;
+
+	LinkToVertex(Graph graph) {
+		this.graph = graph;
+		allNodeNumber = 0;
+	}
+
+	/**
+	 * Converts a vertex id to a Long. Numeric ids are used as they are; any
+	 * other id is parsed from its string form, so ids that arrive as decimal
+	 * strings are accepted as well as Long ids.
+	 *
+	 * @throws NumberFormatException if the id is not a decimal number
+	 */
+	private static Long toLongId(Object id) {
+		if (id instanceof Number) {
+			return Long.valueOf(((Number) id).longValue());
+		}
+		return Long.valueOf(String.valueOf(id));
+	}
+
+	/** Returns the id recorded for the title, or null when the title is unknown. */
+	Long getId(String pageTitle) {
+		return pageIdTable.get(pageTitle);
+	}
+
+	/** Tells whether the label is the HAS_LINK label; a null label is not. */
+	boolean isHasLink(String label) {
+		return HAS_LINK.equals(label);
+	}
+
+	private Vertex createVertex() {
+		return graph.addVertex(null);
+	}
+
+	private Vertex createVertex(Object id) {
+		return graph.addVertex(id);
+	}
+
+	private Vertex createVertexWithProperty(String key, Object value) {
+		Vertex v = createVertex();
+		v.setProperty(key,value);
+		return v;
+	}
+
+	String getPageTitle(Vertex v) {
+		return (String) v.getProperty(PAGE_TITLE);
+	}
+
+	/** Returns the stored rank, or null when the vertex has no PAGE_RANK property. */
+	Double getPageRank(Vertex v) {
+		return (Double) v.getProperty(PAGE_RANK);
+	}
+
+	/** Adds a vertex for the title and records its id in the title-to-id table. */
+	Vertex createVertexWithPageTitle(String pageTitle) {
+		Vertex v = createVertexWithProperty(PAGE_TITLE, pageTitle);
+		pageIdTable.put(pageTitle, toLongId(v.getId()));
+		return v;
+	}
+
+	Vertex setPageRank(Vertex v, Double rank) {
+		v.setProperty(PAGE_RANK, rank);
+		return v;
+	}
+
+	/** Returns the vertex recorded for the title, or null when the title is unknown. */
+	Vertex getVertex(String name) {
+		Long id = pageIdTable.get(name);
+		if (id == null) {
+			return null;
+		}
+		return graph.getVertex(id);
+	}
+
+	Vertex getNode(int nodeId) {
+		return graph.getVertex(nodeId);
+	}
+
+	Edge setRelationship(Vertex v1, Vertex v2, String label) {
+		return graph.addEdge(null, v1, v2, label);
+	}
+
+	/** Adds a HAS_LINK edge going from v1 to v2. */
+	Edge setHasLink(Vertex v1, Vertex v2) {
+		return setRelationship(v1, v2, HAS_LINK);
+	}
+
+	/**
+	 * Registers the vertex when it has both a title and a rank: records its
+	 * id, stores a WikiPage snapshot with the current link counts and
+	 * increments the registered-page counter.
+	 *
+	 * @return true when the vertex was registered
+	 */
+	private boolean register(Vertex v) {
+		String title = getPageTitle(v);
+		Double rank = getPageRank(v);
+		if (title == null || rank == null) {
+			return false;
+		}
+		Long id = toLongId(v.getId());
+		WikiPage wiki = new WikiPage(title, id.longValue(), rank);
+		wiki.setInHasLink(computeInHasLink(v));
+		wiki.setOutHasLink(computeOutHasLink(v));
+		pageIdTable.put(title, id);
+		wikiPageHash.put(title, wiki);
+		allNodeNumber++;
+		return true;
+	}
+
+	/**
+	 * Resets the counter and registers every vertex of the graph that has
+	 * both a title and a rank.
+	 *
+	 * @return the number of registered vertices
+	 */
+	long searchAllNodes() {
+		allNodeNumber = 0;
+		for (Vertex v : graph.getVertices()) {
+			register(v);
+		}
+		return allNodeNumber;
+	}
+
+	/**
+	 * Registers a single vertex (see searchAllNodes()). The counter is
+	 * incremented on every successful call, also for a vertex that was
+	 * registered before.
+	 */
+	void searchRegiNodes(Vertex v) {
+		register(v);
+	}
+
+	HashMap<String, WikiPage> getWikiPageHash() {
+		return wikiPageHash;
+	}
+
+	HashMap<String, Long> getPageIdTable() {
+		return pageIdTable;
+	}
+
+	public Iterable<Vertex> getAllNodes() {
+		return graph.getVertices();
+	}
+
+	/** Prints the id and all properties of every vertex, followed by a "--" line. */
+	public void printAllNodes() {
+		for (Vertex v : graph.getVertices() ) {
+			System.out.println("ID = "+ v.getId());
+			for (String key: v.getPropertyKeys()) {
+				System.out.println(key + "=" + v.getProperty(key));
+			}
+		}
+		System.out.println("--");
+	}
+
+	/** Counts the HAS_LINK edges of the vertex in the given direction. */
+	private static long countHasLinks(Vertex v, Direction direction) {
+		long count = 0;
+		for (Edge ignored : v.getEdges(direction, HAS_LINK)) {
+			count++;
+		}
+		return count;
+	}
+
+	/** Returns the number of HAS_LINK edges leaving the vertex. */
+	public long computeOutHasLink(Vertex v) {
+		return countHasLinks(v, Direction.OUT);
+	}
+
+	/** Returns the number of HAS_LINK edges arriving at the vertex. */
+	public long computeInHasLink(Vertex v) {
+		return countHasLinks(v, Direction.IN);
+	}
+
+	/**
+	 * Prints the titles of the pages the vertex links to and their count.
+	 *
+	 * @param depth not used
+	 */
+	public void printOutHasLink(Vertex v, int depth) {
+		int numberOfLinkPages = 0;
+		System.out.println(v.getProperty(PAGE_TITLE) + " outHasLink pages:");
+		for (Edge edge : v.getEdges(Direction.OUT, HAS_LINK)) {
+			// For an outgoing edge the linked page sits at the edge's IN end.
+			Vertex target = edge.getVertex(Direction.IN);
+			System.out.println((String) target.getProperty(PAGE_TITLE));
+			numberOfLinkPages++;
+		}
+		System.out.println("Number of outHaslink pages: " + numberOfLinkPages);
+	}
+
+	/**
+	 * Prints the titles of the pages linking to the vertex and their count.
+	 *
+	 * @param depth not used
+	 */
+	public void printInHasLink(Vertex v, int depth) {
+		int numberOfLinkPages = 0;
+		System.out.println(v.getProperty(PAGE_TITLE) + " inHasLink pages:");
+		for (Edge edge : v.getEdges(Direction.IN, HAS_LINK)) {
+			Vertex source = edge.getVertex(Direction.OUT);
+			System.out.println((String) source.getProperty(PAGE_TITLE));
+			numberOfLinkPages++;
+		}
+		System.out.println("Number of inHaslink pages: " + numberOfLinkPages + "\n");
+	}
+
+	/**
+	 * Computes one PageRank update for the vertex:
+	 * rank(v) = 0.85 * sum(rank(u) / outLinks(u)) + 0.15 / N, where u ranges
+	 * over the vertices with a HAS_LINK edge to v and N is the number of
+	 * registered pages. The result is stored on the vertex right away (so
+	 * later calls for other vertices see it) and, when the page is
+	 * registered, on its WikiPage entry.
+	 *
+	 * @return the new rank of the vertex
+	 * @throws IllegalStateException if no page has been registered yet
+	 */
+	public double computePageRank(Vertex v) {
+		if (allNodeNumber <= 0) {
+			throw new IllegalStateException("no registered pages; call searchAllNodes() first");
+		}
+
+		double sum = 0;
+		for (Edge edge : v.getEdges(Direction.IN, HAS_LINK) ) {
+			Vertex linkV = edge.getVertex(Direction.OUT);
+			Double linkRank = getPageRank(linkV);
+			if (linkRank == null) {
+				// A page without a rank has nothing to pass on.
+				continue;
+			}
+			// linkV owns at least the edge being visited, so the divisor is never zero.
+			sum += linkRank.doubleValue() / computeOutHasLink(linkV);
+		}
+
+		double pageRank = DAMPING * sum + TELEPORT / allNodeNumber;
+
+		WikiPage wiki = wikiPageHash.get(getPageTitle(v));
+		if (wiki != null) {
+			wiki.setRank(pageRank);
+		}
+		v.setProperty(PAGE_RANK, pageRank);
+		return pageRank;
+	}
+
+	/** Prints the inbound and outbound links, title, rank and link counts of the vertex with the given id. */
+	public void printNodeInfo(int nodeId) {
+		Vertex v = graph.getVertex(nodeId);
+		if (v == null) {
+			System.out.println("id:"+nodeId+" not found");
+			return;
+		}
+		printInHasLink(v, 1);
+		printOutHasLink(v, 1);
+
+		String title = getPageTitle(v);
+		// Kept boxed so that a vertex without a rank prints "null" instead of failing.
+		Double rank = getPageRank(v);
+		long inHasLink = computeInHasLink(v);
+		long outHasLink = computeOutHasLink(v);
+
+		System.out.println("id:"+nodeId+" title:"+title+" rank:"+rank);
+		System.out.println("inHasLink:"+inHasLink+" outHasLink:"+outHasLink);
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pagerank/WikiPage.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,94 @@
+package pagerank;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import com.tinkerpop.blueprints.Vertex;
+
+import pagerank.LinkToVertex;
+
+/**
+ * Plain snapshot of one wiki page: title, vertex id, rank and the numbers of
+ * outgoing and incoming links.
+ */
+public class WikiPage {
+
+	/** Rank value meaning "no rank known". */
+	private static final double UNSET_RANK = -1.0;
+
+	private String title;
+	private long id;
+	private double rank;
+	private long outHasLink;
+	private long inHasLink;
+
+	/** Creates an empty page: no title, id -1, unset rank, no links. */
+	WikiPage() {
+		this.title = null;
+		this.id = -1;
+		this.rank = UNSET_RANK;
+		this.outHasLink = 0;
+		this.inHasLink = 0;
+	}
+
+	/**
+	 * Creates a page from the title, rank and id of a vertex. Link counts
+	 * start at zero.
+	 *
+	 * @throws NumberFormatException if the vertex id is neither a number nor a decimal string
+	 */
+	WikiPage(Vertex v) {
+		this((String) v.getProperty(LinkToVertex.PAGE_TITLE),
+				toLongId(v.getId()),
+				(Double) v.getProperty(LinkToVertex.PAGE_RANK));
+	}
+
+	/**
+	 * Creates a page with the given values and zero link counts.
+	 *
+	 * @param rank rank of the page; null is stored as the unset value -1.0
+	 */
+	WikiPage(String title, long id, Double rank) {
+		this.title = title;
+		this.id = id;
+		this.rank = (rank == null) ? UNSET_RANK : rank.doubleValue();
+		this.outHasLink = 0;
+		this.inHasLink = 0;
+	}
+
+	/**
+	 * Converts a vertex id to a long. Numeric ids are used as they are; any
+	 * other id is parsed from its string form.
+	 */
+	private static long toLongId(Object id) {
+		if (id instanceof Number) {
+			return ((Number) id).longValue();
+		}
+		return Long.parseLong(String.valueOf(id));
+	}
+
+	String getTitle() {
+		return title;
+	}
+
+	long getId() {
+		return id;
+	}
+
+	/** Returns the rank, or -1.0 when none was set. */
+	double getRank() {
+		return rank;
+	}
+
+	long getOutHasLink() {
+		return outHasLink;
+	}
+
+	long getInHasLink() {
+		return inHasLink;
+	}
+
+	void setTitle(String title) {
+		this.title = title;
+	}
+
+	void setId(long id) {
+		this.id = id;
+	}
+
+	void setRank(double rank) {
+		this.rank = rank;
+	}
+
+	void setOutHasLink(long num) {
+		this.outHasLink = num;
+	}
+
+	void setInHasLink(long num) {
+		this.inHasLink = num;
+	}
+
+	/** Prints id, title, rank and both link counts to standard output. */
+	void printInfo() {
+		System.out.println("id:"+id+" title:"+title+" rank:"+rank);
+		System.out.println("outHasLink:"+outHasLink+" inHasLink:"+inHasLink);
+	}
+
+	/**
+	 * Writes the same two lines as {@link #printInfo()} plus an empty line to
+	 * the stream and flushes it. Text is encoded with the platform default
+	 * charset; the stream is left open.
+	 */
+	void printInfo(FileOutputStream fos) throws IOException {
+		fos.write(("id:"+id+" title:"+title+" rank:"+rank+"\n").getBytes());
+		fos.write(("outHasLink:"+outHasLink+" inHasLink:"+inHasLink+"\n").getBytes());
+		fos.write(("\n").getBytes());
+		fos.flush();
+	}
+
+}
--- a/src/sample/CreateTinkerGraph.java	Tue Sep 04 22:47:53 2012 +0900
+++ b/src/sample/CreateTinkerGraph.java	Wed Sep 05 11:56:21 2012 +0900
@@ -1,44 +1,80 @@
 package sample;
 
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
 import com.tinkerpop.blueprints.Direction;
 import com.tinkerpop.blueprints.Edge;
 import com.tinkerpop.blueprints.Graph;
 import com.tinkerpop.blueprints.Vertex;
 import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
 import com.tinkerpop.blueprints.impls.tg.TinkerGraphFactory;
+import com.tinkerpop.blueprints.util.io.graphml.GraphMLReader;
+import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
 
 public class CreateTinkerGraph {
 
+	/** GraphML file written by {@link #outputGraph()} and read back by {@link #readGraph()}. */
+	public static final String filename = "./resources/tinkerpopDB";
+
+	/**
+	 * Writes the sample graph to {@link #filename} and reads it back,
+	 * printing a stack trace when either step fails with an I/O error.
+	 */
 	public static void main(String[] args) {
-		
-		createTest();
-//		readTest();
+		try {
+			outputGraph();
+			readGraph();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
 	}
 
+	/**
+	 * Builds a sample graph of four named vertices joined by three "knows"
+	 * edges, prints the first edge and the names of the vertices that know
+	 * "Peter", and stores the graph as GraphML in {@link #filename}.
+	 *
+	 * @throws IOException if the file cannot be opened or written
+	 */
+	public static void outputGraph() throws IOException {
+		Graph graph = new TinkerGraph();
+
+		Vertex a = graph.addVertex(null);
+		Vertex b = graph.addVertex(null);
+		Vertex c = graph.addVertex(null);
+		Vertex d = graph.addVertex(null);
+		a.setProperty("name", "maro");
+		b.setProperty("name", "Peter");
+		c.setProperty("name", "smith");
+		d.setProperty("name", "black");
+		Edge e = graph.addEdge(null, a, b, "knows");
+		graph.addEdge(null, c, a, "knows");
+		graph.addEdge(null, d, b, "knows");
+		System.out.println(e.getVertex(Direction.OUT).getProperty("name")
+				+ "--" + e.getLabel() + "-->"
+				+ e.getVertex(Direction.IN).getProperty("name"));
+		for (Edge edge : b.getEdges(Direction.IN, "knows")) {
+			Vertex v =edge.getVertex(Direction.OUT);
+			System.out.println(v.getProperty("name"));
+		}
+
+		// Close the stream in all cases so the file handle is released before the graph is read back.
+		FileOutputStream out = new FileOutputStream(new File(filename));
+		try {
+			GraphMLWriter.outputGraph(graph, out);
+		} finally {
+			out.close();
+		}
+	}
+
+	/**
+	 * Loads the GraphML file {@link #filename} into a fresh graph and prints
+	 * the name, the optional "aaa" property and the outgoing edges of the
+	 * vertex with id "1". Prints a notice instead when that vertex is absent.
+	 *
+	 * @throws IOException if the file cannot be opened or read
+	 */
+	public static void readGraph() throws IOException {
+		Graph graph = new TinkerGraph();
+		FileInputStream in = new FileInputStream(new File(filename));
 	
-	public static void outputGraph() {
-		Graph graph = new TinkerGraph("./resources/");
+		try {
+			GraphMLReader.inputGraph(graph, in);
+		} finally {
+			in.close();
+		}
+		Vertex aa = graph.getVertex("1");
+		if (aa == null) {
+			System.out.println("vertex 1 not found");
+			return;
+		}
+		System.out.println("vertex " + aa.getId() + " has name " + aa.getProperty("name"));
+		if (aa.getProperty("aaa") != null)
+			System.out.println(aa.getProperty("aaa"));
+		for(Edge ee : aa.getEdges(Direction.OUT)) {
+		  System.out.println(ee);
+		}
 		
 	}
 	
-	public static void createTest() {
-		Graph graph = new TinkerGraph("/tmp/tinkergraph"); 
-		Vertex a = graph.addVertex(null);
-		Vertex b = graph.addVertex(null);
-		a.setProperty("name", "mariko");
-		b.setProperty("name", "Peter");
-		Edge e = graph.addEdge(null, a, b, "knows");
-		System.out.println(e.getVertex(Direction.OUT).getProperty("name") + "--" + e.getLabel()
-				+ "-->" + e.getVertex(Direction.IN).getProperty("name"));
-		
-	}
 	
-	public static void readTest() {
-		Graph graph = new TinkerGraph("/tmp/tinkergraph"); 
-		Vertex aa = graph.getVertex("1");
-		System.out.println("vertex " + aa.getId() + " has name " + aa.getProperty("name"));
-		for(Edge ee : aa.getEdges(Direction.OUT)) {
-		  System.out.println(ee);
-		}
-	}
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/xmlParser/CharReader.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,86 @@
+package xmlParser;
+
+
+/**
+ * Scans wiki markup for links written as {@code [[target]]} and hands out
+ * the link targets one at a time through {@link #getToken()}.
+ */
+public class CharReader {
+
+	/** Value returned by {@link #nextChar()} once the text is exhausted. */
+	final static char EOFchar = (char) 0;
+
+	private String text;
+	private int textLength;
+	private int index;
+
+	private final char LBRANK = '[';
+	private final char RBRANK = ']';
+	private final char VERBAR = '|';
+	private final char COLON= ':';
+
+	/* Scanner states of getToken(). */
+	private static final int OUTSIDE = 1;   // not inside a link
+	private static final int ONE_OPEN = 2;  // seen a single '['
+	private static final int IN_LINK = 3;   // after "[[", collecting the target
+	private static final int ONE_CLOSE = 4; // seen a single ']' inside a link
+
+	CharReader() {
+	}
+
+	/**
+	 * Sets the text to scan and rewinds to its start.
+	 *
+	 * @param str text to scan; null is treated as empty text
+	 */
+	public void setText(String str) {
+		text = (str == null) ? "" : str;
+		textLength = text.length();
+		index = 0;
+	}
+
+	/** Returns the next character and advances, or {@link #EOFchar} at the end of the text. */
+	char nextChar() {
+		if (index < textLength)
+			return text.charAt(index++);
+
+		return EOFchar;
+
+	}
+
+	/**
+	 * Returns the target of the next {@code [[...]]} link, continuing from
+	 * where the previous call stopped.
+	 * <ul>
+	 * <li>Text after the first '|' is not part of the target.</li>
+	 * <li>A ':' discards what was collected so far, so only the part after
+	 * the last ':' is returned.</li>
+	 * <li>A link containing a lone ']' is dropped and scanning continues
+	 * with the following text.</li>
+	 * </ul>
+	 * The result may be an empty string (for example for {@code [[]]}).
+	 *
+	 * @return the next link target, or null when the text holds no further
+	 *         complete link
+	 */
+	String getToken() {
+		int state = OUTSIDE;
+		StringBuilder buf = new StringBuilder(256);
+
+		// The position is checked against the text length instead of comparing
+		// characters with EOFchar, so a NUL character inside the text is not
+		// mistaken for the end of the input.
+		while (index < textLength) {
+			char ch = text.charAt(index++);
+			switch (state) {
+			case OUTSIDE:
+				if (ch == LBRANK)
+					state = ONE_OPEN;
+				break;
+			case ONE_OPEN:
+				state = (ch == LBRANK) ? IN_LINK : OUTSIDE;
+				break;
+			case IN_LINK:
+				if (ch == RBRANK) {
+					state = ONE_CLOSE;
+				} else if (ch == VERBAR) {
+					return buf.toString();
+				} else if (ch == COLON) {
+					buf.setLength(0);
+				} else {
+					buf.append(ch);
+				}
+				break;
+			case ONE_CLOSE:
+				if (ch == RBRANK) {
+					return buf.toString();
+				}
+				// Malformed link: forget it and keep scanning. Returning null
+				// here would tell the caller that the text has ended and make
+				// it skip every link that follows.
+				buf.setLength(0);
+				state = (ch == LBRANK) ? ONE_OPEN : OUTSIDE;
+				break;
+			}
+		}
+		return null;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/xmlParser/TextTagParser.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,95 @@
+package xmlParser;
+
+import xmlParser.XmlTagObject;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Stack;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX handler that collects, for every {@code <title>} / {@code <text>} pair
+ * of the parsed document, the set of wiki link targets found in the text and
+ * stores it under the title.
+ */
+public class TextTagParser extends DefaultHandler {
+
+	protected Stack<XmlTagObject> stack;
+	private XmlTagObject currentObj;
+	private String currentTag;
+	private String currentTitleName;
+	final static String TAGNAME_TITLE = "title";
+	final static String TAGNAME_TEXT = "text";
+	WikiLinkParser linkParser = new WikiLinkParser();
+
+	/** Title mapped to the link targets found in the text that followed it. */
+	HashMap<String,HashSet<String>> hash = new HashMap<String,HashSet<String>>();
+	HashSet<String> currentLinkHash = new HashSet<String>();
+
+	/**
+	 * Character data of the title/text element being read. A SAX parser may
+	 * deliver the content of one element in several characters() calls, so
+	 * the pieces are joined here and evaluated when the element ends.
+	 */
+	private final StringBuilder textBuffer = new StringBuilder();
+
+	public TextTagParser() {
+		stack = new Stack<XmlTagObject>();
+	}
+
+	/** Returns the table filled so far (the live map, not a copy). */
+	public HashMap<String,HashSet<String>> getHash() {
+		return hash;
+	}
+
+	@Override
+	public void startDocument() {
+	}
+
+	/** Remembers the element name and, for title and text elements, starts collecting their content. */
+	@Override
+	public void startElement(String uri, String localName, String qName,
+			Attributes attributes) throws SAXException {
+		currentTag = qName;
+		if (qName.equals(TAGNAME_TITLE) || qName.equals(TAGNAME_TEXT)) {
+			currentObj = new XmlTagObject(attributes);
+			stack.push(currentObj);
+			textBuffer.setLength(0);
+		}
+	}
+
+	/** Appends a piece of character data while a title or text element is the most recently opened element. */
+	@Override
+	public void characters(char[] ch, int offset, int length) {
+		if (currentObj == null) {
+			return;
+		}
+		if (TAGNAME_TITLE.equals(currentTag) || TAGNAME_TEXT.equals(currentTag)) {
+			textBuffer.append(ch, offset, length);
+		}
+	}
+
+	/**
+	 * Evaluates the collected content: the end of a title sets the current
+	 * title; the end of a text extracts its links and stores them under the
+	 * current title.
+	 */
+	@Override
+	public void endElement(String uri, String localName, String qName) {
+
+		if (currentObj == null)
+			return;
+		if (qName.equals(TAGNAME_TITLE)) {
+			String title = textBuffer.toString();
+			currentObj.setValue(qName, currentObj.getAttributes(), title);
+			currentTitleName = title;
+			textBuffer.setLength(0);
+			stack.pop();
+		} else if (qName.equals(TAGNAME_TEXT)) {
+			String body = textBuffer.toString();
+			currentObj.setValue(qName, currentObj.getAttributes(), body);
+			currentLinkHash.addAll(linkParser.parse(body));
+			hash.put(currentTitleName, currentLinkHash);
+			currentLinkHash = new HashSet<String>();
+			textBuffer.setLength(0);
+			stack.pop();
+		}
+		if (stack.empty())
+			currentObj = null;
+		else
+			currentObj = stack.peek();
+	}
+
+	@Override
+	public void endDocument() {
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/xmlParser/WikiLinkParser.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,25 @@
+package xmlParser;
+
+import java.util.HashSet;
+
+/**
+ * Extracts the distinct link targets from a piece of wiki markup by pulling
+ * tokens from a {@link CharReader}.
+ */
+public class WikiLinkParser {
+
+	private CharReader reader = new CharReader();
+
+	WikiLinkParser() {
+	}
+
+	/**
+	 * Collects every token the reader finds in the text.
+	 *
+	 * @param text markup to scan
+	 * @return the set of tokens found; empty when there are none
+	 */
+	public HashSet<String> parse(String text) {
+		reader.setText(text);
+
+		HashSet<String> links = new HashSet<String>();
+		for (String token = reader.getToken(); token != null; token = reader.getToken()) {
+			links.add(token);
+		}
+		return links;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/xmlParser/XmlTagObject.java	Wed Sep 05 11:56:21 2012 +0900
@@ -0,0 +1,41 @@
+package xmlParser;
+
+import org.xml.sax.Attributes;
+
+
+
+/**
+ * Holds the name, attributes and character content of one XML element.
+ * Each instance keeps its own state, so several elements can be tracked at
+ * the same time (for example on a stack).
+ */
+public class XmlTagObject {
+
+	private Attributes attributes;
+	private String currentTag;
+	private String value;
+
+	/** Creates an object with no tag, attributes or value. */
+	public XmlTagObject() {
+
+	}
+
+	/**
+	 * Creates an object for an element with the given attributes.
+	 *
+	 * @param attr attributes of the element; the reference is stored as is
+	 */
+	public XmlTagObject(Attributes attr) {
+		this.attributes = attr;
+	}
+
+	/**
+	 * Replaces the tag name, attributes and value of this object.
+	 *
+	 * @param tag  element name
+	 * @param attr attributes of the element; may be null
+	 * @param val  character content of the element
+	 */
+	public void setValue(String tag, Attributes attr, String val) {
+		this.currentTag = tag;
+		this.attributes = attr;
+		this.value = val;
+	}
+
+	public String getTag() {
+		return currentTag;
+	}
+
+	public Attributes getAttributes() {
+		return attributes;
+	}
+
+	public String getValue() {
+		return value;
+	}
+
+}