changeset 14:86567db31710 draft

fix: move main() to the top of LinkConvertGraph, drop the stale wikiPageHash rank update in computePageRankUsingPipes, and pass LinkToVertex into descendingOrder to record in/out HasLink counts
author one
date Sat, 08 Sep 2012 04:50:50 +0900
parents 0ef7268bbbac
children e1d758d08e9c
files src/pagerank/LinkConvertGraph.java src/pagerank/LinkToVertex.java src/pagerank/TPReadWikiLink.java
diffstat 3 files changed, 97 insertions(+), 106 deletions(-)
--- a/src/pagerank/LinkConvertGraph.java	Sat Sep 08 04:12:19 2012 +0900
+++ b/src/pagerank/LinkConvertGraph.java	Sat Sep 08 04:50:50 2012 +0900
@@ -24,6 +24,81 @@
 import xmlParser.TextTagParser;
 
 public class LinkConvertGraph {
+
+
+	
+	
+	public static void main(String[] args) {
+//		final String filename = "./resources/article.xml";
+		final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
+//		final String fileDB = "./resources/tinkerpopDB";
+
+
+		final long PAGENUM = 11;
+		final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM);
+		final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log";
+		
+		LinkConvertGraph lcg;
+
+		try {
+			lcg = new LinkConvertGraph(filename);
+			lcg.parseXml();
+//			lcg.printHash();
+
+			HashMap<String,HashSet<String>> hash = lcg.getHash();
+
+			Graph graph = new TinkerGraph();
+			LinkToVertex ltv = new LinkToVertex(graph);
+
+			FileOutputStream fos = new FileOutputStream(logFile);
+			long countId = 0;
+			for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
+				lcg.printLinkLog(map, fos);
+//				if (PAGENUM <= countId) break;
+				String pageTitle = map.getKey();
+				Vertex v;// = graph.addVertex(null);
+				if ( ltv.getId(pageTitle) == null ) {
+					v = ltv.createVertexWithPageTitle(pageTitle);
+					ltv.setPageRank(v, (Double)0.0);
+					countId++;
+//					if (PAGENUM <= countId) break;
+				} else {
+					v = ltv.getVertex(pageTitle);
+				}
+				
+				for (String linkPageTitle : map.getValue()) {
+					Vertex linkV;
+					if ( ltv.getId(linkPageTitle) == null) {
+						linkV = ltv.createVertexWithPageTitle(linkPageTitle);
+						countId++;
+						ltv.setPageRank(linkV, (Double)0.0);
+					} else {
+						linkV = ltv.getVertex(linkPageTitle);
+					}
+					ltv.setHasLink(v, linkV);
+//					if (PAGENUM <= countId) break;
+				}
+				
+			}
+			System.out.println("countId = "+countId);
+			
+			FileOutputStream out = new FileOutputStream(new File(fileDB));
+			GraphMLWriter.outputGraph(graph, out);
+
+		} catch (FileNotFoundException e) {
+			e.printStackTrace();
+		} catch (SAXException e) {
+			e.printStackTrace();			
+		} catch (ParserConfigurationException e) {
+			e.printStackTrace();
+		} catch (IOException e) {
+			System.err.println("Failed to parse xml");
+			e.printStackTrace();
+		}
+
+		
+
+	}	
 	
 	private String filename;
 	private FileInputStream fis;
@@ -96,76 +171,4 @@
 	}
 	
 	
-	
-	
-	public static void main(String[] args) {
-//		final String filename = "./resources/article.xml";
-		final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
-//		final String fileDB = "./resources/tinkerpopDB";
-
-
-		final long PAGENUM = 100; 
-		final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM);
-		final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log";
-		
-		LinkConvertGraph lcg;
-
-		try {
-			lcg = new LinkConvertGraph(filename);
-			lcg.parseXml();
-//			lcg.printHash();
-
-			HashMap<String,HashSet<String>> hash = lcg.getHash();
-
-			Graph graph = new TinkerGraph();
-			LinkToVertex ltv = new LinkToVertex(graph);
-
-			FileOutputStream fos = new FileOutputStream(logFile);
-			long countId = 0;
-			for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
-				lcg.printLinkLog(map, fos);
-				if (PAGENUM <= countId) break;
-				String pageTitle = map.getKey();
-				Vertex v;// = graph.addVertex(null);
-				if ( ltv.getId(pageTitle) == null ) {
-					v = ltv.createVertexWithPageTitle(pageTitle);
-					ltv.setPageRank(v, (Double)0.0);
-					countId++;
-					if (PAGENUM <= countId) break;
-				} else {
-					v = ltv.getVertex(pageTitle);
-				}
-				
-				for (String linkPageTitle : map.getValue()) {
-					Vertex linkV;
-					if ( ltv.getId(linkPageTitle) == null) {
-						linkV = ltv.createVertexWithPageTitle(linkPageTitle);
-						countId++;
-						ltv.setPageRank(linkV, (Double)0.0);
-					} else {
-						linkV = ltv.getVertex(linkPageTitle);
-					}
-					ltv.setHasLink(v, linkV);
-					if (PAGENUM <= countId) break;
-				}
-				
-			}
-
-			FileOutputStream out = new FileOutputStream(new File(fileDB));
-			GraphMLWriter.outputGraph(graph, out);
-
-		} catch (FileNotFoundException e) {
-			e.printStackTrace();
-		} catch (SAXException e) {
-			e.printStackTrace();			
-		} catch (ParserConfigurationException e) {
-			e.printStackTrace();
-		} catch (IOException e) {
-			System.err.println("Failed to parse xml");
-			e.printStackTrace();
-		}
-
-		
-
-	}
 }
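
Note on the relocated main() above: it builds a TinkerGraph from the pageTitle -> linked-titles map returned by getHash(), creating each vertex at most once and seeding its PageRank with 0.0. A minimal sketch of that get-or-create step, using only the LinkToVertex methods visible in this diff (the helper itself is hypothetical, not part of the source; error handling omitted):

	// Reuse the vertex for pageTitle if it already exists; otherwise create it
	// and initialise its PageRank to 0.0, as main() does for both the page
	// itself and every page it links to.
	static Vertex getOrCreateVertex(LinkToVertex ltv, String pageTitle) {
		if (ltv.getId(pageTitle) == null) {
			Vertex v = ltv.createVertexWithPageTitle(pageTitle);
			ltv.setPageRank(v, (Double) 0.0);
			return v;
		}
		return ltv.getVertex(pageTitle);
	}
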
--- a/src/pagerank/LinkToVertex.java	Sat Sep 08 04:12:19 2012 +0900
+++ b/src/pagerank/LinkToVertex.java	Sat Sep 08 04:50:50 2012 +0900
@@ -237,7 +237,6 @@
 		double sum = 0.0;
 		double pageRank = 0.0;
 		Vertex v = graph.getVertex(id);
-		WikiPage wiki = wikiPageHash.get(v.getProperty(PAGE_TITLE));
 		
 		GremlinPipeline<Vertex,Vertex> pipe = new GremlinPipeline<Vertex,Vertex>();
 		pipe.start(graph.getVertex(id)).in("HasLink");
@@ -250,7 +249,6 @@
 			sum += 	(double) pr / linkNum;
 		}
 		pageRank = (double) 1 - weight + (double) sum * weight;		
-		wiki.setRank(pageRank);
 		v.setProperty(PAGE_RANK, pageRank);
 		return pageRank;
 	}
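
The two removed lines had also mirrored the computed rank into the WikiPage cached in wikiPageHash; after this change computePageRankUsingPipes only updates the vertex's pageRank property. The per-vertex update, visible in the surrounding context, is the damped PageRank step newPR(v) = (1 - weight) + weight * sum over in-neighbours u of PR(u) / outDegree(u). A self-contained sketch of that formula over plain maps (an illustrative stand-in, not project code; the method in this diff applies the same update in place, one vertex at a time, via GremlinPipeline):

	import java.util.*;

	// Illustration only: one synchronous PageRank pass over an adjacency map.
	// newPR(v) = (1 - weight) + weight * sum_{u -> v} PR(u) / outDegree(u)
	public class PageRankStep {
		static Map<String, Double> step(Map<String, Set<String>> outLinks,
				Map<String, Double> pr, double weight) {
			Map<String, Double> next = new HashMap<String, Double>();
			for (String v : pr.keySet()) {
				next.put(v, 1.0 - weight);               // teleport term
			}
			for (Map.Entry<String, Set<String>> e : outLinks.entrySet()) {
				if (e.getValue().isEmpty()) continue;    // dangling page: nothing to distribute
				Double p = pr.get(e.getKey());
				double share = (p == null ? 0.0 : p) / e.getValue().size();
				for (String v : e.getValue()) {
					if (next.containsKey(v)) {           // only pages tracked in pr
						next.put(v, next.get(v) + weight * share);
					}
				}
			}
			return next;
		}
	}
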
--- a/src/pagerank/TPReadWikiLink.java	Sat Sep 08 04:12:19 2012 +0900
+++ b/src/pagerank/TPReadWikiLink.java	Sat Sep 08 04:50:50 2012 +0900
@@ -25,10 +25,10 @@
 
 	public static void main(String[] args) {
 
-//		final String fileDB = "./resources/tinkerpopDB";
+		final String fileDB = "./resources/tinkerpopDB";
 
-		final long PAGENUM = 100; 
-		final String fileDB = "./resources/tinkerGraph"+Long.toString(PAGENUM);	
+		final long PAGENUM = 11; 
+//		final String fileDB = "./resources/tinkerGraph"+Long.toString(PAGENUM);	
 		final String pageRankLog = "./resources/wikiPageRank"+Long.toString(PAGENUM)+".log";	
 		
 		try {
@@ -50,44 +50,32 @@
 			
 /*
 			long start = java.lang.System.currentTimeMillis();
-			writeComputeTransition(ltv, nodeIds, 50, PAGENUM);
+			writeComputeTransition(ltv, nodeIds, 10, PAGENUM);
 			long end = java.lang.System.currentTimeMillis();
 			long time = end - start;
 			System.out.println(time);
 */
+			for (int i=0; i<10; i++ ) {
+			long start = java.lang.System.currentTimeMillis();
+			for (int j=0; j<10; j++){
+				for (Vertex v : graph.getVertices()) {
+					ltv.computePageRankUsingPipes(v.getId());
+				}
+			}
+			long end = java.lang.System.currentTimeMillis();
+			long time = end - start;
+			System.out.println(time);
+			}
+			
 			
 
-			for (int i=0; i<10; i++) {
-				long start = java.lang.System.currentTimeMillis();
-				for (int j=0; j<10; j++){
-					for (Vertex v : graph.getVertices()) {
-						ltv.computePageRankUsingPipes(v.getId());
-					}
-				}
-				long end = java.lang.System.currentTimeMillis();
-				long time = end - start;
-//				System.out.println(time);
-			}
+			FileOutputStream fos = new FileOutputStream(new File(pageRankLog));
+			descendingOrder(graph, ltv, fos);			
 /*			
-			long count = 0;
-			for (Vertex v: graph.getVertices()) {
-				count++;
-				System.out.print("No."+count+" ");
-				System.out.print("title: "+v.getProperty("pageTitle"));
-				System.out.print(" pageRank: "+v.getProperty("pageRank"));
-				System.out.println();
-				System.out.flush();
-			}
-*/
-
-			FileOutputStream fos = new FileOutputStream(new File(pageRankLog));
-			descendingOrder(graph, fos);			
-			//			descendingOrder(wikiHash, fos);			
-			
 			FileOutputStream out = new FileOutputStream(new File(fileDB));
 			GraphMLWriter.outputGraph(graph, out);
 			out.close();
-			
+*/			
 //			loop(ltv);
 			
 		} catch (NumberFormatException e){
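
In the hunk below, descendingOrder additionally receives the LinkToVertex so that each WikiPage records its in/out HasLink counts (computeInHasLink / computeOutHasLink) before the list is sorted; the call site above has already been switched to descendingOrder(graph, ltv, fos). The sort orders pages by descending PageRank; a minimal sketch of such a comparator (illustrative only, and getRank() is an assumed accessor, not shown in this diff):

	Collections.sort(list, new Comparator<WikiPage>() {
		// Highest PageRank first.
		public int compare(WikiPage a, WikiPage b) {
			return Double.compare(b.getRank(), a.getRank());  // getRank() is assumed here
		}
	});
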
@@ -165,10 +153,12 @@
 		
 	}
 
-	public static void descendingOrder(Graph graph, FileOutputStream fos) throws IOException {
+	public static void descendingOrder(Graph graph, LinkToVertex ltv, FileOutputStream fos) throws IOException {
 		ArrayList<WikiPage> list = new ArrayList<WikiPage>();
 		for (Vertex v : graph.getVertices()) {
 			WikiPage w = new WikiPage(v);
+			w.setInHasLink(ltv.computeInHasLink(v));
+			w.setOutHasLink(ltv.computeOutHasLink(v));
 			list.add(w);
 		}
 		Collections.sort(list, new Comparator<WikiPage>(){