Mercurial > hg > Members > nobuyasu > TPPageRank
changeset 14:86567db31710 draft
fix
author | one |
---|---|
date | Sat, 08 Sep 2012 04:50:50 +0900 |
parents | 0ef7268bbbac |
children | e1d758d08e9c |
files | src/pagerank/LinkConvertGraph.java src/pagerank/LinkToVertex.java src/pagerank/TPReadWikiLink.java |
diffstat | 3 files changed, 97 insertions(+), 106 deletions(-) [+] |
line wrap: on
line diff
--- a/src/pagerank/LinkConvertGraph.java Sat Sep 08 04:12:19 2012 +0900
+++ b/src/pagerank/LinkConvertGraph.java Sat Sep 08 04:50:50 2012 +0900
@@ -24,6 +24,81 @@
 import xmlParser.TextTagParser;
 public class LinkConvertGraph {
+
+
+
+
+    public static void main(String[] args) {
+//      final String filename = "./resources/article.xml";
+        final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
+//      final String fileDB = "./resources/tinkerpopDB";
+
+
+        final long PAGENUM = 11;
+        final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM);
+        final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log";
+
+        LinkConvertGraph lcg;
+
+        try {
+            lcg = new LinkConvertGraph(filename);
+            lcg.parseXml();
+//          lcg.printHash();
+
+            HashMap<String,HashSet<String>> hash = lcg.getHash();
+
+            Graph graph = new TinkerGraph();
+            LinkToVertex ltv = new LinkToVertex(graph);
+
+            FileOutputStream fos = new FileOutputStream(logFile);
+            long countId = 0;
+            for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
+                lcg.printLinkLog(map, fos);
+//              if (PAGENUM <= countId) break;
+                String pageTitle = map.getKey();
+                Vertex v;// = graph.addVertex(null);
+                if ( ltv.getId(pageTitle) == null ) {
+                    v = ltv.createVertexWithPageTitle(pageTitle);
+                    ltv.setPageRank(v, (Double)0.0);
+                    countId++;
+//                  if (PAGENUM <= countId) break;
+                } else {
+                    v = ltv.getVertex(pageTitle);
+                }
+
+                for (String linkPageTitle : map.getValue()) {
+                    Vertex linkV;
+                    if ( ltv.getId(linkPageTitle) == null) {
+                        linkV = ltv.createVertexWithPageTitle(linkPageTitle);
+                        countId++;
+                        ltv.setPageRank(linkV, (Double)0.0);
+                    } else {
+                        linkV = ltv.getVertex(linkPageTitle);
+                    }
+                    ltv.setHasLink(v, linkV);
+//                  if (PAGENUM <= countId) break;
+                }
+
+            }
+            System.out.println("countId = "+countId);
+
+            FileOutputStream out = new FileOutputStream(new File(fileDB));
+            GraphMLWriter.outputGraph(graph, out);
+
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+        } catch (SAXException e) {
+            e.printStackTrace();
+        } catch (ParserConfigurationException e) {
+            e.printStackTrace();
+        } catch (IOException e) {
+            System.err.println("Failed to parse xml");
+            e.printStackTrace();
+        }
+
+
+
+    }
     private String filename;
     private FileInputStream fis;
@@ -96,76 +171,4 @@
     }
-
-
-    public static void main(String[] args) {
-//      final String filename = "./resources/article.xml";
-        final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
-//      final String fileDB = "./resources/tinkerpopDB";
-
-
-        final long PAGENUM = 100;
-        final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM);
-        final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log";
-
-        LinkConvertGraph lcg;
-
-        try {
-            lcg = new LinkConvertGraph(filename);
-            lcg.parseXml();
-//          lcg.printHash();
-
-            HashMap<String,HashSet<String>> hash = lcg.getHash();
-
-            Graph graph = new TinkerGraph();
-            LinkToVertex ltv = new LinkToVertex(graph);
-
-            FileOutputStream fos = new FileOutputStream(logFile);
-            long countId = 0;
-            for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
-                lcg.printLinkLog(map, fos);
-                if (PAGENUM <= countId) break;
-                String pageTitle = map.getKey();
-                Vertex v;// = graph.addVertex(null);
-                if ( ltv.getId(pageTitle) == null ) {
-                    v = ltv.createVertexWithPageTitle(pageTitle);
-                    ltv.setPageRank(v, (Double)0.0);
-                    countId++;
-                    if (PAGENUM <= countId) break;
-                } else {
-                    v = ltv.getVertex(pageTitle);
-                }
-
-                for (String linkPageTitle : map.getValue()) {
-                    Vertex linkV;
-                    if ( ltv.getId(linkPageTitle) == null) {
-                        linkV = ltv.createVertexWithPageTitle(linkPageTitle);
-                        countId++;
-                        ltv.setPageRank(linkV, (Double)0.0);
-                    } else {
-                        linkV = ltv.getVertex(linkPageTitle);
-                    }
-                    ltv.setHasLink(v, linkV);
-                    if (PAGENUM <= countId) break;
-                }
-
-            }
-
-            FileOutputStream out = new FileOutputStream(new File(fileDB));
-            GraphMLWriter.outputGraph(graph, out);
-
-        } catch (FileNotFoundException e) {
-            e.printStackTrace();
-        } catch (SAXException e) {
-            e.printStackTrace();
-        } catch (ParserConfigurationException e) {
-            e.printStackTrace();
-        } catch (IOException e) {
-            System.err.println("Failed to parse xml");
-            e.printStackTrace();
-        }
-
-
-
-    }
 }
--- a/src/pagerank/LinkToVertex.java Sat Sep 08 04:12:19 2012 +0900
+++ b/src/pagerank/LinkToVertex.java Sat Sep 08 04:50:50 2012 +0900
@@ -237,7 +237,6 @@
         double sum = 0.0;
         double pageRank = 0.0;
         Vertex v = graph.getVertex(id);
-        WikiPage wiki = wikiPageHash.get(v.getProperty(PAGE_TITLE));
         GremlinPipeline<Vertex,Vertex> pipe = new GremlinPipeline<Vertex,Vertex>();
         pipe.start(graph.getVertex(id)).in("HasLink");
@@ -250,7 +249,6 @@
             sum += (double) pr / linkNum;
         }
         pageRank = (double) 1 - weight + (double) sum * weight;
-        wiki.setRank(pageRank);
         v.setProperty(PAGE_RANK, pageRank);
         return pageRank;
     }
--- a/src/pagerank/TPReadWikiLink.java Sat Sep 08 04:12:19 2012 +0900
+++ b/src/pagerank/TPReadWikiLink.java Sat Sep 08 04:50:50 2012 +0900
@@ -25,10 +25,10 @@
     public static void main(String[] args) {
-//      final String fileDB = "./resources/tinkerpopDB";
+        final String fileDB = "./resources/tinkerpopDB";
 
-        final long PAGENUM = 100;
-        final String fileDB = "./resources/tinkerGraph"+Long.toString(PAGENUM);
+        final long PAGENUM = 11;
+//      final String fileDB = "./resources/tinkerGraph"+Long.toString(PAGENUM);
         final String pageRankLog = "./resources/wikiPageRank"+Long.toString(PAGENUM)+".log";
         try {
@@ -50,44 +50,32 @@
 /*
             long start = java.lang.System.currentTimeMillis();
-            writeComputeTransition(ltv, nodeIds, 50, PAGENUM);
+            writeComputeTransition(ltv, nodeIds, 10, PAGENUM);
             long end = java.lang.System.currentTimeMillis();
             long time = end - start;
             System.out.println(time);
 */
+            for (int i=0; i<10; i++ ) {
+                long start = java.lang.System.currentTimeMillis();
+                for (int j=0; j<10; j++){
+                    for (Vertex v : graph.getVertices()) {
+                        ltv.computePageRankUsingPipes(v.getId());
+                    }
+                }
+                long end = java.lang.System.currentTimeMillis();
+                long time = end - start;
+                System.out.println(time);
+            }
+
-            for (int i=0; i<10; i++) {
-                long start = java.lang.System.currentTimeMillis();
-                for (int j=0; j<10; j++){
-                    for (Vertex v : graph.getVertices()) {
-                        ltv.computePageRankUsingPipes(v.getId());
-                    }
-                }
-                long end = java.lang.System.currentTimeMillis();
-                long time = end - start;
-//              System.out.println(time);
-            }
+            FileOutputStream fos = new FileOutputStream(new File(pageRankLog));
+            descendingOrder(graph, ltv, fos);
 /*
-            long count = 0;
-            for (Vertex v: graph.getVertices()) {
-                count++;
-                System.out.print("No."+count+" ");
-                System.out.print("title: "+v.getProperty("pageTitle"));
-                System.out.print(" pageRank: "+v.getProperty("pageRank"));
-                System.out.println();
-                System.out.flush();
-            }
-*/
-
-            FileOutputStream fos = new FileOutputStream(new File(pageRankLog));
-            descendingOrder(graph, fos);
-            // descendingOrder(wikiHash, fos);
-
             FileOutputStream out = new FileOutputStream(new File(fileDB));
             GraphMLWriter.outputGraph(graph, out);
             out.close();
-
+*/
             // loop(ltv);
         } catch (NumberFormatException e){
@@ -165,10 +153,12 @@
     }
-    public static void descendingOrder(Graph graph, FileOutputStream fos) throws IOException {
+    public static void descendingOrder(Graph graph, LinkToVertex ltv, FileOutputStream fos) throws IOException {
         ArrayList<WikiPage> list = new ArrayList<WikiPage>();
         for (Vertex v : graph.getVertices()) {
             WikiPage w = new WikiPage(v);
+            w.setInHasLink(ltv.computeInHasLink(v));
+            w.setOutHasLink(ltv.computeOutHasLink(v));
             list.add(w);
         }
         Collections.sort(list, new Comparator<WikiPage>(){