Mercurial > hg > Members > nobuyasu > TPPageRank
changeset 13:0ef7268bbbac draft
create descendingOrder(Graph,FileOutputStream) method.
author | one |
---|---|
date | Sat, 08 Sep 2012 04:12:19 +0900 |
parents | 7e38484474f4 |
children | 86567db31710 |
files | src/pagerank/LinkConvertGraph.java src/pagerank/LinkToVertex.java src/pagerank/TPReadWikiLink.java src/sample/CreateTinkerGraph.java src/sample/GremlinSample.java |
diffstat | 5 files changed, 176 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/src/pagerank/LinkConvertGraph.java Thu Sep 06 06:05:24 2012 +0900 +++ b/src/pagerank/LinkConvertGraph.java Sat Sep 08 04:12:19 2012 +0900 @@ -85,6 +85,15 @@ } } + void printLinkLog(Map.Entry<String, HashSet<String>> map, FileOutputStream os) throws IOException { + String title = map.getKey(); + os.write( ("title: " + title + "\n").getBytes()); + for (String link : map.getValue()) { + os.write( ("\t"+link+"\n").getBytes()); + } + os.write( ("\n").getBytes()); + os.flush(); + } @@ -92,56 +101,57 @@ public static void main(String[] args) { // final String filename = "./resources/article.xml"; final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml"; +// final String fileDB = "./resources/tinkerpopDB"; + + + final long PAGENUM = 100; + final String fileDB = "./resources/tinkerGraph"+ Long.toString(PAGENUM); + final String logFile = "./resources/wikiLink"+Long.toString(PAGENUM)+".log"; LinkConvertGraph lcg; try { - lcg = new LinkConvertGraph(filename); - lcg.parseXml(); // lcg.printHash(); - - FileOutputStream fos = new FileOutputStream("./resources/wikiLink.log"); - lcg.printHash(fos); HashMap<String,HashSet<String>> hash = lcg.getHash(); - - final String fileDB = "./resources/tinkerpopDB"; - Graph graph = new TinkerGraph(); - FileOutputStream out = new FileOutputStream(new File(fileDB)); LinkToVertex ltv = new LinkToVertex(graph); - long countId = 1; + FileOutputStream fos = new FileOutputStream(logFile); + long countId = 0; for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) { + lcg.printLinkLog(map, fos); + if (PAGENUM <= countId) break; String pageTitle = map.getKey(); - Vertex v;// = graph.addVertex(null); - if ( ltv.getId(pageTitle) == null ) { - v = ltv.createVertexWithPageTitle(pageTitle,countId); + v = ltv.createVertexWithPageTitle(pageTitle); ltv.setPageRank(v, (Double)0.0); countId++; + if (PAGENUM <= countId) break; } else { v = ltv.getVertex(pageTitle); } - + for (String linkPageTitle : map.getValue()) { Vertex 
linkV; if ( ltv.getId(linkPageTitle) == null) { - linkV = ltv.createVertexWithPageTitle(linkPageTitle,countId); + linkV = ltv.createVertexWithPageTitle(linkPageTitle); countId++; ltv.setPageRank(linkV, (Double)0.0); } else { linkV = ltv.getVertex(linkPageTitle); } ltv.setHasLink(v, linkV); + if (PAGENUM <= countId) break; } } + FileOutputStream out = new FileOutputStream(new File(fileDB)); GraphMLWriter.outputGraph(graph, out); } catch (FileNotFoundException e) {
--- a/src/pagerank/LinkToVertex.java Thu Sep 06 06:05:24 2012 +0900 +++ b/src/pagerank/LinkToVertex.java Sat Sep 08 04:12:19 2012 +0900 @@ -6,6 +6,8 @@ import com.tinkerpop.blueprints.Edge; import com.tinkerpop.blueprints.Graph; import com.tinkerpop.blueprints.Vertex; +import com.tinkerpop.gremlin.java.GremlinPipeline; +import com.tinkerpop.pipes.util.iterators.SingleIterator; import pagerank.WikiPage; @@ -14,12 +16,15 @@ Graph graph; public final static String PAGE_TITLE = "pageTitle"; public final static String PAGE_RANK = "pageRank"; - // pageIdTable - // key: pageTitle value: Vertex ID + + /* pageIdTable + * key: pageTitle value: Vertex ID + */ private HashMap<String, Object> pageIdTable = new HashMap<String, Object>(); - // wikiPageHash - // key: pageTitle value: wikiPage(class) + /* wikiPageHash + * key: pageTitle value: wikiPage(class) + */ private HashMap<String, WikiPage> wikiPageHash = new HashMap<String, WikiPage>(); private long AllVertexNumber; @@ -108,7 +113,7 @@ } } - long searchAllVertices() { + public long searchAllVertices() { AllVertexNumber = 0; for (Vertex v : graph.getVertices()) { if ( (v.getProperty(PAGE_TITLE) != null) && @@ -221,15 +226,35 @@ double pr = (Double)linkV.getProperty(PAGE_RANK); sum += (double) pr / computeOutHasLink(linkV) ; } - double tmp = (double) 1 - weight; - pageRank = (double) tmp / AllVertexNumber - + (double) sum * weight; + pageRank = (double) 1 - weight + (double) sum * weight; wiki.setRank(pageRank); v.setProperty(PAGE_RANK, pageRank); return pageRank; } + public double computePageRankUsingPipes(Object id) { + double sum = 0.0; + double pageRank = 0.0; + Vertex v = graph.getVertex(id); + WikiPage wiki = wikiPageHash.get(v.getProperty(PAGE_TITLE)); + + GremlinPipeline<Vertex,Vertex> pipe = new GremlinPipeline<Vertex,Vertex>(); + pipe.start(graph.getVertex(id)).in("HasLink"); + for (Vertex inVer : pipe) { + Object inVerId = inVer.getId(); + GremlinPipeline<Vertex,Vertex> inPipe = new 
GremlinPipeline<Vertex,Vertex>(); + inPipe.start(graph.getVertex(inVerId)).out("HasLink"); + long linkNum = inPipe.count(); + double pr = (Double) inVer.getProperty(PAGE_RANK); + sum += (double) pr / linkNum; + } + pageRank = (double) 1 - weight + (double) sum * weight; + wiki.setRank(pageRank); + v.setProperty(PAGE_RANK, pageRank); + return pageRank; + } + public void printVertexInfo(int nodeId) { Vertex v = graph.getVertex(nodeId); printInHasLink(v, 1);
--- a/src/pagerank/TPReadWikiLink.java Thu Sep 06 06:05:24 2012 +0900 +++ b/src/pagerank/TPReadWikiLink.java Sat Sep 08 04:12:19 2012 +0900 @@ -25,8 +25,11 @@ public static void main(String[] args) { - final String fileDB = "./resources/tinkerpopDB"; - +// final String fileDB = "./resources/tinkerpopDB"; + + final long PAGENUM = 100; + final String fileDB = "./resources/tinkerGraph"+Long.toString(PAGENUM); + final String pageRankLog = "./resources/wikiPageRank"+Long.toString(PAGENUM)+".log"; try { Graph graph = new TinkerGraph(); @@ -35,28 +38,55 @@ in.close(); LinkToVertex ltv = new LinkToVertex(graph); -// ltv.initPageRankAllVertex(); -/* + ltv.initPageRankAllVertex(); + final long AllVertexNumber = ltv.searchAllVertices(); HashMap<String, WikiPage> wikiHash = ltv.getWikiPageHash(); System.out.println("AllVertexNumber = "+AllVertexNumber); -*/ + + +// String nodeIds[] = {"80", "290", "21", "164", "41972", "103700", "65956", "103700"}; + String nodeIds[] = {"146","148"}; + /* - String nodeIds[] = {"80", "290", "21", "164"}; - writeComputeTransition(ltv, nodeIds, 50); + long start = java.lang.System.currentTimeMillis(); + writeComputeTransition(ltv, nodeIds, 50, PAGENUM); + long end = java.lang.System.currentTimeMillis(); + long time = end - start; + System.out.println(time); +*/ + -*/ -/* - FileOutputStream fos = new FileOutputStream(new File("./resources/wikiPageRank.log")); - descendingOrder(wikiHash, fos); + for (int i=0; i<10; i++) { + long start = java.lang.System.currentTimeMillis(); + for (int j=0; j<10; j++){ + for (Vertex v : graph.getVertices()) { + ltv.computePageRankUsingPipes(v.getId()); + } + } + long end = java.lang.System.currentTimeMillis(); + long time = end - start; +// System.out.println(time); + } +/* + long count = 0; + for (Vertex v: graph.getVertices()) { + count++; + System.out.print("No."+count+" "); + System.out.print("title: "+v.getProperty("pageTitle")); + System.out.print(" pageRank: "+v.getProperty("pageRank")); + 
System.out.println(); + System.out.flush(); + } */ - + FileOutputStream fos = new FileOutputStream(new File(pageRankLog)); + descendingOrder(graph, fos); + // descendingOrder(wikiHash, fos); FileOutputStream out = new FileOutputStream(new File(fileDB)); GraphMLWriter.outputGraph(graph, out); out.close(); - // loop(ltv); @@ -83,21 +113,22 @@ } } - public static void writeComputeTransition(LinkToVertex ltv,final String nodeIds[], int count) throws IOException { + public static void writeComputeTransition(LinkToVertex ltv,final String nodeIds[], int count, long pagenum) throws IOException { LinkedList<FileOutputStream> fosList = new LinkedList<FileOutputStream>(); for (String id: nodeIds) { - String filename = "./resources/NodeId_"+id+".dat"; + String filename = "./resources/VertexId_"+id+"_num"+Long.toString(pagenum)+".dat"; FileOutputStream fos = null; fos = new FileOutputStream(filename); Vertex v = ltv.getVertexById(id); - fos.write( ("# Node ID "+id+" "+ ltv.getPageTitle(v)+"\n").getBytes()); + fos.write( ("# Vertex ID "+id+" "+ ltv.getPageTitle(v)+"\n").getBytes()); fosList.add(fos); } for (int i=0; i<count; i++) { for (Vertex v : ltv.getAllVertices() ) { - ltv.computePageRank(v); +// ltv.computePageRank(v); + ltv.computePageRankUsingPipes(v.getId()); } for (int index=0; index<nodeIds.length; index++){ @@ -120,12 +151,37 @@ } Collections.sort(list, new Comparator<WikiPage>(){ public int compare(WikiPage w1, WikiPage w2) { - return (int)(w2.getRank()*Math.pow(10, 10)) - (int)(w1.getRank()*Math.pow(10,10)); + return (int)(w2.getRank()*Math.pow(10, 5)) - (int)(w1.getRank()*Math.pow(10, 5)); } }); + long count = 1; for (WikiPage w : list) { + fos.write(("No."+count+"\n").getBytes()); w.printInfo(fos); + count++; + } + fos.close(); + + } + + public static void descendingOrder(Graph graph, FileOutputStream fos) throws IOException { + ArrayList<WikiPage> list = new ArrayList<WikiPage>(); + for (Vertex v : graph.getVertices()) { + WikiPage w = new WikiPage(v); + 
list.add(w); + } + Collections.sort(list, new Comparator<WikiPage>(){ + public int compare(WikiPage w1, WikiPage w2) { + return (int)(w2.getRank()*Math.pow(10, 5)) - (int)(w1.getRank()*Math.pow(10, 5)); + } + }); + + long count = 1; + for (WikiPage w : list) { + fos.write(("No."+count+"\n").getBytes()); + w.printInfo(fos); + count++; } fos.close();
--- a/src/sample/CreateTinkerGraph.java Thu Sep 06 06:05:24 2012 +0900 +++ b/src/sample/CreateTinkerGraph.java Sat Sep 08 04:12:19 2012 +0900 @@ -11,27 +11,36 @@ import com.tinkerpop.blueprints.Edge; import com.tinkerpop.blueprints.Graph; import com.tinkerpop.blueprints.Vertex; +import com.tinkerpop.blueprints.impls.tg.TinkerGraph; import com.tinkerpop.blueprints.util.io.graphml.GraphMLReader; import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter; public class CreateTinkerGraph { - public static final String filename = "./resources/tinkerDB"; + public static final String filename = "./resources/sampleDB"; public static void main(String[] args) { - TinkerGraph graph = new TinkerGraph("/db"); - Vertex a = graph.addVertex(null); - Vertex b = graph.addVertex(null); - a.setProperty("name", "maro"); - b.setProperty("name", "Peter"); - Edge e = graph.addEdge(null, a, b, "knows"); - System.out.println(e.getVertex(Direction.OUT).getProperty("name") - + "--" + e.getLabel() + "-->" - + e.getVertex(Direction.IN).getProperty("name")); - graph.shutdown(); + try { +// outputGraph(); +/* + Graph graph = new TinkerGraph(); + FileInputStream in = new FileInputStream(new File(filename)); + + GraphMLReader.inputGraph(graph, in); + + for (Vertex v: graph.getVertices()) { + System.out.println(v.getId().getClass()); + } +*/ + readGraph(); - + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } public static void createGraph() {
--- a/src/sample/GremlinSample.java Thu Sep 06 06:05:24 2012 +0900 +++ b/src/sample/GremlinSample.java Sat Sep 08 04:12:19 2012 +0900 @@ -16,8 +16,8 @@ public static void main(String[] args) { - final String fileDB = "./resources/tinkerpopDB"; - +// final String fileDB = "./resources/tinkerpopDB"; + final String fileDB = "./resources/tinkerGraph100"; try { Graph graph = new TinkerGraph(); @@ -26,15 +26,29 @@ in.close(); LinkToVertex ltv = new LinkToVertex(graph); - String id = "21"; - GremlinPipeline pipe = new GremlinPipeline(); - pipe.start(graph.getVertex(id)).out("HasLink").property("pageTitle"); - pipe.setStarts(new SingleIterator<Vertex>(graph.getVertex(id))); + final long AllVertexNumber = ltv.searchAllVertices(); + System.out.println("AllVertexNumber = "+ AllVertexNumber); + + for (Vertex o : graph.getVertices()) { + System.out.println(o); + } +/* + GremlinPipeline pipe = new GremlinPipeline(); + pipe.start(graph.getVertex(1)); +*/ + +/* + String id = "85956"; + GremlinPipeline pipe = new GremlinPipeline(); + pipe.start(graph.getVertex(id)).out("HasLink");//.property("pageTitle"); + pipe.property("pageTitle"); +// pipe.start(graph.getVertex(id)).out("HasLink"); +// pipe.setStarts(new SingleIterator<Vertex>(graph.getVertex(id))); + for (Object title : pipe) { - System.out.println((String)title); + System.out.println(title); } - - +*/ } catch (NumberFormatException e){ System.out.println("Program exit"); } catch (Exception e) {