Mercurial > hg > Members > nobuyasu > TestNeo4j
changeset 25:fbf0cf550b06 draft
fix ReadWikiLink.java
author | one |
---|---|
date | Thu, 30 Aug 2012 23:23:24 +0900 |
parents | 71fe482aaf32 |
children | cfbab7d87188 |
files | src/wikigraph/LinkConvertGraph.java src/wikigraph/LinkToNode.java src/wikigraph/ReadWikiLink.java src/wikigraph/WikiPage.java |
diffstat | 4 files changed, 157 insertions(+), 145 deletions(-) [+] |
line wrap: on
line diff
--- a/src/wikigraph/LinkConvertGraph.java Tue Aug 28 15:27:35 2012 +0900 +++ b/src/wikigraph/LinkConvertGraph.java Thu Aug 30 23:23:24 2012 +0900 @@ -92,7 +92,6 @@ LinkConvertGraph lcg; - try { lcg = new LinkConvertGraph(filename); @@ -127,7 +126,7 @@ Node linkNode; if ( ltn.getId(linkPageTitle) == null) { linkNode = ltn.createNodeWithPageTitle(linkPageTitle); - ltn.setPageRank(linkNode, 0); + ltn.setPageRank(linkNode, (Double)0.0); } else { linkNode = ltn.getNode(linkPageTitle); }
--- a/src/wikigraph/LinkToNode.java Tue Aug 28 15:27:35 2012 +0900 +++ b/src/wikigraph/LinkToNode.java Thu Aug 30 23:23:24 2012 +0900 @@ -16,7 +16,6 @@ import wikigraph.Neo4jTest.RelTypes; - public class LinkToNode { GraphDatabaseService graphDb; @@ -24,12 +23,15 @@ public final static String PAGE_TITLE = "page_title"; public final static String PAGE_RANK = "page_rank"; - private HashMap<String,Long> pageIdTable = new HashMap<String,Long>(); - private HashMap<String,Long> pageRankTable = new HashMap<String,Long>(); - - private HashMap<String,WikiPage> wikiPageHash = new HashMap<String,WikiPage>(); + private HashMap<String, Long> pageIdTable = new HashMap<String, Long>(); + private HashMap<String, Double> pageRankTable = new HashMap<String, Double>(); + + private HashMap<String, WikiPage> wikiPageHash = new HashMap<String, WikiPage>(); private long AllNodeNumber; + private final double weight1 = 0.85; + private final double weight2 = 0.15; + public enum RelTypes implements RelationshipType { HAS_LINK } @@ -39,7 +41,7 @@ this.graphOpe = GlobalGraphOperations.at(graphDb); AllNodeNumber = 0; } - + LinkToNode(GraphDatabaseService graphDb, GlobalGraphOperations graphOpe) { this.graphDb = graphDb; this.graphOpe = graphOpe; @@ -55,97 +57,118 @@ Long getId(String pageTitle) { return pageIdTable.get(pageTitle); } - + boolean isHasLink(Relationship rel) { return rel.isType(RelTypes.HAS_LINK); } - + private Node createNode() { return graphDb.createNode(); } - + private Node createNodeWithProperty(String key, Object value) { Node node = createNode(); - node.setProperty(key,value); + node.setProperty(key, value); return node; } - + void initAllNodePageRank() { for (Node node : graphOpe.getAllNodes()) { - setPageRank(node, 0); + setPageRank(node, 0.0); } } - - + String getPageTitle(Node node) { return (String) node.getProperty(PAGE_TITLE); } - - Long getPageRank(Node node) { - return (Long) node.getProperty(PAGE_RANK); + + Double getPageRank(Node node) { + return (Double) node.getProperty(PAGE_RANK); } - - Node createNodeWithPageTitle(String pageTitle){ + + Node createNodeWithPageTitle(String pageTitle) { Node node = createNodeWithProperty(PAGE_TITLE, pageTitle); - pageIdTable.put(pageTitle, node.getId()); + pageIdTable.put(pageTitle, node.getId()); return node; } - - Node setPageRank(Node node, long rank) { + + Node setPageRank(Node node, Double rank) { node.setProperty(PAGE_RANK, rank); return node; } - + Node getNode(String name) { long id = pageIdTable.get(name); return graphDb.getNodeById(id); } - void setRelationship(Node node1, Node node2, RelTypes type) { + Node getNode(int nodeId) { + return graphDb.getNodeById(nodeId); + } + + void setRelationship(Node node1, Node node2, RelTypes type) { node1.createRelationshipTo(node2, type); } void setHasLink(Node node1, Node node2) { setRelationship(node1, node2, RelTypes.HAS_LINK); } - + long searchAllNodes() { AllNodeNumber = 0; - for (Node n: graphOpe.getAllNodes()) { - if (n.hasProperty(PAGE_TITLE)) { + for (Node n : graphOpe.getAllNodes()) { + + n.setProperty(PAGE_RANK,(double)0.0); + + if (n.hasProperty(PAGE_TITLE) || n.hasProperty(PAGE_RANK)) { WikiPage wiki = new WikiPage(n); - wikiPageHash.put((String)n.getProperty(PAGE_TITLE), wiki); - pageIdTable.put((String)n.getProperty(PAGE_TITLE), n.getId()); - pageRankTable.put((String)n.getProperty(PAGE_TITLE), (Long) n.getProperty(PAGE_RANK)); + pageIdTable.put((String) n.getProperty(PAGE_TITLE), n.getId()); + pageRankTable.put((String) n.getProperty(PAGE_TITLE), + (Double) n.getProperty(PAGE_RANK)); wiki.setInLink(computeInHasLink(n)); wiki.setOutLink(computeOutHasLink(n)); - + wikiPageHash.put((String) n.getProperty(PAGE_TITLE), wiki); AllNodeNumber++; } } return AllNodeNumber; } - - HashMap<String,WikiPage> getWikiPageHash() { + + void searchRegiNodes(Node n) { + + if (n.hasProperty(PAGE_TITLE) || n.hasProperty(PAGE_RANK)) { + WikiPage wiki = new WikiPage(n); + pageIdTable.put((String) n.getProperty(PAGE_TITLE), n.getId()); + pageRankTable.put((String) n.getProperty(PAGE_TITLE), + (Double) n.getProperty(PAGE_RANK)); + wiki.setInLink(computeInHasLink(n)); + wiki.setOutLink(computeOutHasLink(n)); + wikiPageHash.put((String) n.getProperty(PAGE_TITLE), wiki); + AllNodeNumber++; + } + } + + HashMap<String, WikiPage> getWikiPageHash() { return wikiPageHash; } - - HashMap<String,Long> getPageIdTable() { + + HashMap<String, Long> getPageIdTable() { return pageIdTable; } - HashMap<String,Long> getPageRankTable() { + HashMap<String, Double> getPageRankTable() { return pageRankTable; - } - + } + public void printAllNodes() { - for (Node n: graphOpe.getAllNodes()) { - System.out.println("ID="+ n.getId()); - for (String key: n.getPropertyKeys()) { + for (Node n : graphOpe.getAllNodes()) { + System.out.println("ID=" + n.getId()); + for (String key : n.getPropertyKeys()) { System.out.println(key + "=" + n.getProperty(key)); } - Iterable<Relationship> relIter = n.getRelationships(RelTypes.HAS_LINK); - for (Relationship rel : relIter ) { + Iterable<Relationship> relIter = n + .getRelationships(RelTypes.HAS_LINK); + for (Relationship rel : relIter) { System.out.println(rel); } System.out.println("--"); @@ -154,27 +177,27 @@ } private Traverser getTraverser(final Node node, final Direction rel) { - TraversalDescription td = Traversal.description() - .breadthFirst() - .relationships( RelTypes.HAS_LINK, rel ) - .evaluator( Evaluators.excludeStartPosition()); - return td.traverse( node ); - + TraversalDescription td = Traversal.description().breadthFirst() + .relationships(RelTypes.HAS_LINK, rel) + .evaluator(Evaluators.excludeStartPosition()); + return td.traverse(node); + } - - public Traverser getOutHasLinkTraverser( final Node node) { + + public Traverser getOutHasLinkTraverser(final Node node) { return getTraverser(node, Direction.OUTGOING); } - - public Traverser getInHasLinkTraverser( final Node node) { + + public Traverser getInHasLinkTraverser(final Node node) { return getTraverser(node, Direction.INCOMING); } - + public long computeOutHasLink(final Node node) { long count = 0; Traverser hasLinkTraverser = getOutHasLinkTraverser(node); for (Path hasLinkPath : hasLinkTraverser) { - if (hasLinkPath.length() > 1) break; + if (hasLinkPath.length() > 1) + break; count++; } return count; @@ -184,46 +207,49 @@ long count = 0; Traverser hasLinkTraverser = getInHasLinkTraverser(node); for (Path hasLinkPath : hasLinkTraverser) { - if (hasLinkPath.length() > 1) break; + if (hasLinkPath.length() > 1) + break; count++; } return count; } - public void searchHasLinkOut(final Node node) { int numberOfLinkPages = 0; String output = node.getProperty(PAGE_TITLE) + " have link pages:\n"; - Traverser hasLinkTraverser = getOutHasLinkTraverser( node ); - for ( Path hasLinkPath : hasLinkTraverser) { + Traverser hasLinkTraverser = getOutHasLinkTraverser(node); + for (Path hasLinkPath : hasLinkTraverser) { output += "At depth " + hasLinkPath.length() + " => " - + hasLinkPath.endNode() - .getProperty(PAGE_TITLE) + "\n"; + + hasLinkPath.endNode().getProperty(PAGE_TITLE) + "\n"; numberOfLinkPages++; } output += "Number of link pages: " + numberOfLinkPages + "\n"; System.out.println(output); } - public long computePageRank(Node node) { - long sum = 0; - long pageRank = 0; + public double computePageRank(Node node) { + double sum = 0; + double pageRank = 0; String title = getPageTitle(node); WikiPage wiki = wikiPageHash.get(title); - + Traverser hasLinkTraverser = getInHasLinkTraverser(node); - for (Path hasLinkPath: hasLinkTraverser) { - if (hasLinkPath.length() > 1) break; + for (Path hasLinkPath : hasLinkTraverser) { + if (hasLinkPath.length() > 1) + break; Node n = hasLinkPath.endNode(); - sum += (Long)n.getProperty(PAGE_RANK); + sum += (Double) n.getProperty(PAGE_RANK); } - pageRank = (long) (sum/wiki.getInLink() * 0.85 - + 1 / AllNodeNumber * 0.15); - + if (wiki.getOutLink() == 0) { + pageRank = (double) ((double) 1 / AllNodeNumber * weight2); + } else { + pageRank = (double) ((double)sum / wiki.getOutLink() * weight1) + + (double) ((double) 1 / AllNodeNumber * weight2); + } + wiki.setRank(pageRank); node.setProperty(PAGE_RANK, pageRank); return pageRank; } - - + }
--- a/src/wikigraph/ReadWikiLink.java Tue Aug 28 15:27:35 2012 +0900 +++ b/src/wikigraph/ReadWikiLink.java Thu Aug 30 23:23:24 2012 +0900 @@ -1,6 +1,9 @@ package wikigraph; +import java.io.FileOutputStream; +import java.io.IOException; import java.util.HashMap; +import java.util.LinkedList; import java.util.Map; import org.neo4j.graphdb.GraphDatabaseService; @@ -26,78 +29,50 @@ try { final long AllNodeNumber = ltn.searchAllNodes(); + HashMap<String, WikiPage> wikiHash = ltn.getWikiPageHash(); - long maxRelCount = 0; - String tmpKey = ""; - // relHash record number of relationships of each node. - // Key: page_title value: number of relationships - HashMap<String, Long> relHash = new HashMap<String, Long>(); + + int nodeIds[] = {1, 2, 3}; + LinkedList<FileOutputStream> fosList = new LinkedList<FileOutputStream>(); - // relKeyHash - // key: number of relationships value: ID - HashMap<Long, Long> relKeyHash = new HashMap<Long, Long>(); + for (int i: nodeIds) { + String filename = String.format("./resource/NodeId_%d.dat", i); + FileOutputStream fos = new FileOutputStream(filename); + fos.write( String.format("# Node ID %d\n",i).getBytes()); + fosList.add(fos); + } + + for (int i=0; i<50; i++) { + for (Node node : graphOpe.getAllNodes()) { + ltn.computePageRank(node); + } - HashMap<String,WikiPage> wikiHash = ltn.getWikiPageHash(); - - for (String title: wikiHash.keySet()) { - WikiPage w = wikiHash.get(title); - long id = w.getId(); + for (int index=0; index<nodeIds.length; index++){ + FileOutputStream fos = fosList.get(index); + printPageRankLog(fos, ltn, nodeIds[index], i); + } - Node node = graphDb.getNodeById(id); - - Iterable<Relationship> relIter = node.getRelationships(); - long count = 0; - // compute number of relationship. - for (Relationship rel : relIter) { - if (ltn.isHasLink(rel)) - count++; - } - relHash.put(title, count); - - if (maxRelCount < count) { - maxRelCount = count; - tmpKey = title; - } - relKeyHash.put(count, id); } - /* - System.out.println("title: id: rank: inLink: outLink"); - for (String title: wikiHash.keySet()) { - WikiPage w = wikiHash.get(title); - long id = w.getId(); - long rank = w.getRank(); - long inLink = w.getInLink(); - long outLink = w.getOutLink(); - System.out.println(title + ": "+ id + ": "+ rank +": "+ inLink +": "+outLink); + for (FileOutputStream fos: fosList) { + fos.close(); } - */ - - System.out.println("AllNodeNumber = " + AllNodeNumber); - System.out.println("Most :\n" + tmpKey + ":" + maxRelCount); - - String output = ""; - Node n = graphDb.getNodeById(32868); - Traverser hasLinkTraverser = ltn.getOutHasLinkTraverser(n); - System.out.println(n.getProperty(LinkToNode.PAGE_TITLE)); - for (Path hasLinkPath : hasLinkTraverser) { - if (hasLinkPath.length() > 1) break; - output = "At depth" - + hasLinkPath.length() - + " => " - + hasLinkPath.endNode().getProperty( - LinkToNode.PAGE_TITLE) + "\n"; - System.out.println(output); + /* + String title = "ブラックホール"; + FileOutputStream fos = new FileOutputStream("./resource/nodePageRank.dat"); + fos.write(("# page title: "+title+"\n").getBytes()); + + WikiPage w = wikiHash.get(title); + for (int i = 0; i < 50; i++) { + for (Node node : graphOpe.getAllNodes()) { + ltn.computePageRank(node); + } + double rank = w.getRank(); + printPageRankLog(fos, i, rank); + fos.flush(); } - - System.out.println("in = "+ltn.computeInHasLink(n)); - System.out.println("out = " +ltn.computeOutHasLink(n)); - - - - - - + fos.close(); +*/ } catch (Exception e) { e.printStackTrace(); } finally { @@ -107,5 +82,16 @@ } } + + public static void printPageRankLog(FileOutputStream fos, int x, double rank) throws IOException { + fos.write( (x+" "+ rank+"\n").getBytes() ); + fos.flush(); + } + + public static void printPageRankLog(FileOutputStream fos, LinkToNode ltn, int nodeId, int x) throws IOException { + double rank = ltn.getPageRank(ltn.getNode(nodeId)); + fos.write( (x+" "+ rank+"\n").getBytes() ); + fos.flush(); + } }
--- a/src/wikigraph/WikiPage.java Tue Aug 28 15:27:35 2012 +0900 +++ b/src/wikigraph/WikiPage.java Thu Aug 30 23:23:24 2012 +0900 @@ -6,14 +6,14 @@ private String title; private long id; - private long rank; + private Double rank; private long outLink; private long inLink; WikiPage() { this.title = null; this.id = -1; - this.rank = -1; + this.rank = -1.0; this.outLink = 0; this.inLink = 0; } @@ -21,12 +21,13 @@ WikiPage(Node node) { this.title = (String) node.getProperty(LinkToNode.PAGE_TITLE); this.id = node.getId(); - this.rank = (Long) node.getProperty(LinkToNode.PAGE_RANK); + this.rank = (Double)node.getProperty(LinkToNode.PAGE_RANK); + this.rank = 0.0; this.outLink = 0; this.inLink = 0; } - WikiPage(String title, long id, long rank) { + WikiPage(String title, long id, Double rank) { this.title = title; this.id = id; this.rank = rank; @@ -42,7 +43,7 @@ return id; } - long getRank() { + double getRank() { return rank; } @@ -62,7 +63,7 @@ this.id = id; } - void setRank(long rank) { + void setRank(double rank) { this.rank = rank; }