Mercurial > hg > Members > nobuyasu > TPPageRank
view src/pagerank/LinkConvertGraph.java @ 3:b44abb9aa09f draft
add resources/article.xml
author | one |
---|---|
date | Wed, 05 Sep 2012 11:59:02 +0900 |
parents | 1744340f8be6 |
children | dcd59917a2dd |
line wrap: on
line source
package pagerank; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; import com.tinkerpop.blueprints.Graph; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.tg.TinkerGraph; import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter; import xmlParser.TextTagParser; public class LinkConvertGraph { private String filename; private FileInputStream fis; private SAXParserFactory factory; private SAXParser parser; private TextTagParser xmlParser; private HashMap<String,HashSet<String>> hash; LinkConvertGraph() throws ParserConfigurationException, SAXException { xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException { this.filename = filename; fis = new FileInputStream(filename); xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } public void setFilename(final String filename) throws FileNotFoundException { this.filename = filename; this.fis = new FileInputStream(filename); } private void parseXml() throws SAXException, IOException { parser.parse(this.fis, this.xmlParser); hash = xmlParser.getHash(); } private HashMap<String,HashSet<String>> getHash() { return hash; } public void printHash() { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); System.out.println("title: " + title); for (String link : entry.getValue()) { System.out.println("\t"+link); } System.out.println(); } } private void printHash(FileOutputStream os) throws IOException { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); os.write( ("title: " + title + "\n").getBytes()); for (String link : entry.getValue()) { os.write( ("\t"+link+"\n").getBytes()); } os.write( ("\n").getBytes()); os.flush(); } } public static void main(String[] args) { final String filename = "./resources/article.xml"; // final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml"; LinkConvertGraph lcg; try { lcg = new LinkConvertGraph(filename); lcg.parseXml(); // lcg.printHash(); FileOutputStream fos = new FileOutputStream("./resources/wikiLink.log"); lcg.printHash(fos); HashMap<String,HashSet<String>> hash = lcg.getHash(); final String filenameD = "./resources/tinkerpopDB"; Graph graph = new TinkerGraph(); FileOutputStream out = new FileOutputStream(new File(filename)); LinkToVertex ltn = new LinkToVertex(graph); for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) { String pageTitle = map.getKey(); Vertex v;// = graph.addVertex(null); if ( ltn.getId(pageTitle) == null ) { v = ltn.createVertexWithPageTitle(pageTitle); } else { v = ltn.getVertex(pageTitle); } for (String linkPageTitle : map.getValue()) { Vertex linkV; if ( ltn.getId(linkPageTitle) == null) { linkV = ltn.createVertexWithPageTitle(linkPageTitle); ltn.setPageRank(linkV, (Double)0.0); } else { linkV = ltn.getVertex(linkPageTitle); } ltn.setHasLink(v, linkV); } } GraphMLWriter.outputGraph(graph, out); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (IOException e) { System.err.println("Failed to parse xml"); e.printStackTrace(); } } }