2
|
1 package pagerank;
|
|
2
|
|
3
|
|
4 import java.io.File;
|
|
5 import java.io.FileInputStream;
|
|
6 import java.io.FileNotFoundException;
|
|
7 import java.io.FileOutputStream;
|
|
8 import java.io.IOException;
|
|
9 import java.util.HashMap;
|
|
10 import java.util.HashSet;
|
|
11 import java.util.Map;
|
|
12
|
|
13 import javax.xml.parsers.ParserConfigurationException;
|
|
14 import javax.xml.parsers.SAXParser;
|
|
15 import javax.xml.parsers.SAXParserFactory;
|
|
16
|
|
17 import org.xml.sax.SAXException;
|
|
18
|
|
19 import com.tinkerpop.blueprints.Graph;
|
|
20 import com.tinkerpop.blueprints.Vertex;
|
|
21 import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
|
|
22 import com.tinkerpop.blueprints.util.io.graphml.GraphMLWriter;
|
|
23
|
|
24 import xmlParser.TextTagParser;
|
|
25
|
|
26 public class LinkConvertGraph {
|
|
27
|
|
28 private String filename;
|
|
29 private FileInputStream fis;
|
|
30 private SAXParserFactory factory;
|
|
31 private SAXParser parser;
|
|
32 private TextTagParser xmlParser;
|
|
33
|
|
34 private HashMap<String,HashSet<String>> hash;
|
|
35
|
|
36
|
|
37 LinkConvertGraph() throws ParserConfigurationException, SAXException {
|
|
38 xmlParser = new TextTagParser();
|
|
39 factory = SAXParserFactory.newInstance();
|
|
40 parser = factory.newSAXParser();
|
|
41 }
|
|
42
|
|
43 LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException {
|
|
44 this.filename = filename;
|
|
45 fis = new FileInputStream(filename);
|
|
46 xmlParser = new TextTagParser();
|
|
47 factory = SAXParserFactory.newInstance();
|
|
48 parser = factory.newSAXParser();
|
|
49 }
|
|
50
|
|
51 public void setFilename(final String filename) throws FileNotFoundException {
|
|
52 this.filename = filename;
|
|
53 this.fis = new FileInputStream(filename);
|
|
54 }
|
|
55
|
|
56 private void parseXml() throws SAXException, IOException {
|
|
57 parser.parse(this.fis, this.xmlParser);
|
|
58 hash = xmlParser.getHash();
|
|
59 }
|
|
60
|
|
61 private HashMap<String,HashSet<String>> getHash() {
|
|
62 return hash;
|
|
63 }
|
|
64
|
|
65 public void printHash() {
|
|
66 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
|
|
67 String title = entry.getKey();
|
|
68 System.out.println("title: " + title);
|
|
69 for (String link : entry.getValue()) {
|
|
70 System.out.println("\t"+link);
|
|
71 }
|
|
72 System.out.println();
|
|
73 }
|
|
74 }
|
|
75
|
|
76 private void printHash(FileOutputStream os) throws IOException {
|
|
77 for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) {
|
|
78 String title = entry.getKey();
|
|
79 os.write( ("title: " + title + "\n").getBytes());
|
|
80 for (String link : entry.getValue()) {
|
|
81 os.write( ("\t"+link+"\n").getBytes());
|
|
82 }
|
|
83 os.write( ("\n").getBytes());
|
|
84 os.flush();
|
|
85 }
|
|
86 }
|
|
87
|
|
88
|
|
89
|
|
90
|
|
91
|
|
92 public static void main(String[] args) {
|
3
|
93 final String filename = "./resources/article.xml";
|
|
94 // final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml";
|
2
|
95
|
|
96 LinkConvertGraph lcg;
|
|
97
|
|
98 try {
|
|
99
|
|
100 lcg = new LinkConvertGraph(filename);
|
|
101
|
|
102 lcg.parseXml();
|
|
103 // lcg.printHash();
|
|
104
|
3
|
105 FileOutputStream fos = new FileOutputStream("./resources/wikiLink.log");
|
2
|
106 lcg.printHash(fos);
|
|
107
|
|
108 HashMap<String,HashSet<String>> hash = lcg.getHash();
|
|
109
|
|
110
|
3
|
111 final String filenameD = "./resources/tinkerpopDB";
|
2
|
112
|
|
113 Graph graph = new TinkerGraph();
|
|
114 FileOutputStream out = new FileOutputStream(new File(filename));
|
|
115 LinkToVertex ltn = new LinkToVertex(graph);
|
|
116
|
|
117 for (Map.Entry<String, HashSet<String>> map : hash.entrySet()) {
|
|
118 String pageTitle = map.getKey();
|
|
119
|
|
120 Vertex v;// = graph.addVertex(null);
|
|
121
|
|
122 if ( ltn.getId(pageTitle) == null ) {
|
|
123 v = ltn.createVertexWithPageTitle(pageTitle);
|
|
124
|
|
125 } else {
|
|
126 v = ltn.getVertex(pageTitle);
|
|
127 }
|
|
128
|
|
129 for (String linkPageTitle : map.getValue()) {
|
|
130 Vertex linkV;
|
|
131 if ( ltn.getId(linkPageTitle) == null) {
|
|
132 linkV = ltn.createVertexWithPageTitle(linkPageTitle);
|
|
133 ltn.setPageRank(linkV, (Double)0.0);
|
|
134 } else {
|
|
135 linkV = ltn.getVertex(linkPageTitle);
|
|
136 }
|
|
137 ltn.setHasLink(v, linkV);
|
|
138 }
|
|
139
|
|
140 }
|
|
141
|
|
142 GraphMLWriter.outputGraph(graph, out);
|
|
143
|
|
144 } catch (FileNotFoundException e) {
|
|
145 e.printStackTrace();
|
|
146 } catch (SAXException e) {
|
|
147 e.printStackTrace();
|
|
148 } catch (ParserConfigurationException e) {
|
|
149 e.printStackTrace();
|
|
150 } catch (IOException e) {
|
|
151 System.err.println("Failed to parse xml");
|
|
152 e.printStackTrace();
|
|
153 }
|
|
154
|
|
155
|
|
156
|
|
157 }
|
|
158 }
|