1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.deri.any23.extractor.html;
18
19 import org.deri.any23.extractor.ExtractionResult;
20 import org.deri.any23.extractor.ExtractorDescription;
21 import org.deri.any23.extractor.ExtractorFactory;
22 import org.deri.any23.extractor.SimpleExtractorFactory;
23 import org.deri.any23.extractor.TagSoupExtractionResult;
24 import org.deri.any23.rdf.PopularPrefixes;
25 import org.deri.any23.vocab.VCARD;
26 import org.openrdf.model.BNode;
27 import org.openrdf.model.vocabulary.RDF;
28 import org.w3c.dom.Node;
29
30 import java.util.Arrays;
31
32 import static org.deri.any23.extractor.html.HTMLDocument.TextField;
33
34
35
36
37
38
39
40
41 public class GeoExtractor extends EntityBasedMicroformatExtractor {
42
43 private static final VCARD vVCARD = VCARD.getInstance();
44
45 public static final ExtractorFactory<GeoExtractor> factory =
46 SimpleExtractorFactory.create(
47 "html-mf-geo",
48 PopularPrefixes.createSubset("rdf", "vcard"),
49 Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
50 null,
51 GeoExtractor.class
52 );
53
54 public ExtractorDescription getDescription() {
55 return factory;
56 }
57
58 protected String getBaseClassName() {
59 return "geo";
60 }
61
62 @Override
63 protected void resetExtractor() {
64
65 }
66
67 protected boolean extractEntity(Node node, ExtractionResult out) {
68 if (null == node) return false;
69
70 final HTMLDocument document = new HTMLDocument(node);
71 HTMLDocument.TextField latNode = document.getSingularTextField("latitude" );
72 TextField lonNode = document.getSingularTextField("longitude");
73 String lat = latNode.value();
74 String lon = lonNode.value();
75 if ("".equals(lat) || "".equals(lon)) {
76 String[] both = document.getSingularUrlField("geo").value().split(";");
77 if (both.length != 2) return false;
78 lat = both[0];
79 lon = both[1];
80 }
81 BNode geo = getBlankNodeFor(node);
82 out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
83 final String extractorName = getDescription().getExtractorName();
84 conditionallyAddStringProperty(
85 latNode.source(),
86 geo, vVCARD.latitude , lat
87 );
88 conditionallyAddStringProperty(
89 lonNode.source(),
90 geo, vVCARD.longitude, lon
91 );
92
93 final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
94 tser.addResourceRoot( document.getPathToLocalRoot(), geo, this.getClass() );
95
96 return true;
97 }
98
99 }