-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathReadRelativeHrefs.js
43 lines (35 loc) · 1.38 KB
/
ReadRelativeHrefs.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/**
 * Expands a pipeline document into itself plus one stub document per
 * hyperlink found in the HTML held in its "body" field.
 *
 * The HTML is parsed with jsoup using a fixed base URL so that relative
 * hrefs can be resolved to absolute URLs; each resolved URL becomes the "id"
 * field of a new PipelineDocument.
 *
 * Written for a JVM-hosted JavaScript engine: it relies on the `java`, `org`
 * and `com` package globals and on a `logger` global, and sticks to ES5
 * syntax.
 *
 * @param {Object} doc - Pipeline document exposing getId() and
 *     getFirstFieldValue(name).
 * @returns {Object} A java.util.ArrayList whose first element is `doc`,
 *     followed by one PipelineDocument per resolvable link. When `doc`, its
 *     id or its "body" value is missing, `doc` itself is returned unchanged.
 */
var readRelativeHrefs = function (doc) {
  // Nothing to expand without a document, an id and a body: hand the input
  // straight back. `== null` also covers `undefined`.
  if (doc == null ||
      doc.getId() == null ||
      doc.getFirstFieldValue("body") == null) {
    return doc;
  }

  var ArrayList = java.util.ArrayList;
  var Jsoup = org.jsoup.Jsoup;
  var PipelineDocument = com.lucidworks.apollo.common.pipeline.PipelineDocument;

  // NOTE(review): there is no trailing slash, so under standard URL
  // resolution a relative link such as "page.html" resolves to
  // ".../lucidworks/page.html". Confirm whether "wabion" is meant to be a
  // directory, in which case the base should end with "/".
  var baseUrl = "http://cprassoc.com/lucidworks/wabion";

  var docs = new ArrayList();
  docs.add(doc);

  try {
    var html = doc.getFirstFieldValue("body");
    // The base URI must be supplied at parse time; without it jsoup cannot
    // resolve relative hrefs and reports them as empty strings.
    var parsed = Jsoup.parse(html, baseUrl);
    var links = parsed.select("a[href]");

    // Index-based loop instead of the non-standard `for each (x in y)` form.
    for (var i = 0; i < links.size(); i++) {
      var link = links.get(i);
      // Logs the anchor's own text (not the href), as the original did.
      logger.info("Parsed URL: " + link.ownText());

      // absUrl already yields a complete URL, so it is used as the id
      // directly rather than being appended to baseUrl.
      var newId = String(link.absUrl("href"));
      if (newId === "") {
        // The href could not be made absolute; skip it rather than emit a
        // document with an empty id.
        continue;
      }

      var pipelineDoc = new PipelineDocument();
      pipelineDoc.addField("id", newId);
      docs.add(pipelineDoc);
    }
  } catch (err) {
    // Best effort: log the failure and return whatever was collected so far.
    logger.error(err);
  }

  return docs;
};