-
Notifications
You must be signed in to change notification settings - Fork 11
/
index.js
42 lines (37 loc) · 1.03 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// Module dependencies and shared module-level state.
// - url:    Node's URL module; parseDOM uses url.parse / url.format.
// - jsdom:  jsdom's old API entry point; parseDOM calls jsdom.env on it.
// - Europa: constructor from node-europa; scrape instantiates it.
// - europa: the Europa instance, null until scrape() assigns it.
// - config: mutable settings shared by the functions below
//           (`selector` is written by scrape, `fromURL` by parseDOM).
var url = require('url'),
jsdom = require("jsdom/lib/old-api.js"),
Europa = require('node-europa'),
europa = null,
config = {};
// TODO make user-configurable
/**
 * Concatenates the inner HTML of every element matching `config.selector`.
 *
 * @param {Object} win - Window-like object exposing `document.querySelectorAll`.
 * @returns {string} The matches' `innerHTML` values joined in the order the
 *   query returned them; an empty string when nothing matches.
 */
function toHTML(win) {
  var matches = win.document.querySelectorAll(config.selector);
  var combined = '';

  // The query result is only array-like, so borrow forEach from Array.
  Array.prototype.forEach.call(matches, function (element) {
    combined += element.innerHTML;
  });

  return combined;
}
/**
 * Callback handed to jsdom: prints the converted Markdown on success, or
 * reports the failure on stderr.
 *
 * @param {*} error - Truthy when the HTML could not be processed.
 * @param {Object} win - Window-like object passed on to toHTML; only used
 *   when `error` is falsy.
 */
function toMarkdown(error, win) {
  if (!error) {
    var markdown = europa.convert(toHTML(win));
    console.log(markdown);
    return;
  }

  // config.fromURL is only set for URL inputs; omit the suffix otherwise.
  var origin = config.fromURL ? config.fromURL : '';
  console.error('ERROR: Cannot process HTML' + origin);
  console.error(error);
}
/**
 * Loads one input through jsdom and forwards the outcome to toMarkdown.
 *
 * When `data` parses as a URL with both a protocol and a host, a
 * " from <url>" suffix is made available in `config.fromURL` so error
 * messages can name the source; for any other input the suffix is ''.
 *
 * @param {string} data - Passed unchanged as the first argument of jsdom.env.
 */
function parseDOM(data) {
  var src = url.parse(data);
  var fromURL = (src.host && src.protocol) ? ' from ' + url.format(src) : '';

  jsdom.env(data, function (error, win) {
    // Publish the label only now, right before toMarkdown reads it. jsdom
    // reports through a callback, so writing the shared config up front let
    // a later input overwrite it (and a non-URL input inherit a stale one),
    // attributing errors to the wrong source.
    config.fromURL = fromURL;
    toMarkdown(error, win);
  });
}
/**
 * Entry point: records the options, then converts either the supplied data
 * or every positional argument.
 *
 * @param {Object} argv - Parsed command-line options. Reads `selector`
 *   (falls back to 'body' when falsy), `inline` (forwarded to Europa) and
 *   `_` (the inputs used when `data` is falsy).
 * @param {string} [data] - Input to convert; when truthy it is used instead
 *   of `argv._`.
 */
function scrape(argv, data) {
  config.selector = argv.selector ? argv.selector : 'body';
  europa = new Europa({inline: argv.inline});

  if (!data) {
    // No direct input: treat each positional argument as its own source.
    argv._.forEach(parseDOM);
    return;
  }

  parseDOM(data);
}
// Public API: expose scrape on this module's exports.
exports.scrape = scrape;