-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_content_phantom.js
82 lines (59 loc) · 1.87 KB
/
get_content_phantom.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
var page = require('webpage').create();
var system = require('system');
// The target URL is expected as the first command-line argument; the usage
// line prints system.args[0] as the program name.
if ( system.args.length < 2 ) {
    console.log('Usage: ' + system.args[0] + ' url');
    // Exit with a non-zero status so a calling process can tell a usage
    // error apart from a successful run (previously this exited with 0).
    // NOTE(review): phantom.exit() may not halt the script immediately, so
    // the statements below can still be reached before shutdown — confirm.
    phantom.exit(1);
}
// URL of the page to load and analyse.
var url = system.args[1];
// Console output produced by the loaded page is received here and discarded.
// Presumably this keeps stdout limited to the JSON result; log `message`
// from this handler when debugging the in-page code.
page.onConsoleMessage = function (message) {
    // Intentionally empty.
};
// Load `url`, extract the title, selected <meta> values and text-bearing tags
// from the loaded document, and print the result to stdout as one JSON string.
//
// Output shape:
//   {
//     title:   string,
//     metas:   { description?: string, keywords?: string },
//     content: [ { tag: string, text: string, parents: string[] }, ... ]
//   }
//
// Exit status: 0 when the page was loaded, 1 when it could not be opened.
page.open(url, function (status) {
    var exitCode = 0;
    if ( status !== 'success' ) {
        console.log('Unable to access network');
        // Report the failure through the exit status as well, so callers do
        // not have to parse stdout to detect it.
        exitCode = 1;
    } else {
        // The callback below runs against the loaded page's `document`; only
        // its return value is handed back to this script.
        var content = page.evaluate(function() {
            // A document may have no <body>; fall back to an empty list
            // instead of throwing on a null dereference.
            var tags = document.body ? document.body.getElementsByTagName('*') : [];
            // Tag names whose text must never be reported.
            var notAllowed = ['SCRIPT', 'LINK'];
            var content = [];

            // Page title: text of the first <title> element, or '' if none.
            var titleNodes = document.getElementsByTagName('title');
            var title = '';
            if ( titleNodes && titleNodes.length > 0 ) {
                title = titleNodes[0].text;
            }

            // Collect the `content` attribute of the <meta name="..."> tags
            // we are interested in; a later duplicate overwrites an earlier one.
            var metas = document.getElementsByTagName('meta');
            var searchedMetas = ['description', 'keywords'];
            var pageMetas = {};
            for ( var j=0; j<metas.length; j++ ) {
                var meta = metas[j];
                var name = meta.getAttribute('name');
                if ( name && searchedMetas.indexOf(name) !== -1 ) {
                    pageMetas[name] = meta.getAttribute('content');
                }
            }

            // Walk every element under <body> and keep those exposing a
            // non-empty `text` property.
            // NOTE(review): `text` is only defined on some element types
            // (e.g. anchors), so most elements are skipped here — confirm
            // this is intended rather than `textContent`.
            for ( var i=0; i<tags.length; i++ ) {
                var tag = tags[i];
                if ( (tag.text) && (notAllowed.indexOf(tag.tagName) === -1) ) {
                    // Ancestor tag names, nearest first, stopping before
                    // <body>/<html>.
                    var parents = [];
                    var parentNode = tag.parentNode || null;
                    while (parentNode && parentNode.tagName !== 'HTML' && parentNode.tagName !== 'BODY') {
                        parents.push(parentNode.tagName);
                        parentNode = parentNode.parentNode || null;
                    }
                    content.push({
                        tag: tag.tagName,
                        text: tag.text,
                        parents: parents
                    });
                }
            }

            return {
                title: title,
                metas: pageMetas,
                content: content
            };
        });
        console.log(JSON.stringify(content));
    }
    phantom.exit(exitCode);
});