-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlyrics_scrape.js
55 lines (53 loc) · 1.71 KB
/
lyrics_scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/*
 * lyrics_scrape.js - command-line lyrics scraper.
 *
 * Usage: node lyrics_scrape.js [artist] [track]
 *
 * Builds the URL http://lyrics.wikia.com/<artist>:<track>, downloads the page
 * with `request`, extracts the text of `div.lyricbox` with `cheerio`, and
 * prints the lyrics as an array of lines together with timing information.
 *
 * Exit status: 0 when lyrics were printed; 1 on a usage error, a network
 * error, a non-200 HTTP response, or a page that has no lyrics box.
 */

/**
 * Converts a raw command-line argument into a URL path segment.
 *
 * Surrounding whitespace is dropped, inner whitespace runs become "_", and
 * characters that would otherwise change the meaning of the URL ("?", "#",
 * "%", ...) are percent-encoded.
 *
 * @param {string|undefined} raw - Argument as given on the command line.
 * @returns {string|undefined} The encoded segment; a falsy value when the
 *     argument is missing or contains only whitespace.
 */
function toPathSegment(raw) {
  if (!raw) {
    return raw;
  }
  var title = raw.trim().replace(/\s+/g, "_");
  // "/" is restored after encoding so names such as "AC/DC" keep the same
  // path form the unencoded URL had; NOTE(review): presumably the wiki
  // resolves the literal slash as part of the title - confirm.
  return encodeURIComponent(title).replace(/%2F/g, "/");
}

/**
 * Reports a fatal problem on stderr and terminates with exit status 1.
 *
 * @param {*} problem - Error object or message to print.
 */
function fail(problem) {
  console.error(problem);
  process.exit(1);
}

/**
 * Extracts the lyric lines from a downloaded page.
 *
 * @param {string} html - Markup of the lyrics page.
 * @returns {string[]|null} Text of `div.lyricbox` split on "\n", or null
 *     when the page contains no `div.lyricbox` element.
 */
function extractLyricLines(html) {
  var $ = cheerio.load(html);
  if ($("div.lyricbox").length === 0) {
    return null;
  }
  // Drop direct children that are not lyric text (presumably ad / layout
  // helper elements - verify against a live page).
  $("div.lyricbox > .rtMatcher, div.lyricbox > .lyricsbreak").remove();
  // Turn <br> elements into newlines so that .text() keeps the line breaks.
  $("div.lyricbox > br").replaceWith("\n");
  return $("div.lyricbox").text().split("\n");
}

var artist = toPathSegment(process.argv[2]);
var track = toPathSegment(process.argv[3]);
if (!artist || !track) {
  console.log("Usage: node lyrics_scrape.js [artist] [track]");
  process.exit(1);
}

var url = "http://lyrics.wikia.com/" + artist + ":" + track;
var request = require("request");
var cheerio = require("cheerio");
var start = new Date();
console.log("Fetching lyrics from " + url);
request(url, function (err, response, html) {
  if (err) {
    // Previously the error was only logged and the process still ended
    // with status 0.
    fail(err);
  }
  if (response.statusCode !== 200) {
    // An error page has no lyrics; scraping it would print [ '' ] as success.
    fail("Request for " + url + " failed with HTTP status " + response.statusCode);
  }
  var dl = new Date();
  console.log("Fetch completed in " + (dl - start) + " ms");

  var lines = extractLyricLines(html);
  if (lines === null) {
    fail("No lyrics found at " + url);
  }

  // Elapsed time is measured from the start of the fetch, i.e. it is the
  // total run time, not just the parsing time.
  var done = new Date();
  console.log("Lyrics scrape completed in " + (done - start) + " ms");
  console.log(lines);
  process.exit(0);
});
/*
var jsdom = require("jsdom");
var start = new Date();
console.log("Fetching lyrics from " + url);
jsdom.env( url, ["http://code.jquery.com/jquery.min.js"],
function (errors, window) {
var dl = new Date();
console.log("Fetch completed in " + (dl-start) + " ms");
var lyrics = window.$("div.lyricbox").contents()
.filter(function() {
return (this.nodeType == 3 && this.nodeValue.search(/^\s+$/) < 0) || (this.nodeName.toLowerCase() == "br");
})
.map(function() {
if(this.nodeType == 3)
return this.nodeValue;
else
return "";
})
.toArray()
.filter(function(val, i, arr) {
return val !== "" || arr[i+1] === "";
});
console.log(lyrics);
var done = new Date();
console.log("Lyrics scrape completed in " + (done-start) + " ms");
process.exit(0);
}
);
*/