wwoz.rb
require 'open-uri'
require 'nokogiri'
require 'google_places'
require_relative 'config'

# Scrapes the WWOZ music-venue directory and saves any venue not already in
# the database as a Location record. The Location model and
# CloseEnough::Config are assumed to be loaded by config.rb.
class WWOZLivewire
  def initialize
    @api_keys = CloseEnough::Config::GooglePlaces[:api_keys]
    @client = GooglePlaces::Client.new(@api_keys.sample)
  end

  def scrape
    # WWOZ paginates its venue directory; pages 0..15 cover the full listing.
    urls = (0..15).map do |x|
      "http://www.wwoz.org/new-orleans-community/music-venues?page=#{x}"
    end

    threads = []
    # url = urls.first # for testing only
    urls.each do |url|
      # Fetch and parse each listing page in its own thread.
      threads << Thread.new(url) do |page_url|
        venues_html = Nokogiri::HTML(URI.open(page_url))
        venues = venues_html.css('div.view-content div.item-list ul li.views-row span.field-content a')
        venues.each do |venue|
          begin
            if Location.find_by_name(venue.text.strip)
              puts "'#{venue.text}' already in db"
            else
              # Follow the venue link to pull status, address, and coordinates.
              vurl = 'http://www.wwoz.org' + venue.attributes['href'].value
              venue_details_html = Nokogiri::HTML(URI.open(vurl)).css('div.node')
              status = venue_details_html.css('.venue-status').text.downcase.include?('open') ? 'open' : 'closed'
              address = venue_details_html.css('.street-address').text + ' ' +
                        venue_details_html.css('.locality').text + ', ' +
                        venue_details_html.css('.region').text
              # The "Google Maps" link embeds the coordinates as q=LAT+LNG+...
              map_link = venue_details_html.css('.location.map-link a')
                                           .find { |a| a.text == 'Google Maps' }
              map_matches = /q=(.*?)\+(.*?)\+/.match(map_link.attributes['href'].value)
              lat, long = map_matches[1..2]
              # places = @client.spots(lat, long, :name => venue.text)
              l = Location.new(
                :vicinity  => address,
                :lat       => lat,
                :lng       => long,
                :status    => status,
                :from_wwoz => 1,
                :name      => venue.text.strip
              )
              l.save
              puts "'#{l.name}' saved"
            end
            sleep rand * 2 # throttle requests a little
          rescue StandardError => e
            puts "#{venue.text} failed: #{e.message}"
          end
        end
      end
    end
    threads.each(&:join)
  end
end
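
# A minimal usage sketch (an assumption, not part of the original file): it
# presumes config.rb establishes the database connection and defines the
# Location model before the scrape runs.
#
#   scraper = WWOZLivewire.new
#   scraper.scrape   # fetches all pages and saves any venues not yet stored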