forked from johno/shovel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathboise_weekly.rb--modified
140 lines (116 loc) · 4.7 KB
/
boise_weekly.rb--modified
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
require 'nokogiri'
require 'mechanize'
require 'open-uri'
require './categories'
require './event'
module Shovel
  # Scraper for the Boise Weekly event search page. Every method is a class
  # method; `scrape` is the entry point and the `strip_*` helpers each pull a
  # single field out of one `.EventListing` node.
  class BoiseWeekly
    @base_url = 'http://www.boiseweekly.com/boise/EventSearch?narrowByDate='

    # Downloads the event search page and builds one Event per listing.
    #
    # options[:when]    - :today, :next_week or :this_weekend; any other value
    #                     (including nil) falls back to today's events.
    # options[:param]   - extra query-string text appended verbatim to the URL.
    # options[:verbose] - when truthy, the URL is written to stderr first.
    #
    # Returns an Array of Event objects (empty when no listing matched).
    def self.scrape(options = {})
      sub_url = case options[:when]
                when :next_week then 'Next%207%20Days'
                when :this_weekend then 'This%20Weekend'
                else 'Today'
                end
      url = "#{@base_url}#{sub_url}&neighborhood=939889"
      url << options[:param] if options[:param]
      $stderr.puts "Loading #{url}" if options[:verbose]

      # URI.open is used instead of the bare Kernel#open: open-uri stopped
      # patching Kernel#open in Ruby 3.0. The block form closes the handle.
      page = URI.open(url) { |io| Nokogiri::HTML(io) }

      page.css('.EventListing').each_with_object([]) do |listing, events|
        event_params = {}
        listing.css('h3').css('a').each do |event_href|
          href = event_href['href']
          # Anchors without an href, or pointing anywhere but an event page,
          # are not event titles.
          next unless href && href.include?('Event?oid=')

          event_params = {
            title: event_href.text.gsub(/\s+/, ' ').strip,
            description: strip_description(listing),
            category: strip_category(listing),
            address: strip_address(listing),
            venue: strip_venue(listing),
            # phone: strip_phone(listing) was already disabled in this scraper.
            bw_id: strip_oid(event_href),
            cost: strip_cost(listing),
            date: strip_date(listing)
          }
        end
        events << Event.new(event_params) unless event_params.empty?
      end
    end

    # Returns the text that follows '?oid=' in the anchor's href, with
    # whitespace collapsed, or nil when the anchor is nil or has no such part.
    def self.strip_oid(event_href)
      return if event_href.nil?

      oid = event_href['href'].to_s.split('?oid=')[1]
      oid && oid.gsub(/\s+/, ' ').strip
    end

    # Returns the listing's date string, or nil for a nil listing.
    #
    # The date is not wrapped in an element of its own, so this works on a
    # copy of the listing: the known child containers are removed and the
    # remaining text is kept up to the first run of seven or more characters
    # drawn from parentheses, digits, dots, whitespace and hyphens.
    #
    # NOTE(review): the selector removed here is '.descripText', while every
    # other method in this class reads '.descripTxt' - confirm which class the
    # page really uses.
    def self.strip_date(listing)
      return if listing.nil?

      remainder = listing.clone
      %w[h3 .eventCategories .descripText].each do |selector|
        remainder.css(selector).remove
      end

      text = remainder.text
      text.strip! # must happen before the slice below
      text.slice!(/[()0-9.\s\-]{7,}.*/m)
      text
    end

    # Returns the whitespace-normalised text of the listing's '.locationLabel'
    # element(s), or nil when the listing is nil or has no such element.
    def self.strip_venue(listing)
      return if listing.nil?

      label = listing.search('.locationLabel')
      return if label.empty?

      # The original chained an argument-less `.search` before `.text`, which
      # throws away the matched nodes; the label's own text is what is wanted.
      label.text.gsub(/\s+/, ' ').strip
    end

    # Hands the concatenated text of the listing's category links to
    # Categories::BoiseWeekly.parse_from_string and returns its result
    # (nil for a nil listing).
    def self.strip_category(listing)
      return if listing.nil?

      Categories::BoiseWeekly.parse_from_string(
        listing.css('.eventCategories').css('a').text
      )
    end

    # Returns an address String built from the first '.descripTxt' element, or
    # nil when the listing is nil, has no such element, or the element has no
    # text once its <span> children are ignored.
    #
    # The spans are removed from a copy, so the caller's listing is left
    # intact for the other strip_* helpers.
    #
    # NOTE(review): as in the original expression, only the first
    # whitespace-delimited token of the text is kept before " Idaho" is
    # appended - confirm that this is the intended slice of the address.
    def self.strip_address(listing)
      return if listing.nil?

      block = listing.search('.descripTxt').first
      return if block.nil?

      block = block.dup
      block.search('.//span').remove
      first_token = block.text.split(' ').first
      first_token && "#{first_token} Idaho"
    end

    # Returns the last whitespace-delimited token of the '.listingLocation'
    # text that follows the first ')' (with the '.locationRegion' text removed
    # from it), or nil when the listing is nil or no text follows a ')'.
    # Presumably that token is the phone number - verify against the markup.
    def self.strip_phone(listing)
      return if listing.nil?

      location = listing.css('.listingLocation')
      # [1] instead of Array#second, which only exists with ActiveSupport.
      after_paren = location.text.split(')')[1]
      return if after_paren.nil?

      region = location.css('.locationRegion').text
      after_paren.gsub(/\s+/, ' ').gsub(region, '').strip.split(' ').last
    end

    # Returns the text of the second '.descripTxt' element without its last
    # word (the last word is treated as the price, see strip_cost), or nil
    # when the listing is nil or has fewer than two '.descripTxt' elements.
    def self.strip_description(listing)
      return if listing.nil?

      words = description_tokens(listing)
      return if words.nil?

      words[0...-1].join(' ')
    end

    # Returns the last word of the second '.descripTxt' element, downcased, or
    # nil when the listing is nil or there is no such word.
    def self.strip_cost(listing)
      return if listing.nil?

      description_tokens(listing)&.last&.downcase
    end

    # Splits the whitespace-normalised text of the second '.descripTxt'
    # element into words; nil when that element does not exist.
    def self.description_tokens(listing)
      node = listing.css('.descripTxt')[1]
      node && node.text.gsub(/\s+/, ' ').strip.split(' ')
    end
    private_class_method :description_tokens
  end
end