# Helper functions for bike_counter_get
library(jsonlite)
library(dplyr) # filter/select/mutate/full_join are used throughout; tidyr is called via tidyr::
# Define station characteristics. One of these rows will be defined for each station.
station_meta = data.frame(
  url = 'https://data.cambridgema.gov/resource/gxzm-dpwp.csv',
  name = 'Broadway',
  id_name = NA, # To be used when there is a station ID inside the data
  city_state = 'Cambridge, MA',
  lat = 42.363464,
  lon = -71.086202,
  Total = 'total',
  dir1 = 'exits',
  dir2 = 'entries',
  dir1name = 'Eastbound',
  dir2name = 'Westbound',
  tz = 'America/New_York')
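# Additional stations could be appended as further rows. A sketch of the pattern,
# with a hypothetical URL and column values for illustration only (kept commented out
# so sourcing this file does not register a fake station):
# station_meta = rbind(station_meta,
#   data.frame(url = 'https://example.gov/resource/xxxx-xxxx.csv',
#              name = 'Example Bridge', id_name = NA, city_state = 'Somewhere, MA',
#              lat = 42.0, lon = -71.0,
#              Total = 'total', dir1 = 'northbound', dir2 = 'southbound',
#              dir1name = 'Northbound', dir2name = 'Southbound',
#              tz = 'America/New_York'))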
# Get historical weather for a station from DarkSky. Requires a DarkSky API key.
# Assumes Bike_counter_get.R has already been run, so that 'api_get' and 'daily' are in the workspace.
get_historical_wx <- function(limit_1000 = FALSE, chunk = c(NA, 'iter_date_range2')){
  # Get key.
  if(!exists('key')) key = scan('../keys/DarkSky_Key.txt', what = 'character')
  # Get lat/lon and time zone for this station from station_meta
  ll = station_meta %>% filter(url == api_get) %>% dplyr::select(lat, lon)
  tz = station_meta %>% filter(url == api_get) %>% dplyr::select(tz)
  # Find the date range to obtain
  hist_date_range <- range(daily$date)
  iter_date_range = seq(hist_date_range[1], hist_date_range[2], by = 1)
  # If limiting to 1000 queries (e.g., to stay within an API quota), get the appropriate 1000-query chunk
  if(limit_1000){
    # Make this more flexible at some point; works for now for up to 2000 days
    if(length(iter_date_range) > 1000 & length(iter_date_range) <= 2000) {
      iter_date_range2 = iter_date_range[1001:length(iter_date_range)]
      iter_date_range = iter_date_range[1:1000]
    }
    # Use only the first element of 'chunk' (default NA); a non-NA value names the object holding the dates to query
    if(!is.na(chunk[1])){
      iter_date_range = get(chunk[1])
    }
  }
  hist_wx <- vector()
  for(i in 1:length(iter_date_range)){ # i = 1
    # Build a Unix timestamp for midnight local time on this date
    query_time = format(
      as.numeric(
        as.POSIXct(as.character(iter_date_range[i]), format = '%F', tz = as.character(tz$tz))
      ),
      scientific = FALSE)
    query = paste('https://api.darksky.net/forecast',
                  key,
                  paste(as.numeric(ll[1]), as.numeric(ll[2]), query_time, sep = ','),
                  sep = '/')
    hist_response <- fromJSON(query)
    # Add any additional columns if new ones are found in this iteration.
    # Since hist_response can have different columns, just use full_join with no messages rather than specifying columns with 'by'.
    if(i == 1) {
      hist_wx = hist_response$hourly$data
    } else {
      hist_wx <- suppressMessages( full_join(hist_wx, hist_response$hourly$data) )
    }
    if(i %% 100 == 0) cat(as.character(iter_date_range[i]), " . ")
  }
  # Format datetime
  hist_wx <- hist_wx %>%
    mutate(datetime = as.POSIXct(time, origin = '1970-01-01', tz = as.character(tz$tz)),
           date = format(datetime, '%F'),
           year = format(datetime, '%Y'),
           month = format(datetime, '%m'),
           day = format(datetime, '%A'),
           hour = format(datetime, '%H'))
  # Prepare variable types and fill NA with zero (or 'None') for the precipitation columns
  hist_wx <- hist_wx %>%
    mutate(date = as.Date(date),
           hour = as.numeric(hour),
           day = as.character(day)) %>%
    tidyr::replace_na(list(precipIntensity = 0,
                           precipProbability = 0,
                           precipAccumulation = 0,
                           precipType = 'None'))
  hist_wx
}
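# A minimal usage sketch (assumes Bike_counter_get.R has already created 'api_get' and 'daily'):
# api_get <- station_meta$url[1]                   # station to query
# hist_wx <- get_historical_wx()                   # full date range of 'daily'
# hist_wx <- get_historical_wx(limit_1000 = TRUE)  # first 1000 days only, e.g. to stay within an API quota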
# Get current and forecast weather
get_curr_forecast_wx <- function(){
  # Get key.
  if(!exists('key')) key = scan('../keys/DarkSky_Key.txt', what = 'character')
  # Get lat/lon and time zone for this station from station_meta
  ll = station_meta %>% filter(url == api_get) %>% dplyr::select(lat, lon)
  tz = station_meta %>% filter(url == api_get) %>% dplyr::select(tz)
  # Find the date range to obtain: start from the last day in hist_wx (or today, whichever is earlier) and go through tomorrow
  last_day_in_hist = max(as.Date(hist_wx$date))
  start_day = min(Sys.Date(), last_day_in_hist)
  get_date_range <- c(start_day, Sys.Date() + 1)
  # Query every day in the range, not just the endpoints
  iter_date_range = seq(get_date_range[1], get_date_range[2], by = 1)
  curr_wx <- vector()
  for(i in 1:length(iter_date_range)){ # i = 1
    # Build a Unix timestamp for midnight local time on this date
    query_time = format(
      as.numeric(
        as.POSIXct(as.character(iter_date_range[i]), format = '%F', tz = as.character(tz$tz))
      ),
      scientific = FALSE)
    query = paste('https://api.darksky.net/forecast',
                  key,
                  paste(as.numeric(ll[1]), as.numeric(ll[2]), query_time, sep = ','),
                  sep = '/')
    curr_response <- fromJSON(query)
    # Add any additional columns if new ones are found in this iteration.
    # Since curr_response can have different columns, just use full_join with no messages rather than specifying columns with 'by'.
    if(i == 1) {
      curr_wx = curr_response$hourly$data
    } else {
      curr_wx <- suppressMessages( full_join(curr_wx, curr_response$hourly$data) )
    }
  }
  # Format datetime
  curr_wx <- curr_wx %>%
    mutate(datetime = as.POSIXct(time, origin = '1970-01-01', tz = as.character(tz$tz)),
           date = format(datetime, '%F'),
           year = format(datetime, '%Y'),
           month = format(datetime, '%m'),
           day = format(datetime, '%A'),
           hour = format(datetime, '%H'))
  # Prepare variable types and fill NA with zero (or 'None') for the precipitation columns
  curr_wx <- curr_wx %>%
    mutate(date = as.Date(date),
           hour = as.numeric(hour),
           day = as.character(day)) %>%
    tidyr::replace_na(list(precipIntensity = 0,
                           precipProbability = 0,
                           precipType = 'None'))
  curr_wx
}
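# A minimal usage sketch (assumes 'api_get' and 'hist_wx' exist, as above):
# curr_wx <- get_curr_forecast_wx()
# range(curr_wx$datetime) # should span from the last historical day through tomorrow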
# Example epoch-to-local-time conversion: as.POSIXct(1555902000, origin = '1970-01-01', tz = 'America/New_York')
# Detect gaps and interpolate values for missing data if the gap is small.
# Aim to run on hist_count in initial data collection, or on new_count when incrementing.
# Also: there are duplicate rows, which need to be filtered out to avoid double-counting.
# For now, not filling gaps, just deleting duplicate rows (see the sketch after the block below); consider imputation later.
NOTRUN = TRUE
if(!NOTRUN){
  # fill_gaps <- function(count_dat = hist_count){
  count_dat <- count_dat[order(count_dat$datetime),]
  # Detect any gaps, e.g. count_dat %>% filter(date > '2017-07-29' & date < '2017-08-02')
  dt <- diff(count_dat$date)
  dt_gaps <- which(as.numeric(dt, 'days') > 1.05)
  count_dat[dt_gaps,]
  count_dat[73594:(73594+70),]
  as.numeric(diff(count_dat[37171:(37171+24),'date']), 'days')
}
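# A sketch of the duplicate-row deletion described above (assumes a 'count_dat' data frame
# with a 'datetime' column, as in the NOTRUN block; kept commented out, not run here):
# count_dat <- count_dat %>%
#   arrange(datetime) %>%
#   distinct(datetime, .keep_all = TRUE) # keep the first row for each double-counted timestamp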
# Not a function, but a snippet to make an animated 'Fork Me' badge in the dashboard
# From https://codepen.io/Rplus/pen/wKZOBo
octocat_badge = '<a href="https://github.com/flynn-d/BikeCount" class="github-corner" aria-label="View source on GitHub"><svg width="80" height="80" viewBox="0 0 250 250" style="fill:#151513; color:#fff; position: absolute; top: 0; border: 0; right: 0;" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>'