-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDATAGENERATOR.py
189 lines (153 loc) · 8.09 KB
/
DATAGENERATOR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""### WHAT DOES THIS FILE DO? ####
1. Pulls ~approximate~ city coordinates from "NA_Cities.csv"
2. Identifies a current weather station in each of these cities & pulls relevant data from these stations (only current temp used right now)
3. Determines if that city currently has daylight or not
4. Colourmaps all weather stations current temp relative to one another, and sets daylight as 1/0 (T/F) for each weather station.
If 'debugging=1' this will export a CSV and .pkl file to explore the data.
5.
"""
def GENERATEDATA(debugging=0,times=[0,0]):
"""
Debugging (optional) - 1: True - will go through some data vis steps to help understand the process
0: False - will be faster.
Times (optional) - [datetime.now(),datetime.now(timezone.utc)] - this can be used to generate data for a specific time in history. If no input, data will be generated for NOW.
"""
# Import Meteostat library and dependencies
from datetime import datetime, timedelta, date, timezone
from pytz import timezone as ptztz
import pandas as pd
import numpy as np
from meteostat import Stations, Hourly
import os
from suntime import Sun, SunTimeException
import matplotlib as mpl
#preamble
# debugging=0
Hourly.cache_dir=r'.'
WS=pd.DataFrame()
stations = Stations()
if times == [0,0]:
# this means no inputs are provided
ctime_local=datetime.now()
cUTCtime=datetime.now(ptztz('America/Chicago'))
else:
#this means the times variable is used to provide current times
ctime_local=times[0]
cUTCtime=times[1]
## ---------------------- GET LATLONG FROM CSV FILE --------------------------------
df = pd.read_csv(os.path.join(os.getcwd(),'NA_cities.csv'))
Nrows=df.shape[0]
if debugging:
print(df.head())
print(df.LONG)
print(df.loc[1,'LONG'])
## --------------------- OPEN LOOP TO GRAB RELEVANT DATA FROM EACH OF THESE LOCATIONS ------------------------------
# init empty lists to append data to
OIndex=df.ORDEREDINDEX.tolist()
ICAO=[]
LATS=[]
LONGS=[]
CTEMP=[]
SNOW=[]
COUNTRY=[]
REGION=[]
NAME=[]
SUNRISE=[]
SUNSET=[]
#this is a quick and dirty way to filter away the stations that arent being used, narrowing the list for the next step.
AcceptableCutoffDate=ctime_local.date()-timedelta(days=10)
for i in range(0,Nrows):
## ---------------------- GET WEATHER STATION CLOSEST TO THESE POINTS (FROM CSV) -----------------
loop=1
stations = stations.nearby(df.loc[i,'LAT'],df.loc[i,'LONG'])
FilteringNewList=False
if FilteringNewList:
#this is good for filtering away bad stations (old ones that arent reporting anymore) from an arbitrary list of coordinates.
# current coordinates in the cities are based on weather station coordinates, NOT city coords, so this isnt needed every time. Causes issues sometimes at start of months/years.
stations = stations.inventory('hourly',datetime.replace(ctime_local,hour=0,minute=0,second=0,microsecond=0)) #inventory by what stations had reported hourly data as of hour 0 today.
station = stations.fetch(loop)
if debugging:
print('station:',station)
print(f"loop number: {loop}")
print("Looking for station at coordinates: (lat,long)",df.loc[i,'LAT'],df.loc[i,'LONG'])
print("at time:",datetime.replace(ctime_local,hour=0,minute=0,second=0,microsecond=0))
while station.empty:
print("empty df:\n",station.head)
loop+=1
station=stations.fetch(loop)
if debugging:
print(f'Station empty loop init, loop number: {loop}')
if debugging:
print(station)
varr=station['hourly_end'].iloc[0].date()
while station['hourly_end'].iloc[-1].date() < (AcceptableCutoffDate):
station=stations.fetch(loop)
varr=station['hourly_end'].iloc[-1].date()
loop+=1
## ---------------------- GET WEATHER STATION DATA IN DICT & ADD TO DF -----------------
data = Hourly(station,ctime_local-timedelta(hours=1),ctime_local)
data=data.fetch()
if debugging:
print(data)
ICAO.append(station.icao[0])
COUNTRY.append(station.country[0])
REGION.append(station.region[0])
NAME.append(station.name[0])
LATS.append(station.latitude[0])
LONGS.append(station.longitude[0])
CTEMP.append(data['temp'].iloc[0])
SNOW.append(data['snow'].iloc[0])
## -------------------- GET DAYLIGHT OR NOT AT EACH STATION -----------------------------
sun=Sun(station.latitude[0],station.longitude[0])
# try/except blocks for local sunrise/sunset times so we avoid the raised exceptions for (northern in this DS) places
# which do not have sunrise/sunset every day all year 'round.
try:
SR=sun.get_local_sunrise_time(at_date=ctime_local,time_zone=ptztz('America/Chicago'))
except:
# deals with sunrise exception. Set SR to be hour 1 on Jan01 2024. This way current time can never be SR<=ctime<=SS.
SR=datetime(2024,1,1,1,1,1,tzinfo=ptztz('America/Chicago'))
try:
SS=sun.get_local_sunset_time(at_date=ctime_local,time_zone=ptztz('America/Chicago'))
## Dealing with known issue of returning sunset time in previous day --- https://github.com/SatAgro/suntime/issues/13
## sol'n from https://stackoverflow.com/questions/72087825/issue-with-python-sun-get-sunset-time-routine
if SR>SS:
SS=sun.get_local_sunset_time(at_date=ctime_local+timedelta(days=1),time_zone=ptztz('America/Chicago'))
except:
# deals with sunrise exception. Set SS to be hour 0 on Jan01 2024. This way current time can never be SR<=ctime<=SS.
SS=datetime(2024,1,1,0,0,0,tzinfo=ptztz('America/Chicago'))
SUNRISE.append(SR)
SUNSET.append(SS)
if debugging:
print('current lists')
print(ICAO,LATS,LONGS,CTEMP,SNOW)
## -------------------------------------- COLORMAPPING HERE ------------------------------------------------
if mpl.__version__=='3.9.2': # for local dev on pc
cm=mpl.colormaps['jet']
norm=mpl.colors.Normalize(min(CTEMP),max(CTEMP))
colors=cm(norm(CTEMP))
if mpl.__version__=='3.3.4': #for the rpi running mpl 3.3.4
from matplotlib import cm
cm=cm.get_cmap('jet')
norm=mpl.colors.Normalize(min(CTEMP),max(CTEMP))
colors=cm(norm(CTEMP))
## -------------------------------------- ADD DATA TO DF ---------------------------------------------------
add2DF={'ORDEREDINDEX':OIndex,'ICAO':ICAO,'Name':NAME,'Country':COUNTRY,'Region':REGION,'Latitude':LATS,'Longitude':LONGS,'Ctemp':CTEMP,'Snow':SNOW,'RGBA':colors.tolist(),'Sunrise':SUNRISE,'Sunset':SUNSET}
keepers=pd.DataFrame(add2DF)
if debugging:
keepers.to_csv(r'./cDATA.csv')
#saves data to look at in csv format so its easily read.
keepers.to_pickle(r'./cDATA.pkl') #always export the pkl file.
if debugging: #just for showing data on local machine
import matplotlib.pyplot as plt
import matplotlib as mpl
## -------------------------------------- VISUALIZE DATA LOCALLY ---------------------------------------------------
plt.scatter(keepers['Longitude'],keepers['Latitude'],c=keepers.RGBA)
plt.ylabel('LATITUDE')
plt.xlabel('LONGITUDE')
plt.title('North America Data Points - point density checker')
#Add coordinate annotations on plot:
for L in range(0,keepers.shape[0]):
plt.annotate(f"({keepers['Longitude'][L]},{keepers['Latitude'][L]})",[keepers['Longitude'][L],keepers['Latitude'][L]],fontsize=2)
plt.show()
if __name__ == '__main__':
GENERATEDATA(debugging=1)