This repository has been archived by the owner on Jun 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathmeteostats.py
588 lines (484 loc) · 23 KB
/
meteostats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
"""
Weather statistics module.
Descriptive statistics of annual and monthly weather data.
!!! warning "Required installation"
* [Anaconda](https://www.continuum.io/downloads) for `scipy` for statistics
# Author - Christopher Teh Boon Sung
------------------------------------
"""
import math
from collections import OrderedDict
from scipy.stats import exponweib
from scipy.stats import gamma
from scipy.stats import rv_continuous
from simweather import SimWeather
class MeteoStats(object):
"""
Descriptive statistics of annual and monthly weather data.
Calculate several basic descriptive statistics on the weather parameters:
```text
count
total
minimum
maximum
average
amplitude
standard deviation
cv (coeff. of variation)
```
The rain parameter has additional statistics:
```text
no. of wet and dry days
```
and the probability of having:
```text
two consecutive dry days P(D|D),
two consecutive wet days P(W|W),
a dry day, then a wet day P(W|D),
a wet day, then a dry day P(D|W),
a dry day P(D)
a wet day P(W)
```
This class also fits a gamma probabilty distribution function to the rainfall data, and
returns the shape and scale of the rainfall distribution. Likewise, this class also fits
the Weibull probability distribution function to the windspeed data, and returns the
Weibull shape and scale factors for the windspeed distribution.
# ATTRIBUTES
metdata (list): Annual weather data (list of `OrderedDict` objects)
statdict (list): 3-D dictionary that hold the results of the statistical values
(list of `OrderedDict` objects).
# Example
```python
statdict[month][param][stat] = value
```
thus,
```python
statdict[2]['wind']['avg']
```
returns the average (`avg`) wind speed (`wind`) for `month` = 2.
Use the `collect()` method to retrieve all statistical values for a given
condition. See the `collect()` method docmentation on how to use it.
# METHODS
slice: Extract a given month's weather data
collect: Create a list that contain all statistical values for a given month,
parameter, or statistic
output_stats: Print the weather data and their descriptive statistics to file
(can append data and descriptive statistics to file).
annual_weather: Return the daily weather for the entire current year
"""
def __init__(self, met):
"""
Create and initialize the MeteoStats object.
# Arguments
met (Meteo): the Meteo object
"""
self.__met = met # the Meteo object
self.metdata = None # list of annual weather data for 1 or more years
self.statdict = None # statistical analysis of the annual weather (list)
@staticmethod
def slice(month):
"""
Return the start and end positions in the weather data list for a given month.
# Arguments
month (int): 0 to 11 for Jan to Dec, and 12 for whole year
# Returns
tuple: start and end positions (`int`)
"""
s = 0 if (month == 0 or month == 12) else SimWeather.cumulative_days[month - 1]
e = 365 if month == 12 else SimWeather.cumulative_days[month]
return s, e
def _basic_stats(self, field, start_slice, end_slice, exclude_0=False):
"""
Calculate weather statistics.
The following statistics are determined
```text
count, sum, min., max., mean, amplitude, standard deviation, and CV
```
# Arguments
field (str): name of weather parameter
start_slice (int): start index of weather data (starting day of month)
end_slice (int): end index of weather data (ending day of month)
exclude_0 (bool): `True` to remove 0 from weather data (used for rain statistics),
else `False` to extract all values including 0
# Returns
OrderedDict: weather statistics of given month
"""
lst = self.metdata[-1][field][start_slice:end_slice]
# filter out zeroes if instructed
# note: the list can be empty or contain only 1 value after filtering (for rain data)
ar = [val for val in lst if val > 0] if exclude_0 else lst
n = len(ar)
# need to check if sufficient data: -999 code means not enough data
total = sum(ar) if n > 0 else -999 # sum, mean, and range need at least 1 value
avg = total / n if n > 0 else -999
ssd = [(val - 1) ** 2 for val in ar] if n > 1 else -999 # variance needs at least 2 values
stdev = math.sqrt(sum(ssd) / (n - 1)) if n > 1 else -999
cv = stdev / avg if n > 1 else -999
mn = min(ar) if n > 0 else -999
mx = max(ar) if n > 0 else -999
amp = mx - avg if n > 0 else -999
return OrderedDict([('count', n), ('total', total), ('min', mn), ('max', mx),
('avg', avg), ('amp', amp), ('sd', stdev), ('cv', cv)])
def _rain_probs(self, start_slice, end_slice):
"""
Rain probability statistics.
Calculate the no. of wet and dry days, the probability of two consecutive
dry days (PDD), two consecutive wet days (PWW), a dry day, then a wet day (PWD),
a wet day, then a dry day (PDW), and the probability of a dry day (PD) and a
wet day (PW).
# Arguments
start_slice (int): start index of weather data (starting day of month)
end_slice (int): end index of weather data (ending day of month)
# Returns
OrderedDict: rain statistics
"""
lst = self.metdata[-1]['rain'][start_slice:end_slice]
nwd, nww, nd, nw = 0, 0, 0, 0 # no. of wet-dry, wet-wet, dry, and wet days
numdays = len(lst) # number of days in a year (365)
nday = 0
# count number of dry and wet days; checks two consecutive days per loop cycle
while nday + 1 < numdays:
# a rain day if rainfall > 0.0
rain1 = lst[nday] > 0.0 # any rain on day (t)?
rain2 = lst[nday + 1] > 0.0 # any rain on the next day (t+1)?
if not rain1 and rain2: # a dry day, followed by a wet day
nwd += 1
if rain1 and rain2: # a wet day, followed by another wet day
nww += 1
if not rain1: # count the number of dry days
nd += 1
# need to check the rain status on the last day of the year
# because this loop checks every two consecutive days (not every day)
if (nday + 1) == (numdays - 1) and not rain2:
nd += 1 # the last day of the year is a dry day
nday += 1
# finished counting the number of dry and wet days, so now calculate the probabilities:
nw = numdays - nd
pwd = nwd / nd # P(W|D) - dry then wet day
pww = nww / nw # P(W|W) - both wet days
pdd = 1 - pwd # P(D|D) - both dry days
pdw = 1 - pww # P(D|W) - wet then dry day
pw = nw / numdays # P(W) - a wet day
pd = nd / numdays # P(D) - a dry day
return OrderedDict([('wetdays', nw), ('drydays', nd), ('p(w|d)', pwd),
('p(d|w)', pdw), ('p(w|w)', pww), ('p(d|d)', pdd),
('p(w)', pw), ('p(d)', pd)])
def _fit_gamma(self, *args):
"""
Fit a gamma probability distribution to the rainfall data.
Estimate the shape and scale parameters for a two-parameter gamma distribution.
!!! note
The location gamma parameter is set to zero.
# Arguments
args (list): start and end of list (to obtain monthly values, for example)
(default is empty to fit the whole wind speed data)
# Returns
OrderedDict: shape and scale of the gamma function
"""
raindata = self.metdata[-1]['rain']
x = raindata if len(args) == 0 else raindata[args[0]:args[1]]
x = [v for v in x if v > 0.0] # fit only to non-zero values (rainfall days)
# can only fit a gamma function with 2 or more values:
if len(x) > 1:
shape, scale = rv_continuous.fit(gamma, x, floc=0)[::2] # location set to zero
else:
shape, scale = (-999, -999) # error codes
return OrderedDict([('gamma_shape', shape), ('gamma_scale', scale)])
def _fit_weibull(self, *args):
"""
Fit a Weibull probability distribution to the wind speed data.
Estimate the shape and scale parameters for a two-parameter Weibull distribution.
# Arguments
args (list): start and end of list (to obtain monthly values, for example)
(default is empty to fit the whole wind speed data)
# Returns
OrderedDict: shape and scale of the Weibull function
"""
winddata = self.metdata[-1]['wind']
x = winddata if len(args) == 0 else winddata[args[0]:args[1]]
shape, scale = rv_continuous.fit(exponweib, x, fa=1, floc=0)[1::2]
return OrderedDict([('weibull_shape', shape), ('weibull_scale', scale)])
def _collect_months(self, find_param, find_stat):
"""
Return all the monthly values for a given parameter and statistic.
# Arguments
find_param (str): name of weather parameter
find_stat (str): name of weather statistic
# Returns
list: monthly values for a given parameter and statistic
"""
lst = []
for params in self.statdict[-1].values():
for param, stats in params.items():
for stat, val in stats.items():
if param == find_param and stat == find_stat:
lst.append(val)
return lst
def _collect_stats(self, find_month, find_param):
"""
Return all the statistical values for a given month and parameter.
# Arguments
find_month (int): month number
find_param (str): name of weather parameter
# Returns
list: statistical values for a given month and parameter
"""
lst = []
for month, params in self.statdict[-1].items():
for param, stats in params.items():
if month == find_month and param == find_param:
for val in stats.values():
lst.append(val)
return lst
def _collect_params(self, find_month, find_stat):
"""
Return all the parameter values for a given month and statistic.
# Arguments
find_month (int): month number
find_stat (str): name of statistic
# Returns
list: statistical values for a given month and statistic
"""
lst = []
for month, params in self.statdict[-1].items():
for stats in params.values():
for stat, val in stats.items():
if month == find_month and stat == find_stat:
lst.append(val)
return lst
def collect(self, find_month=None, find_param=None, find_stat=None):
"""
Collect all values for a given/specified condition.
# Arguments
find_month (int/None): month number
find_param (str/None): name of the weather parameter
find_stat (str/None): name of the statistic
!!! note
The `month` list is an zero-based index, so this means
`month`=0 is Jan, `month`=1 is Feb.,..., `month`=11 is Dec, and
`month`=12 is for the whole year.
# Returns
list: a list of values, depending on the following example
# Example
```text
Specified Returns (as a list)
------------------------------------------------------------------------------
parameter and statistic all monthly values
e.g.,
find_param = 'tmin' returns every month's average (find_stat='avg')
find_stat = 'avg' min. air temperature (find_param='tmin')
month and parameter all statistical values
e.g.,
find_month = 1 returns all the statistical values for max.
find_param = 'tmax' air temperature (find_param='tmax') for Feb.
(note: zero-based index, so, find_month=1 is Feb.).
Statistics related to max. air temperature are:
count, total, min., max., avg, amp, and cv (those
returned by function _basic_stats), but for
find_param='rain', the statistics related to rain
are those returned by functions _basic_stats and
_rain_stats (i.e., rain has more statistics than
max. air temperature).
month and stat all parameter values
e.g.,
find_month = 3 returns the minimum (find_stat='min') values for
find_stat = 'min' all of April's (find_month=3) parameters,
such as: 'tmin', 'tmax', 'wind', and 'rain'.
```
# Raises
ValueError: raised when the no. of arguments passed into this function is not two
"""
# Two (and only two) arguments must be supplied
if [find_month, find_param, find_stat].count(None) != 1:
raise ValueError('Requires two (and only two) arguments.')
if [find_month, find_param].count(None) == 0:
return self._collect_stats(find_month, find_param)
elif [find_month, find_stat].count(None) == 0:
return self._collect_params(find_month, find_stat)
else:
return self._collect_months(find_param, find_stat)
def _paramlist(self):
"""
Determine what parameters to show.
# Returns
list: weather parameters
"""
dct = OrderedDict()
for params in self.statdict[-1].values():
for param in params.keys():
dct[param] = 0
return list(dct.keys())
def _statlist(self):
"""
Determine what data to show.
# Returns
list: statistic parameters
"""
dct = OrderedDict()
for params in self.statdict[-1].values():
for stats in params.values():
for stat in stats.keys():
dct[stat] = 0
return list(dct.keys())
def _update_stats(self):
"""
Perform all statistics on the weather data.
# Returns
None:
"""
self.metdata.append(self.annual_weather()) # weather data for current year
self.statdict.append(OrderedDict()) # new set of statistics results
for nmth in range(13): # index 0 to 11: Jan to Dec, index 12: whole year
s, e = MeteoStats.slice(nmth)
self.statdict[-1][nmth] = OrderedDict()
# statistics for every weather parameter:
for f in self.metdata[-1].keys():
self.statdict[-1][nmth][f] = OrderedDict(self._basic_stats(f, s, e, f == 'rain'))
# additional statistics for the rain parameter:
self.statdict[-1][nmth]['rain'].update(self._rain_probs(s, e))
self.statdict[-1][nmth]['rain'].update(self._fit_gamma(s, e))
# additional statistics for the wind parameter:
self.statdict[-1][nmth]['wind'].update(self._fit_weibull(s, e))
def _write_stats(self, fname, append_to_file=False):
"""
Write to file the annual weather data for current year and their statistics.
Also writes to file the raw statistics for every month and year. The name
'-raw' is appended to the fileame of the output txt file.
# Arguments
fname (str): path and name of output text file
append_to_file (bool): `False` (default) to create a new output file, else
`True` to append output to an exisiting file
# Returns
None:
"""
def _are_all_ints():
"""Return True if all values in a list are integers."""
bintegers = True # assume all integers first
for strval in flst:
try:
sv = str(strval)
int(sv) # will throw exception if conversion fails
if sv.find('.') >= 0:
bintegers = False # found a decimal, so assume it is a float
break # exit loop since not all are integers
except ValueError:
bintegers = False # at least one item in the list is not a number
break # exit loop
return bintegers
self._update_stats() # perform all the statistics calculations
paramlst = self._paramlist() # get the full list of parameters
statlst = self._statlist() # get the full list of statistics
nspan = 15 # width (no. of spaces) for each column
mode = 'w' if not append_to_file else 'a'
with open(fname, mode) as fout:
fout.write('## Year no. {0}:\n'.format(self.__met.nyears))
# print to file the column headings and format the data for each column:
nfields = len(paramlst)
txt = '{:>' + str(nspan) + 's},'
txt += (('{:>' + str(nspan) + 's},') * nfields).rstrip(',') + '\n'
fout.write(txt.format(*list(['doy', ] + [field for field in paramlst])))
txt = '{:>' + str(nspan) + 'd},'
txt += (('{:>' + str(nspan) + '.3f},') * nfields).rstrip(',') + '\n'
# print to file the weather parameters for each day of the year:
gw = self.metdata[-1]
for day in range(365):
lss = list([day + 1, ] + [gw[field][day] for field in paramlst])
fout.write(txt.format(*lss))
# print to file the statistics for every weather parameter:
txt = '\n\nSUMMARY OF EACH MONTH AND WHOLE YEAR:\n'
txt += (('{:>' + str(nspan) + 's},') * 15).rstrip(',') + '\n'
fout.write(txt.format('PARAM', 'STAT',
*['MONTH ' + str(n + 1) for n in range(12)], 'ALL'))
txt = ('{:>' + str(nspan) + 's},') * 2
txt_d = txt + (('{:>' + str(nspan) + 'd},') * 13).rstrip(',') + '\n'
txt_f = txt + (('{:>' + str(nspan) + '.3f},') * 13).rstrip(',') + '\n'
for p in paramlst:
for s in statlst:
flst = self._collect_months(p, s)
if flst: # write only if stat found
if _are_all_ints():
fout.write(txt_d.format(*[p, s], *flst))
else:
fout.write(txt_f.format(*[p, s], *flst))
fout.write('\n') # additional line separator between two different parameters
# write the raw statistics data to another file (CSV text file):
ndot = fname.rfind('.') # find '.' for any file extension
# '.' location must not precede back or forward slash
if ndot > 0 and (fname.rfind('\\') < ndot or fname.rfind('/') < ndot):
fnameraw = "{0}-{2}.{1}".format(*fname.rsplit('.', 1) + ['raw'])
else:
fnameraw = fname + "-raw"
with open(fnameraw, mode) as fout:
if not append_to_file:
# write the column headers/titles:
txt = 'year,month'
# retrieve the statistics name for every weather parameter
for param, vals in self.statdict[-1][0].items():
for stat in vals.keys():
txt += ',' + param + '_' + stat
fout.write(txt + '\n')
# now write the raw data:
for mth in range(13):
fout.write('{},{}'.format(self.__met.nyears, mth+1))
for p in paramlst:
statlst = self._collect_stats(mth, p)
if statlst:
txt = ',{}' * len(statlst)
fout.write(txt.format(*[stat for stat in statlst]))
fout.write('\n')
def output_stats(self, fname, append_to_file=False):
"""
Write to file the annual weather data for all years and their statistics.
Also writes to file the raw statistics for every month and year. The name
'-raw' is appended to the fileame of the output txt file.
# Arguments
fname (str): path and name of output text file
append_to_file (bool): `False` (default) to create a new output file, else
`True` to append output to an exisiting file
# Returns
None:
"""
self.metdata = [] # reset lists
self.statdict = []
met = self.__met
if met.is_generated:
# weather is generated per year, so just do stats for this current year
self._write_stats(fname, append_to_file)
else:
# weather is from file, and weather could be for several years
numyears = int(met.annwthr.total_years()) # no. of years
# save current position in weather data
savedyear = met.nyears
saveddoy = met.doy
# do stats for each year:
for yr in range(numyears):
met.annwthr.update(yr)
met.nyears = yr + 1
bappend = append_to_file if yr == 0 else numyears > 1
self._write_stats(fname, bappend)
# restore to previous position in weather table
met.annwthr.update(savedyear)
met.nyears = savedyear
met.update_weather(nextdoy=saveddoy, reuse=True)
def annual_weather(self):
"""
Return the entire daily weather data set for the current year.
# Returns
OrderedDict: annual daily weather
"""
met = self.__met
current_doy = met.doy # save current DOY to reset later
# get all weather fields from annual weather and add new ones
annwthr = OrderedDict()
fields = [field for field in met.annwthr.fields] + ['totrad', 'drrad', 'dfrad']
for field in fields:
annwthr.update({field: [0.0 for _ in range(365)]})
# do weather for whole year
for i in range(365):
met.update_weather(nextdoy=i + 1, reuse=True)
annwthr['totrad'][i], annwthr['drrad'][i], annwthr['dfrad'][i] = met.dayrad
for field in met.annwthr.fields:
annwthr[field][i] = met.annwthr(field)[i]
# reset to previous state before updating the weather table:
met.update_weather(nextdoy=current_doy, reuse=True)
return annwthr