-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathalternative_solar_profiles.py
1108 lines (900 loc) · 52.7 KB
/
alternative_solar_profiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""
Alternative Solar Profiles (ASP) algorithm used for solar forecasting:
Original author in MATLAB: James Follum and Trevor Hardy
The algorithm creates a probabilistic model for the solar "state" (defined by the ghi, dni,
cloud type, and temperature values) for each month on daily and hourly timescales based on
historical data downloaded from NREL's NSRDB: https://nsrdb.nrel.gov/
To create solar trials, the algorithm randomly samples the model for a given month and hour
given the probability of transitioning from one "state" to another "state". States are first
created for each day of a trial, and then hourly states are generated iteratively until a set
of hourly states is found that matches the daily state or the maximum number of iterations
is reached. This maximum number is set to 200 by default, as testing showed that for 85% of
days, 300 iterations was sufficient to reach agreement.
File contents:
Classes:
AlternativeSolarProfiles
Standalone functions:
date_parser
"""
import numpy as np
import pandas as pd
import random
import math
import os
import multiprocessing
from scipy import stats
import datetime as dt
from validation import validate_all_parameters, log_error
from config import SOLAR_DATA_DIR
class AlternativeSolarProfiles:
"""
Class to create solar profiles using a probabilistic model based on historical solar data.
Parameters
----------
longitude: Site longitude in degrees
latitude: Site latitude in degrees
num_trials: Number of solar profiles to create
length_trials: Length of solar profiles in hours
start_year: Start year for solar data download
end_year: End year for solar data download
num_hourly_ghi_states: Number of discrete GHI states for hourly model
num_hourly_dni_states: Number of discrete DNI states for hourly model
num_daily_ghi_states: Number of discrete GHI states for daily model
num_daily_dni_states: Number of discrete DNI states for daily model
cloud_hours: Tuple (first hour, last hour) bounding the hours of the day used to set
the day's cloud state; the first hour is included and the last hour is excluded
temp_bins: Temperature bins
max_iter: The maximum number of iterations allowed for trying to match hourly and
daily states. After this number is reached the best set of hourly states generated
thus far is used.
multithreading: Whether to use multithreading to speed up the calculation. This is
set to True by default, but should be set to False for debugging
Methods
----------
create_state_transition_matrices: Creates monthly state transition matrices from
historical NREL solar data to hold the probability of transitioning from one solar
state to another.
load_nrel_data: Loads pre-downloaded NREL solar data from csv files.
bin_hourly_data: Bins hourly historical solar data into buckets based on ghi, dni,
cloud type, and temperature values.
bin_daily_data: Bins daily historical solar data into buckets based on ghi, dni, cloud
type, and temperature values.
clean_temperature: Replaces consecutive 0s in historical temperature data with nans,
with a threshold at 4 or more 0s in a row.
create_hourly_state_transition_matrices: Creates an hourly state transition matrix,
based on historical solar data. The transition matrix specifies the probability of
transition from one solar state (determined by GHI, DNI, cloud cover type, and
temperature) to another, for each hour and month of the year.
create_daily_state_transition_matrices: Creates a daily state transition matrix, based
on historical solar data. The transition matrix specifies the probability of
transition from one solar state (determined by GHI, DNI, and cloud cover type) to
another, for each month of the year.
create_trial_data: Simulates solar profiles based on hourly and daily transition
matrices, which specify the probability of transitioning from a given solar state
(determined by GHI, DNI,temperature, and cloud cover type) to another for each
hour and month of the year.
calc_solar_params_from_states: Calculates solar params (ghi, dni, temp) from hourly
states and clear sky data.
generate_trial_date_ranges: Generates date_ranges for each trial and removes leap
days.
preprocess_states: Groups state dataframes to speed up creation of trials.
generate_random_state_daily: Generates random daily states for a solar trial.
compare_hourly_daily_states: Iteratively generates hourly states, checking for
consistency with the daily state.
generate_random_state_hourly: Generates random hourly states for a solar trial.
aggregate_hourly_states: Aggregates hourly states to a daily state for comparison.
Calculated Attributes
----------
nrel_data_df: Dataframe with historical data downloaded from NREL's NSRDB
hourly_states: Dataframe with binned hourly historical solar data
daily_states: Dataframe with binned daily historical solar data
simple_prob_hourly: Dataframe containing every unique set of month, hour, and
to-states, along with the probability of getting to that to-state for that
month/hour, independent of a from-state.
state_prob_hourly: Dataframe containing every unique set of month, hour, from-
(previous hour) states and to- (current hour) states, along with counts and
probabilities. This is the probability of getting a certain to-state given the
from-state.
simple_prob_daily: Dataframe containing every unique set of month and to-states, along
with the probability of getting to that to-state for that month, independent of a
from-state.
state_prob_daily: Dataframe containing every unique set of month, from- (previous
day) states, and to- (current day) states, along with counts and probabilities.
This is the probability of getting a certain to-state given the from-state.
num_iters: Keeps track of the number of iterations required for the daily and hourly
states to match.
state_prob_hourly_grouped: Dictionary containing state_prob_hourly data grouped by
month, hour, night state, and from-state. This is stored as a dictionary to speed
up the calculation.
state_prob_daily_grouped: Dictionary containing state_prob_daily data grouped by month
and from-state.
simple_prob_hourly_grouped: Dictionary containing simple_prob_hourly data grouped by
month, hour, and night state.
simple_prob_daily_grouped: Dictionary containing simple_prob_daily data grouped by
month.
solar_trials: Holds generated trial data.
"""
def __init__(self, latitude, longitude, num_trials, length_trials, start_year=1998,
             end_year=2016, num_hourly_ghi_states=11, num_hourly_dni_states=11,
             num_daily_ghi_states=11, num_daily_dni_states=11, cloud_hours=(10, 17),
             temp_bins=range(-30, 50, 3), max_iter=200, multithreading=True,
             solar_data_dict=None, validate=True):
    """
    Store the model configuration and optionally validate it.

    inputs
        latitude: Site latitude in degrees
        longitude: Site longitude in degrees
        num_trials: Number of solar profiles to create (cast to int)
        length_trials: Length of solar profiles in hours (cast to int)
        start_year: First year of historical data to use (cast to int)
        end_year: Last year of historical data to use (cast to int)
        num_hourly_ghi_states: Number of discrete GHI states for hourly model
        num_hourly_dni_states: Number of discrete DNI states for hourly model
        num_daily_ghi_states: Number of discrete GHI states for daily model
        num_daily_dni_states: Number of discrete DNI states for daily model
        cloud_hours: Tuple of (first hour, last hour) used to set the day's cloud state
        temp_bins: Temperature bins
        max_iter: Maximum number of iterations for matching hourly and daily states
        multithreading: Whether trials are generated in parallel
        solar_data_dict: Optional dictionary of already-loaded solar dataframes keyed
            by year. When omitted (or None) a new empty dictionary is created for this
            instance, so instances never share one default dictionary.
        validate: If True, the stored parameters are passed to
            validate_all_parameters
    """
    # Assign parameters from arguments
    self.latitude = latitude
    self.longitude = longitude
    self.num_trials = int(num_trials)
    self.length_trials = int(length_trials)
    self.start_year = int(start_year)
    self.end_year = int(end_year)
    self.num_hourly_ghi_states = num_hourly_ghi_states
    self.num_hourly_dni_states = num_hourly_dni_states
    self.num_daily_ghi_states = num_daily_ghi_states
    self.num_daily_dni_states = num_daily_dni_states
    self.cloud_hours = cloud_hours
    self.temp_bins = temp_bins
    self.max_iter = max_iter
    self.multithreading = multithreading
    # A dict literal as the default would be one object shared by every instance;
    # build a fresh one per instance instead. A caller-supplied dict is kept as-is.
    self.solar_data_dict = {} if solar_data_dict is None else solar_data_dict

    # Initialize other attributes
    self.nrel_data_df = None
    self.hourly_states = None
    self.daily_states = None
    self.simple_prob_hourly = None
    self.state_prob_hourly = None
    self.simple_prob_daily = None
    self.state_prob_daily = None
    self.num_iters = []
    self.state_prob_hourly_grouped = None
    self.state_prob_daily_grouped = None
    self.simple_prob_hourly_grouped = None
    self.simple_prob_daily_grouped = None
    self.solar_trials = []

    # Validate input arguments
    if validate:
        # The keys are the names the validation module receives; some are
        # abbreviated relative to the attribute names (e.g. 'cld_hours').
        args_dict = {'latitude': self.latitude,
                     'longitude': self.longitude,
                     'num_trials': self.num_trials,
                     'length_trials': self.length_trials,
                     'start_year': self.start_year,
                     'end_year': self.end_year,
                     'num_ghi_states': self.num_hourly_ghi_states,
                     'num_dni_states': self.num_hourly_dni_states,
                     'num_daily_ghi_states': self.num_daily_ghi_states,
                     'num_daily_dni_states': self.num_daily_dni_states,
                     'cld_hours': self.cloud_hours,
                     'temp_bins': self.temp_bins,
                     'max_iter': self.max_iter,
                     'multithreading': self.multithreading,
                     'solar_data_dict': self.solar_data_dict}
        validate_all_parameters(args_dict)
def create_state_transition_matrices(self):
    """
    Build the hourly and daily state transition matrices from historical data.

    The method runs the whole preparation pipeline in order: load the historical
    data, clean the temperature column, repair negative cloud types, bin the data
    into hourly and daily states, and finally compute the transition probabilities.

    inputs
        self.latitude, self.longitude: Site location in degrees
        self.start_year, self.end_year: Range of years of historical data
        self.num_hourly_ghi_states, self.num_hourly_dni_states: Number of discrete
            GHI / DNI states for the hourly model
        self.num_daily_ghi_states, self.num_daily_dni_states: Number of discrete
            GHI / DNI states for the daily model
        self.temp_bins: Bins for temperature data
        self.cloud_hours: Hours of the day (range) used to set the day's cloud state
    outputs
        self.state_prob_hourly: Probability of each hourly to-state given the month,
            hour and from-state (the previous hour's state)
        self.simple_prob_hourly: Probability of each hourly to-state given only the
            month and hour
        self.state_prob_daily: Probability of each daily to-state given the month
            and from-state (the previous day's state)
        self.simple_prob_daily: Probability of each daily to-state given only the
            month
    """
    # Historical record first, then blank out suspicious runs of 0 temperatures
    self.load_nrel_data()
    self.clean_temperature()

    # Negative cloud types are each replaced by a random integer from 0 to 10
    invalid_cloud = self.nrel_data_df['cloud_type'] < 0
    replacement = self.nrel_data_df.loc[invalid_cloud, 'cloud_type'].apply(
        lambda _: random.randrange(11))
    self.nrel_data_df.loc[invalid_cloud, 'cloud_type'] = replacement

    # Discretize the data, then count transitions between the discrete states
    for step in (self.bin_hourly_data,
                 self.bin_daily_data,
                 self.create_hourly_state_transition_matrices,
                 self.create_daily_state_transition_matrices):
        step()
def load_nrel_data(self):
    """
    Load historical NREL solar data into self.nrel_data_df.

    Data comes from self.solar_data_dict (one dataframe per year) when that
    dictionary is non-empty, otherwise from the per-year csv files stored under
    SOLAR_DATA_DIR/nrel/<latitude>_<longitude>/.

    inputs
        self.latitude: Site latitude in degrees
        self.longitude: Site longitude in degrees
        self.start_year: First year to load
        self.end_year: Last year to load (inclusive)
        self.solar_data_dict: Optional pre-loaded dataframes keyed by year. The
            dataframes are only read; the dictionary is not modified.
    outputs
        self.nrel_data_df: Dataframe with the columns year, month, day, hour, dni,
            ghi, clear_sky_dni, clear_sky_ghi, cloud_type, solar_zenith_angle,
            temperature and datetime
    raises
        FileNotFoundError: if the csv file of any requested year is missing (the
            message is also passed to log_error)
    """
    # Raw column names mapped to the names used by the rest of the class. Renaming
    # by name (not by position) keeps the labels right whatever the file's order.
    column_names = {'Year': 'year', 'Month': 'month', 'Day': 'day', 'Hour': 'hour',
                    'DNI': 'dni', 'GHI': 'ghi', 'Clearsky DNI': 'clear_sky_dni',
                    'Clearsky GHI': 'clear_sky_ghi', 'Cloud Type': 'cloud_type',
                    'Solar Zenith Angle': 'solar_zenith_angle',
                    'Temperature': 'temperature'}
    raw_columns = list(column_names)
    years = range(self.start_year, self.end_year + 1)

    if not self.solar_data_dict:
        filedir = os.path.join(SOLAR_DATA_DIR, 'nrel',
                               '{}_{}'.format(self.latitude, self.longitude))
        paths = [os.path.join(filedir, '{}_{}_{}.csv'.format(
            self.latitude, self.longitude, year)) for year in years]
        # Check every requested year up front so a missing file (or directory)
        # always produces the same explanatory error
        for path in paths:
            if not os.path.isfile(path):
                message = 'NREL data not found, check the latitude and longitude '\
                          'or run get_solar_data() first.'
                log_error(message)
                raise FileNotFoundError(message)
        # read_csv returns the columns in file order regardless of the order given
        # in usecols, so select them again to pin the order
        yearly_frames = [pd.read_csv(path, usecols=raw_columns)[raw_columns]
                         for path in paths]
    else:
        yearly_frames = [self.solar_data_dict[year][raw_columns] for year in years]

    # One concatenation instead of growing the dataframe year by year
    self.nrel_data_df = pd.concat(yearly_frames)
    self.nrel_data_df['datetime'] = self.nrel_data_df.apply(
        lambda row: date_parser(row['Month'], row['Day'], row['Year'], row['Hour']),
        axis=1)
    self.nrel_data_df = self.nrel_data_df.rename(columns=column_names)

    # Recast month, hour, year, day datatypes - this doesn't work with parse_dates
    for column in ('year', 'month', 'day', 'hour'):
        self.nrel_data_df[column] = self.nrel_data_df[column].astype(np.int64)
def bin_hourly_data(self):
    """
    Bin hourly historical solar data into buckets based on ghi, dni, and temperature
    values.

    The ghi and dni states are the rounded fraction of the clear-sky value that is
    missing, scaled to the number of states: 0 means the measurement equals the
    clear-sky value. Results that are NaN (for example 0/0 when both the clear-sky
    and the measured value are 0) are replaced by the marker value math.exp(1).
    Negative states, e.g. from dni > clear-sky dni, are set to 0.

    inputs
        self.nrel_data_df: Historical solar data
        self.num_hourly_ghi_states: Number of discrete GHI states for hourly model
        self.num_hourly_dni_states: Number of discrete DNI states for hourly model
        self.temp_bins: Bins for temperature data
    outputs
        self.hourly_states: Dataframe with the columns year, month, day, hour,
            ghi_state, dni_state, cloud_type and temp_state, indexed 0..n-1
    """
    hourly_data = self.nrel_data_df.copy(deep=True)

    # Calculate states (bins) for ghi and dni
    for kind, num_states in (('ghi', self.num_hourly_ghi_states),
                             ('dni', self.num_hourly_dni_states)):
        clear_sky = hourly_data['clear_sky_' + kind]
        state = np.round(
            (clear_sky - hourly_data[kind]) / clear_sky * (num_states - 1)
        ).fillna(math.exp(1))
        # Gets rid of any negative states arising from, e.g., dni > clear sky dni
        hourly_data[kind + '_state'] = state.mask(state < 0, 0)

    # Calculate states for temperature data in one vectorized call. np.digitize
    # assigns NaN temperatures to the last bin, so those rows are blanked out
    # afterwards; "where" upcasts to float only when a NaN is actually written.
    temperature = hourly_data['temperature']
    bin_index = pd.Series(np.digitize(temperature.to_numpy(),
                                      np.asarray(self.temp_bins)))
    hourly_data['temp_state'] = bin_index.where(
        temperature.notna().to_numpy()).to_numpy()

    # Clean up columns and index
    self.hourly_states = hourly_data[['year', 'month', 'day', 'hour',
                                      'ghi_state', 'dni_state',
                                      'cloud_type', 'temp_state']].reset_index(
                                          drop=True)
def bin_daily_data(self):
    """
    Bin daily historical solar data into buckets based on ghi, dni, and cloud type.

    The ghi and dni states are the rounded fraction of the day's clear-sky total
    that was observed, scaled to the number of states and shifted by 1 (so a day
    with no radiation is state 1). The cloud state is the most common cloud type
    among the hours h with cloud_hours[0] <= h < cloud_hours[1].

    inputs
        self.nrel_data_df: Historical solar data
        self.num_daily_ghi_states: Number of discrete GHI states for daily model
        self.num_daily_dni_states: Number of discrete DNI states for daily model
        self.cloud_hours: Hours of the day (range) used to set the day's cloud state
    outputs
        self.daily_states: Dataframe with the columns year, month, day, ghi_state,
            dni_state and cloud_state. Days without any hour inside cloud_hours are
            dropped by the inner merge.
    """
    day_keys = ['year', 'month', 'day']

    # Aggregate historical data to daily values
    daily_data = self.nrel_data_df.groupby(day_keys).sum(
        numeric_only=True).reset_index()

    # Calculate states (bins) for daily ghi and dni data
    daily_data['ghi_state'] = np.round(daily_data['ghi'] /
                                       daily_data['clear_sky_ghi'] *
                                       (self.num_daily_ghi_states - 1)) + 1
    daily_data['dni_state'] = np.round(daily_data['dni'] /
                                       daily_data['clear_sky_dni'] *
                                       (self.num_daily_dni_states - 1)) + 1

    def most_common(values):
        # Depending on the SciPy version, ModeResult.mode is either a scalar or a
        # length-1 array; atleast_1d(...)[0] yields a scalar in both cases.
        return np.atleast_1d(stats.mode(values.to_numpy()).mode)[0]

    # Daily cloud data: get most common cloud type for each day from daylight hours
    in_cloud_window = ((self.nrel_data_df['hour'] >= self.cloud_hours[0]) &
                       (self.nrel_data_df['hour'] < self.cloud_hours[1]))
    daily_cloud_type = self.nrel_data_df.loc[in_cloud_window].groupby(
        day_keys)['cloud_type'].apply(most_common)
    daily_data = daily_data.merge(
        daily_cloud_type.reset_index(name='cloud_state'), on=day_keys)

    self.daily_states = daily_data[['year', 'month', 'day', 'ghi_state',
                                    'dni_state', 'cloud_state']]
def clean_temperature(self, min_run_length=4):
    """
    Replace long runs of consecutive 0s in the historical temperature data with
    nans. Runs shorter than min_run_length are kept as they are.

    inputs
        min_run_length: Smallest number of consecutive 0s that is replaced
            (4 by default)
        self.nrel_data_df: Historical solar data with a 'temperature' column
    outputs
        self.nrel_data_df: The 'temperature' column has every run of at least
            min_run_length consecutive 0s set to nan. The column is left untouched
            when there is no such run.
    """
    temperature = self.nrel_data_df['temperature']

    # Row positions (not index labels) of every 0 reading
    zero_positions = np.flatnonzero((temperature == 0).to_numpy())
    if not len(zero_positions):
        return

    # A jump of more than 1 between neighbouring zero positions separates two
    # runs: the entry before the jump ends one run, the entry after starts the next
    run_breaks = np.flatnonzero(np.diff(zero_positions) > 1)
    run_starts = zero_positions[np.append(0, run_breaks + 1)]
    run_ends = zero_positions[np.append(run_breaks, len(zero_positions) - 1)]
    run_lengths = run_ends - run_starts + 1

    # Select long runs with a mask, which is well defined whether none, some, or
    # all of the runs reach the threshold
    is_long_run = run_lengths >= min_run_length
    if not is_long_run.any():
        return

    # nan needs a float column, so only convert once something is replaced
    cleaned = temperature.to_numpy(dtype=float, copy=True)
    for first, last in zip(run_starts[is_long_run], run_ends[is_long_run]):
        cleaned[first:last + 1] = np.nan
    self.nrel_data_df['temperature'] = cleaned
def create_hourly_state_transition_matrices(self):
    """
    Count the hour-to-hour state transitions in the historical data and turn the
    counts into probabilities, separately for every month and hour of the day.

    A state is the combination of ghi_state, dni_state, cloud_type and temp_state.
    Every hour is paired with the row before it: the current row supplies the
    to-state (and the month and hour), the previous row supplies the from-state.

    inputs:
        self.hourly_states: ghi, dni, cloud, and temperature states (binned values)
            for each hour from the historical data
    outputs:
        self.state_prob_hourly: one row per observed combination of month, hour,
            to-state and from-state, with its count, the total count of its
            (month, hour, from-state) group ('count_sum'), the ratio of the two
            ('prob') and a 'night_state' flag
        self.simple_prob_hourly: one row per observed combination of month, hour
            and to-state, with its count, the total count of its (month, hour)
            group ('count_sum'), the ratio of the two ('prob') and a 'night_state'
            flag
    """
    state_columns = ['ghi_state', 'dni_state', 'cloud_type', 'temp_state']
    to_columns = [name + '_to' for name in state_columns]
    from_columns = [name + '_from' for name in state_columns]
    period = ['month', 'hour']

    # Align row i+1 (to-state) with row i (from-state) on a shared 0..n-2 index
    current_rows = self.hourly_states.iloc[1:]
    current_rows.index = range(len(current_rows))
    previous_rows = self.hourly_states.iloc[:-1][state_columns]
    transitions = current_rows.merge(
        previous_rows, left_index=True, right_index=True,
        suffixes=['_to', '_from'])

    def probability_table(observed_columns, condition_columns):
        # Occurrences of each observed combination ...
        table = transitions.groupby(observed_columns)['year'].count().to_frame(
            name='count').reset_index()
        # ... divided by the occurrences of the condition they belong to
        totals = table.groupby(condition_columns)['count'].sum().reset_index()
        table = table.merge(totals, left_on=condition_columns,
                            right_on=condition_columns, suffixes=['', '_sum'])
        table['prob'] = table['count'] / table['count_sum']
        return table

    # Probability of a to-state given the from-state
    conditional = probability_table(period + to_columns + from_columns,
                                    period + from_columns)
    # Probability of a to-state independent of the from-state
    unconditional = probability_table(period + to_columns, period)

    # Flag rows whose ghi or dni to-state carries the marker value math.exp(1)
    marker = math.exp(1)
    for table in (unconditional, conditional):
        table['night_state'] = ((table['ghi_state_to'] == marker) |
                                (table['dni_state_to'] == marker))

    # Save to class attributes
    self.simple_prob_hourly = unconditional
    self.state_prob_hourly = conditional
def create_daily_state_transition_matrices(self):
    """
    Count the day-to-day state transitions in the historical data and turn the
    counts into probabilities, separately for every month.

    A state is the combination of ghi_state, dni_state and cloud_state. Every day
    is paired with the row before it: the current row supplies the to-state (and
    the month), the previous row supplies the from-state.

    inputs:
        self.daily_states: ghi, dni, and cloud states (binned values) for each day
            from the historical data
    outputs:
        self.state_prob_daily: one row per observed combination of month, to-state
            and from-state, with its count, the total count of its
            (month, from-state) group ('count_sum') and the ratio of the two
            ('prob')
        self.simple_prob_daily: one row per observed combination of month and
            to-state, with its count, the total count of its month ('count_sum')
            and the ratio of the two ('prob')
    """
    state_columns = ['ghi_state', 'dni_state', 'cloud_state']
    to_columns = [name + '_to' for name in state_columns]
    from_columns = [name + '_from' for name in state_columns]
    period = ['month']

    # Align row i+1 (to-state) with row i (from-state) on a shared 0..n-2 index
    current_rows = self.daily_states.iloc[1:]
    current_rows.index = range(len(current_rows))
    previous_rows = self.daily_states.iloc[:-1][state_columns]
    transitions = current_rows.merge(
        previous_rows, left_index=True, right_index=True,
        suffixes=['_to', '_from'])

    def probability_table(observed_columns, condition_columns):
        # Occurrences of each observed combination ...
        table = transitions.groupby(observed_columns)['year'].count().to_frame(
            name='count').reset_index()
        # ... divided by the occurrences of the condition they belong to
        totals = table.groupby(condition_columns)['count'].sum().reset_index()
        table = table.merge(totals, left_on=condition_columns,
                            right_on=condition_columns, suffixes=['', '_sum'])
        table['prob'] = table['count'] / table['count_sum']
        return table

    # Probability of a to-state given the from-state
    conditional = probability_table(period + to_columns + from_columns,
                                    period + from_columns)
    # Probability of a to-state independent of the from-state
    unconditional = probability_table(period + to_columns, period)

    # Save to class attributes
    self.simple_prob_daily = unconditional
    self.state_prob_daily = conditional
def create_trial_data(self, start_datetimes=None, validate=True):
    """
    Generate solar profiles by sampling the daily and hourly transition matrices.

    For every trial a date range is built, daily states are generated for it, and
    compare_hourly_daily_states produces the matching hourly states. The hourly
    states are finally converted to ghi, dni and temperature values.

    inputs:
        start_datetimes: optional start datetimes, one per trial. Passed on to
            generate_trial_date_ranges, which picks random ones when this is None.
        validate: if True and start_datetimes is given, num_trials, start_year,
            end_year and start_datetimes are passed to validate_all_parameters
        self.nrel_data_df: historical solar data
        self.simple_prob_hourly, self.simple_prob_daily: probabilities of each
            to-state independent of the from-state
        self.state_prob_hourly, self.state_prob_daily: probabilities of each
            to-state given the from-state
        self.num_trials: number of solar profiles to generate
        self.length_trials: length of profiles in hours
        self.multithreading: if True, every trial runs in its own
            multiprocessing.Process
    outputs:
        self.solar_trials: list to which one dataframe per trial is appended
    """
    # Validate input arguments
    if validate and start_datetimes is not None:
        validate_all_parameters({'num_trials': self.num_trials,
                                 'start_year': self.start_year,
                                 'end_year': self.end_year,
                                 'start_datetimes': start_datetimes})

    # Generate date ranges for each trial
    trial_date_ranges = self.generate_trial_date_ranges(start_datetimes)

    # Group state dataframes to speed up trial creation
    self.preprocess_states()

    # Per trial: the daily states plus the historical rows of the same datetimes
    trial_inputs = []
    for date_range in trial_date_ranges:
        daily_state = self.generate_random_state_daily(date_range)
        clear_sky_data = self.nrel_data_df.merge(
            date_range.to_frame(), left_on='datetime', right_on=0,
            how='inner').set_index('datetime')
        trial_inputs.append((daily_state, date_range, clear_sky_data))

    # Create the hourly states of every trial
    if self.multithreading:
        # One process per trial; each one reports its result through the queue
        result_queue = multiprocessing.Queue()
        workers = []
        for trial_input in trial_inputs:
            worker = multiprocessing.Process(
                target=self.compare_hourly_daily_states,
                args=trial_input + (result_queue,))
            workers.append(worker)
            worker.start()
        # Read one result per process before joining.
        # NOTE(review): results are taken in the order they arrive on the queue,
        # which may differ from the order of the inputs - confirm whether callers
        # rely on the trial order.
        trial_results = [result_queue.get() for _ in workers]
        # Wait until all processes have finished
        for worker in workers:
            worker.join()
    else:
        trial_results = [self.compare_hourly_daily_states(*trial_input)
                         for trial_input in trial_inputs]

    # Calculate hourly ghi, dni, and temperature values from states; each result
    # is unpacked as (hourly states, clear sky data)
    for hourly_states, clear_sky_data in trial_results:
        self.solar_trials.append(self.calc_solar_params_from_states(
            hourly_states, clear_sky_data))
def calc_solar_params_from_states(self, hourly_states, clear_sky_data):
    """
    Convert hourly states back into ghi, dni and temperature values.

    ghi and dni are the clear-sky value reduced by state / (number of states - 1).
    The temperature is bin_width * (temp_state - 0.5) + temp_bins[0], where
    bin_width is the distance between the first two temperature bins.

    inputs:
        hourly_states: Dataframe with the columns ghi_state, dni_state, temp_state
            and cloud_state for a given trial
        clear_sky_data: Dataframe with the columns clear_sky_ghi and clear_sky_dni,
            joined to hourly_states on the index
        self.num_hourly_ghi_states: Number of discrete GHI states for hourly model
        self.num_hourly_dni_states: Number of discrete DNI states for hourly model
        self.temp_bins: Bins for temperature data
    outputs:
        Dataframe with the columns ghi, dni, cloud_state and temp for the rows
        present in both inputs
    """
    # Attach the clear sky values of the same period to the states
    clear_sky_columns = clear_sky_data[['clear_sky_ghi', 'clear_sky_dni']]
    trial = hourly_states.merge(clear_sky_columns, left_index=True,
                                right_index=True)

    # State 0 keeps the full clear sky value, the highest state removes all of it
    ghi_kept = 1 - trial['ghi_state'] / (self.num_hourly_ghi_states - 1)
    dni_kept = 1 - trial['dni_state'] / (self.num_hourly_dni_states - 1)
    trial['ghi'] = trial['clear_sky_ghi'] * ghi_kept
    trial['dni'] = trial['clear_sky_dni'] * dni_kept

    # Temperature from the state number, the bin width and the first bin
    bin_width = np.diff(self.temp_bins)[0]
    first_bin = self.temp_bins[0]
    trial['temp'] = bin_width * (trial['temp_state'] - 0.5) + first_bin

    return trial[['ghi', 'dni', 'cloud_state', 'temp']]
def generate_trial_date_ranges(self, start_datetimes=None):
    """
    Generate an hourly date range for each trial, skipping leap days (Feb 29).

    Every returned range holds exactly self.length_trials hourly timestamps. Hours that
    fall on Feb 29 are dropped and the range is extended at its end by the same number of
    hours, so a range that touches a leap day finishes later than one that does not.
    Padding hours that would themselves land on Feb 29 are skipped as well.

    inputs
        start_datetimes: iterable of datetime-like objects, one per trial. If set as None
            (default), self.num_trials start times are sampled at random from the
            'datetime' column of self.nrel_data_df, excluding its last
            self.length_trials rows.
        self.nrel_data_df: historical solar data (only read when start_datetimes is None)
        self.length_trials: number of hours in each trial
        self.num_trials: number of trials to generate (only read when start_datetimes
            is None)
    outputs
        date_ranges: list of Pandas DatetimeIndex objects, one per start datetime
    """
    def is_leap_day(timestamps):
        # Boolean mask of the timestamps that fall on Feb 29 of any year
        return (timestamps.month == 2) & (timestamps.day == 29)

    one_hour = pd.Timedelta(hours=1)

    # Randomly generate start times for each trial
    if start_datetimes is None:
        start_datetimes = self.nrel_data_df.iloc[:-self.length_trials].sample(
            self.num_trials)['datetime'].values

    date_ranges = []
    for start_date in start_datetimes:
        # 'h' is the hourly alias; the upper-case 'H' spelling is deprecated in
        # recent pandas releases
        date_range = pd.date_range(start=start_date,
                                   periods=self.length_trials,
                                   freq='h')
        leap_mask = is_leap_day(date_range)
        if leap_mask.any():
            # Remember where the unfiltered range ended so that padding always continues
            # from there, even if the trailing hours were the ones removed
            next_start = date_range[-1] + one_hour
            date_range = date_range[~leap_mask]
            # Pad until the trial is back to full length; loop because the padding
            # itself may overlap a leap day
            while len(date_range) < self.length_trials:
                padding = pd.date_range(
                    start=next_start,
                    periods=self.length_trials - len(date_range),
                    freq='h')
                next_start = padding[-1] + one_hour
                date_range = date_range.append(padding[~is_leap_day(padding)])
        date_ranges.append(date_range)
    return date_ranges
def preprocess_states(self):
    """
    Group the state probability dataframes into dictionaries to speed up creation of
    trials.

    Each dictionary maps a tuple of grouping values to the sub-dataframe holding the rows
    with those values. Keys are always tuples, including the single-column grouping by
    month, so lookups do not depend on how the installed pandas version labels
    single-key groups.

    inputs
        self.state_prob_hourly: Dataframe with (at least) the columns month, hour,
            night_state, ghi_state_from, dni_state_from, cloud_type_from, and
            temp_state_from
        self.simple_prob_hourly: Dataframe with (at least) the columns month, hour, and
            night_state
        self.state_prob_daily: Dataframe with (at least) the columns month,
            ghi_state_from, dni_state_from, and cloud_state_from
        self.simple_prob_daily: Dataframe with (at least) the column month
    outputs
        (Dictionaries containing groupings of the input parameters)
        self.state_prob_hourly_grouped: Dict keys: (month, hour, night_state,
            ghi_state_from, dni_state_from, cloud_type_from, temp_state_from)
        self.simple_prob_hourly_grouped: Dict keys: (month, hour, night_state)
        self.state_prob_daily_grouped: Dict keys: (month, ghi_state_from,
            dni_state_from, cloud_state_from)
        self.simple_prob_daily_grouped: Dict keys: (month,)
    """
    def group_to_dict(frame, columns):
        # Parse a DataFrameGroupBy into a dict for faster access. Older pandas versions
        # yield a scalar label when grouping by a one-element list, newer ones yield a
        # 1-tuple; normalise so the keys are tuples either way.
        return {
            (label if isinstance(label, tuple) else (label,)): dataframe
            for label, dataframe in frame.groupby(columns)}

    # Build every grouping before assigning, so a failure leaves no partial state
    state_prob_hourly_grouped = group_to_dict(self.state_prob_hourly, [
        'month', 'hour', 'night_state', 'ghi_state_from',
        'dni_state_from', 'cloud_type_from', 'temp_state_from'])
    simple_prob_hourly_grouped = group_to_dict(self.simple_prob_hourly, [
        'month', 'hour', 'night_state'])
    state_prob_daily_grouped = group_to_dict(self.state_prob_daily, [
        'month', 'ghi_state_from', 'dni_state_from', 'cloud_state_from'])
    simple_prob_daily_grouped = group_to_dict(self.simple_prob_daily, ['month'])

    self.state_prob_hourly_grouped = state_prob_hourly_grouped
    self.simple_prob_hourly_grouped = simple_prob_hourly_grouped
    self.state_prob_daily_grouped = state_prob_daily_grouped
    self.simple_prob_daily_grouped = simple_prob_daily_grouped
def generate_random_state_daily(self, date_range):
    """
    Draw a random sequence of daily states, one per calendar day in the trial.

    Each day's state is sampled from the rows grouped under (month, previous ghi state,
    previous dni state, previous cloud state). If no such group exists - which is always
    the case for the first day, whose previous state is (None, None, None) - the rows
    grouped under (month,) are used instead. Within the selected rows the last column is
    used as the (unnormalised) sampling weight and columns 1 to 3 are taken as the new
    state.

    inputs:
        date_range: datetimeindex object for the trial
        self.state_prob_daily_grouped: dict of dataframes with transition probabilities,
            keyed by (month, ghi_state_from, dni_state_from, cloud_state_from)
        self.simple_prob_daily_grouped: dict of dataframes with probabilities that do not
            depend on a from-state, keyed by (month,)
    outputs:
        dataframe indexed by date with ghi_state, dni_state, and cloud_state columns
    """
    # NOTE(review): the new state is read from columns 1-3 of whichever table is used;
    # confirm that the from-state table is laid out so these are the to-states.
    trial_days = np.array(sorted(set(date_range.date)))
    previous_state = [None, None, None]
    drawn_states = []

    for day in trial_days:
        # Work on the underlying numpy values rather than the dataframe for speed
        transition_key = (day.month, *previous_state)
        try:
            candidates = self.state_prob_daily_grouped[transition_key].values
        except KeyError:
            # Unknown from-state for this month (or first day): fall back to the
            # probabilities that ignore the from-state
            candidates = self.simple_prob_daily_grouped[(day.month,)].values

        # Sample one row, weighting by the probability column
        weights = candidates[:, -1].astype(float)
        picked_row = np.random.choice(range(len(candidates)),
                                      p=weights / np.sum(weights))

        previous_state = candidates[picked_row, 1:4]
        drawn_states.append(previous_state)

    return pd.DataFrame(data=drawn_states, index=trial_days,
                        columns=['ghi_state', 'dni_state', 'cloud_state'])
def compare_hourly_daily_states(self, daily_states, date_range,
                                clear_sky_data, queue=None):
    """
    Generate hourly states day by day, retrying until they agree with the daily state.

    For each day, hourly states are drawn repeatedly and aggregated to daily values. A
    draw is accepted as soon as its aggregated ghi state and dni state equal the daily
    values and the daily cloud state is among the aggregated cloud modes. If
    self.max_iter draws are made without a full match, the earliest draw with the highest
    number of matching values is used; if no draw matched anything, the last draw is
    used.

    The consistency check is only made when the day's hours cover every hour in
    range(self.cloud_hours[0], self.cloud_hours[1]); for a partial day (e.g. a trial
    starting at 2pm) the first draw is accepted as is.

    inputs:
        daily_states: Dataframe with daily states for a trial, indexed by date
        date_range: Pandas date_range object for the trial
        clear_sky_data: historical solar data for the same dates as the trial, with a
            datetime index and a clear_sky_ghi column
        queue: optional queue; if given, the result is put on it instead of returned
        self.max_iter: maximum number of iterations for generating hourly states
        self.cloud_hours: Tuple with the first hour (inclusive) and last hour
            (exclusive) of the day that must be present for the check to run
        self.num_iters: List which receives, for each day, the number of rejected draws
            made before a state was chosen
    outputs:
        If queue is None: tuple of (Dataframe with hourly states for the trial,
        clear_sky_data). Otherwise nothing is returned and a list with the same two
        items is put on the queue.
    """
    state_columns = ['ghi_state', 'dni_state', 'cloud_state', 'temp_state']
    trial_rows = []
    previous_hour_state = [None, None, None, None]

    for day, target in daily_states.iterrows():
        # Restrict the trial hours and the clear sky data to this day
        day_hours = date_range[date_range.date == day]
        day_clear_sky = clear_sky_data[clear_sky_data.index.date == day]
        # Hours with no clear sky irradiance are treated as night-time
        is_night = day_clear_sky['clear_sky_ghi'] == 0
        # Whether the day contains all cloud hours does not depend on the draw
        required_hours = set(range(self.cloud_hours[0], self.cloud_hours[1]))
        is_full_day = required_hours <= set(day_hours.hour)

        attempts = 0
        top_score = 0
        chosen = None
        while attempts < self.max_iter:
            candidate = self.generate_random_state_hourly(
                day_hours, is_night, previous_hour_state)

            # Partial day at the start or end of a trial: nothing to compare against
            if not is_full_day:
                chosen = candidate
                break

            agg_ghi_state, agg_dni_state, cloud_modes = \
                self.aggregate_hourly_states(day_clear_sky, candidate)
            score = np.sum([
                agg_ghi_state == target['ghi_state'],
                agg_dni_state == target['dni_state'],
                target['cloud_state'] in cloud_modes])

            # Full agreement ends the search
            if score == 3:
                chosen = candidate
                break
            # Otherwise keep the draw only if it beats the best one so far
            if score > top_score:
                top_score = score
                chosen = candidate
            attempts += 1

        # Nothing matched at all within the iteration limit: use the last draw
        if chosen is None:
            chosen = candidate

        # Record the number of iterations required
        # Note: this only works when not run in parallel
        self.num_iters += [attempts]

        trial_rows.extend(chosen.values)
        previous_hour_state = chosen.iloc[-1].values

    trial_states = pd.DataFrame(
        data=trial_rows, index=date_range, columns=state_columns)
    if queue is None:
        # Sequential use: hand the result straight back
        return trial_states, clear_sky_data
    # Parallel use: deliver the result through the queue
    queue.put([trial_states, clear_sky_data])
def generate_random_state_hourly(self, hourly_date_range, night_states,
current_state):
"""
Generate random hourly states for a solar trial.
inputs:
hourly_date_range: date_range object with the hours for the current day to be
generated
night_states: list containing a Boolean indicating if it is night-time for each
hour in the day
current_state: array with hourly state from previous hour
self.length_trials: length of profiles in hours
self.nrel_data_df: historical solar data
self.state_prob_hourly_grouped: dataframe holding the probabilities of transition
to a given daily state, given the from-state, grouped by month, hour,
night_state and from-states.
self.simple_prob_hourly_grouped: dataframe holding the probabilities of
transitioning to a given daily state, independent of the from-state, grouped
by month, hour, and night_state.