diff --git a/docs/api.rst b/docs/api.rst index 94e149b1..78d482fa 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -248,12 +248,16 @@ identification. Daytime ------- -Functions that return a Boolean mask indicating day and night. +Functions that relate to determining day/night periods in a time +series, and getting sunrise and sunset times based on the day-night mask +outputs. .. autosummary:: :toctree: generated/ features.daytime.power_or_irradiance + features.daytime.get_sunrise + features.daytime.get_sunset Shading ------- diff --git a/docs/whatsnew/0.2.0.rst b/docs/whatsnew/0.2.0.rst index 695a8150..21873b64 100644 --- a/docs/whatsnew/0.2.0.rst +++ b/docs/whatsnew/0.2.0.rst @@ -1,6 +1,6 @@ .. _whatsnew_020: -0.2.0 (anticipated August 2023) +0.2.0 (anticipated December 2023) ----------------------------- Breaking Changes @@ -21,6 +21,14 @@ Breaking Changes Enhancements ~~~~~~~~~~~~ +* Added function :py:func:`~pvanalytics.features.daytime.get_sunrise` + for calculating the daily sunrise datetimes for a time series, based on the + :py:func:`~pvanalytics.features.daytime.power_or_irradiance` day/night mask output. + (:pull:`187`) +* Added function :py:func:`~pvanalytics.features.daytime.get_sunset` + for calculating the daily sunset datetimes for a time series, based on the + :py:func:`~pvanalytics.features.daytime.power_or_irradiance` day/night mask output. + (:pull:`187`) * Updated function :py:func:`~pvanalytics.features.daytime.power_or_irradiance` to be more performant by vectorization; the original logic was using a lambda call that was slowing the function speed down considerably. This update resulted in a ~50X speedup. 
(:pull:`186`) diff --git a/pvanalytics/features/daytime.py b/pvanalytics/features/daytime.py index c61c3381..880e6c0d 100644 --- a/pvanalytics/features/daytime.py +++ b/pvanalytics/features/daytime.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd from pvanalytics import util +from pandas.tseries.frequencies import to_offset def _rolling_by_minute(data, days, f): @@ -175,7 +176,6 @@ def power_or_irradiance(series, outliers=None, median length of the day when correcting errors in the morning or afternoon. [days] - Returns ------- Series @@ -187,8 +187,10 @@ def power_or_irradiance(series, outliers=None, ``NA`` values are treated like zeros. - Derived from the PVFleets QA Analysis project. - + References + ---------- + .. [1] Perry K., Meyers B., and Muller, M. "Survey of Time Shift Detection + Algorithms for Measured PV Data", 2023 PV Reliability Workshop (PVRW). """ series = series.fillna(value=0) series_norm = _filter_and_normalize(series, outliers).fillna(value=0) @@ -229,3 +231,141 @@ def power_or_irradiance(series, outliers=None, correction_window ) return ~night_corrected_edges + + +def _get_sunrise_sunset_daily_series(daytime_mask, transform): + # Get the sunset/sunrise series based on getting the first or last + # 'day' value for each day in the time series + series = daytime_mask.index[daytime_mask].to_series().groupby( + daytime_mask[daytime_mask].index.date).transform(transform).reindex( + daytime_mask.index) + series = series.groupby(series.index.date).ffill().bfill() + # Backfilling and front filling fills all NaN's, so we set cases not in + # the right day to NaN + series.loc[series.index.date != series.dt.date] = np.nan + return series + + +def get_sunrise(daytime_mask, freq=None, data_alignment='L'): + """ + Using the outputs of :py:func:`power_or_irradiance`, derive sunrise values + for each day in the associated time series. 
+ + This function assumes that each midnight-to-midnight period + (according to the timezone of the input data) has one sunrise + followed by one sunset. In cases where this is not satisfied + (timezone of data is substantially different from the location's + local time, locations near the poles, etc), or in the case of missing + data, the returned sunrise and sunset times may be invalid. + + Parameters + ---------- + daytime_mask : Series + Boolean series delineating night periods from day periods, where + day is True and night is False. + freq : str, optional + A pandas freqstr specifying the expected timestamp spacing for + the series. If None, the frequency will be inferred from the index of + ``daytime_mask``. + data_alignment : str, default 'L' + The data alignment of the series (left, right, or center). Data + alignment affects the value selected as sunrise. Options are 'L' (left- + aligned), 'R' (right-aligned), or 'C' (center-aligned) + + Returns + ------- + Series + Series of daily sunrise times with the same index as ``daytime_mask``. + + References + ---------- + .. [1] Perry K., Meyers B., and Muller, M. "Survey of Time Shift Detection + Algorithms for Measured PV Data", 2023 PV Reliability Workshop (PVRW). + """ + # Get the first day period for each day + sunrise_series = _get_sunrise_sunset_daily_series(daytime_mask, "first") + # If there's no frequency value, infer it from the daytime_mask series + if not freq: + freq = pd.infer_freq(daytime_mask.index) + # For left-aligned data, we want the first 'day' mask for + # each day in the series; this will act as a proxy for sunrise. + # Because of this, we will just return the sunrise_series with + # no modifications + if data_alignment == 'L': + return sunrise_series + # For center-aligned data, we want the mid-point between the last night + # mask and the first day mask. To do this, we subtract freq / 2 from + # each sunrise time in the sunrise_series. 
+ elif data_alignment == 'C': + return (sunrise_series - (to_offset(freq) / 2)) + # For right-aligned data, get the last nighttime mask datetime + # before the first 'day' mask in the series. To do this, we subtract freq + # from each sunrise time in the sunrise_series. + elif data_alignment == 'R': + return (sunrise_series - to_offset(freq)) + else: + # Throw an error if right, left, or center-alignment are not declared + raise ValueError("No valid data alignment given. Please pass 'L'" + " for left-aligned data, 'R' for right-aligned data," + " or 'C' for center-aligned data.") + + +def get_sunset(daytime_mask, freq=None, data_alignment='L'): + """ + Using the outputs of :py:func:`power_or_irradiance`, derive sunset + values for each day in the associated time series. + + This function assumes that each midnight-to-midnight period + (according to the timezone of the input data) has one sunrise + followed by one sunset. In cases where this is not satisfied + (timezone of data is substantially different from the location's + local time, locations near the poles, etc), or in the case of missing + data, the returned sunrise and sunset times may be invalid. + + Parameters + ---------- + daytime_mask : Series + Boolean series delineating night periods from day periods, where + day is True and night is False. + freq : str, optional + A pandas freqstr specifying the expected timestamp spacing for + the series. If None, the frequency will be inferred from the index + of ``daytime_mask``. + data_alignment : str, default 'L' + The data alignment of the series (left, right, or center). Data + alignment affects the value selected as sunset. Options are 'L' (left- + aligned), 'R' (right-aligned), or 'C' (center-aligned) + + Returns + ------- + Series + Series of daily sunset times with the same index as ``daytime_mask``. + + References + ---------- + .. [1] Perry K., Meyers B., and Muller, M. 
"Survey of Time Shift Detection + Algorithms for Measured PV Data", 2023 PV Reliability Workshop (PVRW). + """ + # Get the last day period for each day + sunset_series = _get_sunrise_sunset_daily_series(daytime_mask, "last") + # If there's no frequency value, infer it from the daytime_mask series + if not freq: + freq = pd.infer_freq(daytime_mask.index) + # For left-aligned data, sunset is the first nighttime period + # after the day mask. To get this, we add freq to each sunset time in + # the sunset time series. + if data_alignment == 'L': + return (sunset_series + to_offset(freq)) + # For center-aligned data, sunset is the midpoint between the last day + # mask and the first nighttime mask. We calculate this by adding (freq / 2) + # to each sunset time in the sunset_series. + elif data_alignment == 'C': + return (sunset_series + (to_offset(freq) / 2)) + # For right-aligned data, the last 'day' mask time stamp is sunset. + elif data_alignment == 'R': + return sunset_series + else: + # Throw an error if right, left, or center-alignment are not declared + raise ValueError("No valid data alignment given. 
Please pass 'L'" + " for left-aligned data, 'R' for right-aligned data," + " or 'C' for center-aligned data.") diff --git a/pvanalytics/tests/features/test_daytime.py b/pvanalytics/tests/features/test_daytime.py index 61e3569d..24ab262a 100644 --- a/pvanalytics/tests/features/test_daytime.py +++ b/pvanalytics/tests/features/test_daytime.py @@ -2,8 +2,13 @@ import pytest import pandas as pd import numpy as np +import pvlib from pvlib.location import Location +from datetime import date from pvanalytics.features import daytime +from ..conftest import DATA_DIR + +test_file_1 = DATA_DIR / "serf_east_1min_ac_power.csv" @pytest.fixture(scope='module', @@ -20,6 +25,65 @@ def clearsky_january(request, albuquerque): ) +@pytest.fixture +def ac_power_series(): + # Pull down the saved PVLib dataframe and process it + time_series = pd.read_csv(test_file_1, + parse_dates=True, + index_col=0).squeeze() + return time_series + + +@pytest.fixture +def modeled_midday_series(ac_power_series): + # Get the modeled sunrise and sunset for the location + modeled_sunrise_sunset_df = pvlib.solarposition.sun_rise_set_transit_spa( + ac_power_series.index, 39.742, -105.1727) + modeled_sunrise_sunset_df.index = modeled_sunrise_sunset_df.index.date + modeled_sunrise_sunset_df = modeled_sunrise_sunset_df.drop_duplicates() + # Take the 'transit' column as the midday point between sunrise and + # sunset for each day in the modeled irradiance series + modeled_midday_series = modeled_sunrise_sunset_df['transit'] + return modeled_midday_series + + +@pytest.fixture +def daytime_mask_left_aligned(ac_power_series): + # Resample the time series to 5-minute left aligned intervals + ac_power_series_left = ac_power_series.resample('5T', + label='left').mean() + data_freq = pd.infer_freq(ac_power_series_left.index) + daytime_mask = daytime.power_or_irradiance(ac_power_series_left, + freq=data_freq) + return daytime_mask + + +@pytest.fixture +def daytime_mask_right_aligned(ac_power_series): + # Resample the time 
series to 5-minute right aligned intervals. Lop off the + # last entry as it is moved to the next day (3/20) + ac_power_series_right = ac_power_series.resample('5T', + label='right').mean()[:-1] + data_freq = pd.infer_freq(ac_power_series_right.index) + daytime_mask = daytime.power_or_irradiance(ac_power_series_right, + freq=data_freq) + return daytime_mask + + +@pytest.fixture +def daytime_mask_center_aligned(ac_power_series): + # Resample the time series to 5-minute center aligned intervals (take + # left alignment and shift by frequency/2) + ac_power_series_center = ac_power_series.resample('5T', + label='left').mean() + ac_power_series_center.index = (ac_power_series_center.index + + (pd.Timedelta("5T") / 2)) + data_freq = pd.infer_freq(ac_power_series_center.index) + daytime_mask = daytime.power_or_irradiance(ac_power_series_center, + freq=data_freq) + return daytime_mask + + def _assert_daytime_no_shoulder(clearsky, output): # every night-time value in `output` has low or 0 irradiance assert all(clearsky[~output] < 3) @@ -182,3 +246,146 @@ def test_daytime_variable(clearsky_january): clearsky_january['ghi'], daytime.power_or_irradiance(ghi) ) + + +def test_get_sunrise_left_alignment(daytime_mask_left_aligned): + sunrise_left_aligned = daytime.get_sunrise(daytime_mask_left_aligned, + data_alignment='L') + # Assert that the output time series index is the same as the input + pd.testing.assert_index_equal(sunrise_left_aligned.index, + daytime_mask_left_aligned.index) + # Check that the output matches expected + sunrise_3_19 = sunrise_left_aligned[sunrise_left_aligned.index.date == + date(2022, 3, 19)] + # Assert all values for the day equal '2022-03-19 06:10:00-07:00' + assert all(sunrise_3_19 == pd.to_datetime('2022-03-19 06:10:00-07:00')) + + +def test_get_sunrise_center_alignment(daytime_mask_center_aligned): + sunrise_center_aligned = daytime.get_sunrise(daytime_mask_center_aligned, + data_alignment='C') + # Assert that the output time series index is the 
same as the input + pd.testing.assert_index_equal(sunrise_center_aligned.index, + daytime_mask_center_aligned.index) + # Check that the output matches expected + sunrise_3_19 = sunrise_center_aligned[sunrise_center_aligned.index.date == + date(2022, 3, 19)] + # Assert all values for the day equal '2022-03-19 06:10:00-07:00' + assert all(sunrise_3_19 == pd.to_datetime('2022-03-19 06:10:00-07:00')) + + +def test_get_sunrise_right_alignment(daytime_mask_right_aligned): + sunrise_right_aligned = daytime.get_sunrise(daytime_mask_right_aligned, + data_alignment='R') + # Assert that the output time series index is the same as the input + pd.testing.assert_index_equal(sunrise_right_aligned.index, + daytime_mask_right_aligned.index) + # Check that the output matches expected + sunrise_3_19 = sunrise_right_aligned[sunrise_right_aligned.index.date == + date(2022, 3, 19)] + # Assert all values for the day equal '2022-03-19 06:10:00-07:00' + assert all(sunrise_3_19 == pd.to_datetime('2022-03-19 06:10:00-07:00')) + + +def test_get_sunset_left_alignment(daytime_mask_left_aligned): + sunset_left_aligned = daytime.get_sunset(daytime_mask_left_aligned, + data_alignment='L') + # Assert that the output time series index is the same as the input + pd.testing.assert_index_equal(sunset_left_aligned.index, + daytime_mask_left_aligned.index) + # Check that the output matches expected + sunset_3_19 = sunset_left_aligned[sunset_left_aligned.index.date == + date(2022, 3, 19)] + # Assert all values for the day equal '2022-03-19 17:55:00-07:00' + assert all(sunset_3_19 == pd.to_datetime('2022-03-19 17:55:00-07:00')) + + +def test_get_sunset_center_alignment(daytime_mask_center_aligned): + sunset_center_aligned = daytime.get_sunset(daytime_mask_center_aligned, + data_alignment='C') + # Assert that the output time series index is the same as the input + pd.testing.assert_index_equal(sunset_center_aligned.index, + daytime_mask_center_aligned.index) + # Check that the output matches expected + 
sunset_3_19 = sunset_center_aligned[sunset_center_aligned.index.date == + date(2022, 3, 19)] + # Assert all values for the day equal '2022-03-19 17:55:00-07:00' + assert all(sunset_3_19 == pd.to_datetime('2022-03-19 17:55:00-07:00')) + + +def test_get_sunset_right_alignment(daytime_mask_right_aligned): + sunset_right_aligned = daytime.get_sunset(daytime_mask_right_aligned, + data_alignment='R') + # Assert that the output time series index is the same as the input + pd.testing.assert_index_equal(sunset_right_aligned.index, + daytime_mask_right_aligned.index) + # Check that the output matches expected + sunset_3_19 = sunset_right_aligned[sunset_right_aligned.index.date == + date(2022, 3, 19)] + # Assert all values for the day equal '2022-03-19 17:55:00-07:00' + assert all(sunset_3_19 == pd.to_datetime('2022-03-19 17:55:00-07:00')) + + +def test_sunrise_alignment_error(daytime_mask_left_aligned): + with pytest.raises(ValueError, + match=("No valid data alignment given. Please pass 'L'" + " for left-aligned data, 'R' for " + "right-aligned data, or 'C' for " + "center-aligned data.")): + daytime.get_sunrise(daytime_mask_left_aligned, + data_alignment='M') + + +def test_sunset_alignment_error(daytime_mask_left_aligned): + with pytest.raises(ValueError, + match=("No valid data alignment given. 
Please pass 'L'" + " for left-aligned data, 'R' for " + "right-aligned data, or 'C' for " + "center-aligned data.")): + daytime.get_sunset(daytime_mask_left_aligned, + data_alignment='M') + + +def test_consistent_modeled_midday_series(daytime_mask_right_aligned, + daytime_mask_left_aligned, + daytime_mask_center_aligned, + modeled_midday_series): + # Get sunrise and sunset times for each time series (left, right, center), + # calculate the midday point, compared to modeled midday, + # and compare that it's consistent across all three series + # Right-aligned data + right_sunset = daytime.get_sunset(daytime_mask_right_aligned, + data_alignment='R') + right_sunrise = daytime.get_sunrise(daytime_mask_right_aligned, + data_alignment='R') + midday_series_right = right_sunrise + ((right_sunset - right_sunrise)/2) + midday_series_right.index = midday_series_right.index.date + midday_diff_right = (modeled_midday_series - + midday_series_right.drop_duplicates()) + # Left-aligned data + left_sunset = daytime.get_sunset(daytime_mask_left_aligned, + data_alignment='L') + left_sunrise = daytime.get_sunrise(daytime_mask_left_aligned, + data_alignment='L') + midday_series_left = left_sunrise + ((left_sunset - left_sunrise)/2) + midday_series_left.index = midday_series_left.index.date + midday_diff_left = (modeled_midday_series - + midday_series_left.drop_duplicates()) + # Center-aligned data + center_sunset = daytime.get_sunset(daytime_mask_center_aligned, + data_alignment='C') + center_sunrise = daytime.get_sunrise(daytime_mask_center_aligned, + data_alignment='C') + midday_series_center = center_sunrise + ((center_sunset - + center_sunrise)/2) + midday_series_center.index = midday_series_center.index.date + midday_diff_center = (modeled_midday_series - + midday_series_center.drop_duplicates()) + assert (midday_diff_right.equals(midday_diff_left) & + midday_diff_center.equals(midday_diff_right)) + # Assert that the difference between modeled midday for midday + # center-aligned 
data (and consequently left- and right-aligned, + # which are asserted above as identical to center-aligned data) is less + # than 10 minutes/600 seconds (this threshold was generally considered + # noise in the time shift detection paper). + assert all(midday_diff_center.dt.total_seconds().abs() <= 600)