Skip to content

Commit

Permalink
added examples and suggested edits
Browse files Browse the repository at this point in the history
  • Loading branch information
qnguyen345 committed Aug 19, 2024
1 parent cbd9a6c commit 4dcf2a2
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 74 deletions.
18 changes: 18 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ API Reference
Quality
=======

Energy
-----------

Functions for checking if an energy data stream is cumulative
or not.

.. autosummary::
   :toctree: generated/

   quality.energy.cumulative_energy_simple_diff_check
   quality.energy.cumulative_energy_avg_diff_check

If the energy data stream passes the checks, then
it is converted to a non-cumulative energy data stream via simple or
average differencing.

.. autosummary::
   :toctree: generated/

   quality.energy.convert_cumulative_energy

Data Shifts
-----------

Expand Down
69 changes: 69 additions & 0 deletions docs/examples/energy/cumulative-energy-simple-avg-check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Cumulative Energy via Average Difference
========================================
Check and correct energy series for cumulative energy with average
differencing.
"""

# %%
# AC energy data streams are often cumulative, meaning they increase
# over time. These energy streams need to be corrected into a form
# that resembles regular, non-cumulative data. This process involves
# applying a percent threshold to the average difference in subsequent
# values to check if the data is always increasing. If it passes that
# check, then the data stream is cumulative and it can be corrected
# via average differencing.

import os
import pandas as pd
import matplotlib.pyplot as plt
from pvanalytics.quality import energy

# %%
# First, read in the ac energy data. This data set contains one week of
# 10-minute ac energy data.

script_directory = os.path.dirname(__file__)
energy_filepath = os.path.join(
    script_directory,
    "../../../pvanalytics/data/system_10004_ac_energy.csv")
data = pd.read_csv(energy_filepath)
energy_series = data['ac_energy_inv_16425']

# %%
# Now check if the energy time series is cumulative via average differencing.
# This is done using the
# :py:func:`pvanalytics.quality.energy.cumulative_energy_avg_diff_check`
# function.

is_cumulative = energy.cumulative_energy_avg_diff_check(energy_series)

# %%
# If the energy series is cumulative, then it can be converted to a
# non-cumulative energy series via average differencing, i.e. the mean of
# the backward difference and the forward difference at each timestamp.

simple_diff = energy_series.diff()
corrected_energy_series = 0.5 * (simple_diff.shift(-1) + simple_diff)

# %%
# Plot the original, cumulative energy series.

data.plot(x="local_measured_on", y='ac_energy_inv_16425')
plt.title("Cumulative Energy Series")
plt.xticks(rotation=45)
plt.xlabel("Datetime")
plt.ylabel("AC Energy (kWh)")
plt.show()

# %%
# Plot the corrected, non-cumulative energy series.

corrected_energy_df = pd.DataFrame({
    "local_measured_on": data["local_measured_on"],
    "corrected_ac_energy_inv_16425": corrected_energy_series})
# Keep only non-negative values; the comparison is False for NaN, so the
# undefined first and last points produced by the differencing are dropped.
non_negative = corrected_energy_df["corrected_ac_energy_inv_16425"] >= 0
corrected_energy_df = corrected_energy_df[non_negative]
corrected_energy_df.plot(x="local_measured_on",
                         y="corrected_ac_energy_inv_16425")
plt.title("Corrected, Non-cumulative Energy Series")
plt.xticks(rotation=45)
plt.xlabel("Datetime")
plt.ylabel("AC Energy (kWh)")
plt.show()
69 changes: 69 additions & 0 deletions docs/examples/energy/cumulative-energy-simple-diff-check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Cumulative Energy via Simple Difference
=======================================
Check and correct energy series for cumulative energy with simple differencing.
"""

# %%
# Many AC energy data streams are reported as a running (cumulative) total
# that keeps growing over time. Before such a stream can be used like
# regular interval data it has to be converted to a non-cumulative form.
# To decide whether a stream is cumulative, subsequent values are
# differenced and a percent threshold is applied to the share of
# increasing values. A stream that passes this check is treated as
# cumulative and can be corrected via simple differencing.

import os
import pandas as pd
import matplotlib.pyplot as plt
from pvanalytics.quality import energy

# %%
# Start by loading the ac energy data: one week of 10-minute ac energy
# readings.

example_dir = os.path.dirname(__file__)
csv_path = os.path.join(
    example_dir, "../../../pvanalytics/data/system_10004_ac_energy.csv")
data = pd.read_csv(csv_path)
energy_series = data['ac_energy_inv_16425']

# %%
# Next, test whether the energy time series is cumulative using simple
# differencing, via the
# :py:func:`pvanalytics.quality.energy.cumulative_energy_simple_diff_check`
# function.

is_cumulative = energy.cumulative_energy_simple_diff_check(energy_series)

# %%
# A cumulative energy series can then be converted to a non-cumulative
# energy series with the
# :py:func:`pvanalytics.quality.energy.convert_cumulative_energy` function.
corrected_energy_series = energy.convert_cumulative_energy(energy_series)

# %%
# Plot the original, cumulative energy series.

data.plot(x="local_measured_on", y='ac_energy_inv_16425')
plt.xlabel("Datetime")
plt.ylabel("AC Energy (kWh)")
plt.title("Cumulative Energy Series")
plt.xticks(rotation=45)
plt.show()

# %%
# Plot the corrected, non-cumulative energy series.

corrected_energy_df = data[["local_measured_on"]].assign(
    corrected_ac_energy_inv_16425=corrected_energy_series)
keep_rows = corrected_energy_df["corrected_ac_energy_inv_16425"] >= 0
corrected_energy_df = corrected_energy_df.loc[keep_rows]
corrected_energy_df.plot(x="local_measured_on",
                         y="corrected_ac_energy_inv_16425")
plt.xlabel("Datetime")
plt.ylabel("AC Energy (kWh)")
plt.title("Corrected, Non-cumulative Energy Series")
plt.xticks(rotation=45)
plt.show()
16 changes: 14 additions & 2 deletions docs/whatsnew/0.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@
Enhancements
~~~~~~~~~~~~
* Compatibility with numpy 2.0. (:pull:`211`)
* Added function :py:func:`~pvanalytics.quality.energy.cumulative_energy_simple_diff_check` to
check if an energy time series is cumulative via simple differencing.
(:issue:`165`, :pull:`212`)
* Added function :py:func:`~pvanalytics.quality.energy.cumulative_energy_avg_diff_check` to
check if an energy time series is cumulative via average differencing.
(:issue:`165`, :pull:`212`)
* Added function :py:func:`~pvanalytics.quality.energy.convert_cumulative_energy` to correct
the cumulative energy to interval-based, non-cumulative energy series, if the energy series
passes the simple difference :py:func:`~pvanalytics.quality.energy.cumulative_energy_simple_diff_check`
or average difference :py:func:`~pvanalytics.quality.energy.cumulative_energy_avg_diff_check`
checks. (:issue:`165`, :pull:`212`)


Bug Fixes
Expand All @@ -20,12 +31,13 @@ Requirements

Documentation
~~~~~~~~~~~~~

* Added examples for checking cumulative energy time series. (:issue:`165`, :pull:`212`)

Testing
~~~~~~~

* Added testing for ``pvanalytics.quality.energy``. (:issue:`165`, :pull:`212`)


Contributors
~~~~~~~~~~~~
* Quyen Nguyen (:ghuser:`qnguyen345`)
File renamed without changes.
94 changes: 52 additions & 42 deletions pvanalytics/quality/energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
def cumulative_energy_simple_diff_check(energy_series,
pct_increase_threshold=95,
system_self_consumption=-0.5):
"""Check if an energy time series has cumulative energy or not.
"""Check if an energy time series is cumulative via simple differencing.
The check uses the simple diff function .diff().
To determine if an energy data stream is cumulative, subsequent values in
the series are differenced to determine if the data stream is consistently
increasing. If the percentage of increasing values in the data set exceeds
the pct_increase_threshold parameter, the energy series is determined as
cumulative and a True boolean is returned. Otherwise, False is returned.
Parameters
----------
energy_series : Pandas series with datetime index.
energy_series: Series
Time series of energy data stream with datetime index.
pct_increase_threshold: Int, default 95
The percentage threshold to consider the energy series as cumulative.
Expand Down Expand Up @@ -46,13 +50,18 @@ def cumulative_energy_simple_diff_check(energy_series,
def cumulative_energy_avg_diff_check(energy_series,
pct_increase_threshold=95,
system_self_consumption=-0.5):
"""Check if an energy time series has cumulative energy or not.
"""Check if an energy time series is cumulative via average differencing.
The check uses the average difference.
To determine if an energy data stream is cumulative, subsequent values in
the series are average differenced to determine if the data stream is
consistently increasing. If the percentage of increasing values in the
data set exceeds the pct_increase_threshold parameter, the energy series
is determined as cumulative and a True boolean is returned.
Otherwise, False is returned.
Parameters
----------
energy_series : Pandas series with datetime index.
energy_series: Series
Time series of energy data stream with datetime index.
pct_increase_threshold: int, default 95
The percentage threshold to consider the energy series as cumulative.
Expand Down Expand Up @@ -85,13 +94,19 @@ def cumulative_energy_avg_diff_check(energy_series,
return False


def check_cumulative_energy(energy_series, pct_increase_threshold=95,
system_self_consumption=-0.5):
"""Run the cumulative energy check for simple and averaged difference.
def convert_cumulative_energy(energy_series, pct_increase_threshold=95,
system_self_consumption=-0.5):
"""Convert cumulative to interval-based, non-cumulative energy, if needed.
Two main tests are run to determine if the associated energy
data stream is cumulative or not: a simple differencing function is run
on the series via cumulative_energy_simple_diff_check, and an
average differencing function is run on the series via
cumulative_energy_avg_diff_check.
Parameters
----------
energy_series : Pandas series with datetime index.
energy_series: Series
Time series of energy data stream with datetime index.
pct_increase_threshold: int, default 95
The percentage threshold to consider the energy series as cumulative.
Expand All @@ -101,37 +116,32 @@ def check_cumulative_energy(energy_series, pct_increase_threshold=95,
Returns
-------
Tuple
(simple_diff_energy_series, avg_diff_energy_series, cumulative_energy)
simple_diff_energy_series: Pandas series with datetime index.
The differenced energy series using the simple .diff() function if
energy is cumulative; otherwise, it remains as the original,
noncumulative energy series.
avg_diff_energy_series: Pandas series with datetime index.
The averaged difference energy series using the averaged difference
method if energy is cumulative; otherwise, it remains as the
original, noncumulative energy series.
cumulative_energy: Boolean
True if energy series is cumulative, False otherwise.
Series
corrected_energy_series is returned if the energy series is cumulative.
If the energy series passes the simple difference check, then
the series is corrected via simple differencing. Else, if the
energy series passes the average difference check, then the series is
corrected via average differencing.
If neither check passes, then the original non-cumulative
energy_series is returned.
"""
# Check if energy series is cumulative for both simple and averaged diff
cumulative_energy = (
cumulative_energy_simple_diff_check(energy_series,
pct_increase_threshold,
system_self_consumption) and
cumulative_energy_avg_diff_check(energy_series,
pct_increase_threshold,
system_self_consumption))
if cumulative_energy:
# Adjust energy series if it is cumulative
simple_diff_energy_series = energy_series.diff()
avg_diff_energy_series = 0.5 * \
(simple_diff_energy_series.shift(-1) + simple_diff_energy_series)
return (simple_diff_energy_series, avg_diff_energy_series,
cumulative_energy)
# Check if energy series is cumulative with simple difference and average
# difference
simple_diff_check = cumulative_energy_simple_diff_check(
energy_series, pct_increase_threshold, system_self_consumption)
avg_diff_check = cumulative_energy_avg_diff_check(energy_series,
pct_increase_threshold,
system_self_consumption)
if simple_diff_check:
# Return simple difference of energy series if it passes the simple
# difference check
corrected_energy_series = energy_series.diff()
return corrected_energy_series
elif avg_diff_check:
# Return average difference of energy series if it passes the
# average difference check
corrected_energy_series = 0.5 * \
(energy_series.diff().shift(-1) + energy_series.diff())
return corrected_energy_series
else:
simple_diff_energy_series = energy_series
avg_diff_energy_series = energy_series
return (simple_diff_energy_series, avg_diff_energy_series,
cumulative_energy)
return energy_series
Loading

0 comments on commit 4dcf2a2

Please sign in to comment.