Skip to content

Commit

Permalink
reindex (#113)
Browse files Browse the repository at this point in the history
* reindex

* reindex tests
  • Loading branch information
rwijtvliet authored Nov 5, 2024
1 parent 1b5e94a commit 3807266
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 29 deletions.
20 changes: 20 additions & 0 deletions docs/core/pfline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,25 @@ Another slicing method is implemented with the ``.slice[]`` property. The improv
print(pfl.slice['2024':'2026'])
# --- hide: stop ---

Reindexing
----------

A portfolio line can be reindexed with ``.reindex()``, using another index, e.g. that of another portfolio line. This returns a new portfolio line with the specified index. Any timestamps that were not present in the original object are filled with "zero" (as applicable).

.. exec_code::

# --- hide: start ---
import portfolyo as pf, pandas as pd
index = pd.date_range('2024', freq='YS', periods=3)
input_df = pd.DataFrame({'w':[200, 220, 300], 'p': [100, 150, 200]}, index)
pfl = pf.PfLine(input_df)
# --- hide: stop ---
# continuation of previous code example
index2 = pd.date_range('2025', freq='YS', periods=3)
pfl.reindex(index2) # 2024 is dropped; 2025 and 2026 are kept; 2027 is new (0)
# --- hide: start ---
print(pfl.reindex(index2))
# --- hide: stop ---


Concatenation
Expand Down Expand Up @@ -390,6 +408,8 @@ General remarks:
# --- hide: start ---
print(repr(pfl_1 == pfl_2 == pfl_3 == pfl_4))

* If two portfolio lines span distinct periods, only their overlap is kept. If instead we want to keep all timestamps, e.g., when adding a portfolio line which spans a quarter to one that spans a year (with the same frequency, e.g. hourly), first use the ``.reindex()`` method on the former with the index of the latter. The values outside the specified quarter are filled with "zero" values as is applicable to the kind of portfolio line under consideration.

* A single value is understood to apply uniformly to each timestamp in the index of the portfolio line.

* When doing arithmetic with a flat portfolio line, the result is again a flat portfolio line.
Expand Down
66 changes: 41 additions & 25 deletions portfolyo/core/pfline/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ def flatten(self) -> PfLine:
"""Return flattened instance, i.e., without children."""
...

@abc.abstractmethod
def reindex(self, index: pd.DatetimeIndex) -> PfLine:
    """Reindex and fill any new values with zero (where applicable).

    Parameters
    ----------
    index : pd.DatetimeIndex
        Index that the returned portfolio line should have.

    Returns
    -------
    PfLine
        New portfolio line with the specified index.
    """
    ...

@abc.abstractmethod
def po(
self: PfLine, peak_fn: tools.peakfn.PeakFunction, freq: str = "MS"
Expand Down Expand Up @@ -221,43 +226,46 @@ def __eq__(self, other) -> bool:

class VolumePfLine:
    # Accessors for a portfolio line of kind VOLUME.
    # NOTE(review): `self.df` is not defined in this class; presumably it is supplied
    # by the class this one is combined with - confirm.
    kind = Kind.VOLUME
    # Available for this kind: power and energy, read from the dataframe.
    w = property(lambda self: self.df["w"])
    q = property(lambda self: self.df["q"])
    # Not available for this kind. These accessors are built by
    # series_property_raising_typeerror, which is defined elsewhere (by its name,
    # accessing them raises a TypeError - confirm).
    p = series_property_raising_typeerror("price")
    r = series_property_raising_typeerror("revenue")
    # The volume part of a volume-only portfolio line is the object itself.
    volume = property(lambda self: self)
    price = series_property_raising_typeerror("price")
    revenue = series_property_raising_typeerror("revenue")


class PricePfLine:
    # Accessors for a portfolio line of kind PRICE.
    # NOTE(review): `self.df` is not defined in this class; presumably it is supplied
    # by the class this one is combined with - confirm.
    kind = Kind.PRICE
    # Not available for this kind. These accessors are built by
    # series_property_raising_typeerror, which is defined elsewhere (by its name,
    # accessing them raises a TypeError - confirm).
    w = series_property_raising_typeerror("volume")
    q = series_property_raising_typeerror("volume")
    # Available for this kind: price, read from the dataframe.
    p = property(lambda self: self.df["p"])
    r = series_property_raising_typeerror("revenue")
    volume = series_property_raising_typeerror("volume")
    # The price part of a price-only portfolio line is the object itself.
    price = property(lambda self: self)
    revenue = series_property_raising_typeerror("revenue")


class RevenuePfLine:
    # Accessors for a portfolio line of kind REVENUE.
    # NOTE(review): `self.df` is not defined in this class; presumably it is supplied
    # by the class this one is combined with - confirm.
    kind = Kind.REVENUE
    # Not available for this kind. These accessors are built by
    # series_property_raising_typeerror, which is defined elsewhere (by its name,
    # accessing them raises a TypeError - confirm).
    w = series_property_raising_typeerror("volume")
    q = series_property_raising_typeerror("volume")
    p = series_property_raising_typeerror("price")
    # Available for this kind: revenue, read from the dataframe.
    r = property(lambda self: self.df["r"])
    volume = series_property_raising_typeerror("volume")
    price = series_property_raising_typeerror("price")
    # The revenue part of a revenue-only portfolio line is the object itself.
    revenue = property(lambda self: self)


class CompletePfLine:
    # Accessors for a portfolio line of kind COMPLETE: all four series are read
    # from the dataframe.
    # NOTE(review): `self.df` is not defined in this class; presumably it is supplied
    # by the class this one is combined with - confirm.
    kind = Kind.COMPLETE
    w = property(lambda self: self.df["w"])
    q = property(lambda self: self.df["q"])
    p = property(lambda self: self.df["p"])
    r = property(lambda self: self.df["r"])
    # volume => on child classes
    # price => on child classes
    # revenue => on child classes


class FlatPfLine:
Expand All @@ -269,6 +277,7 @@ class FlatPfLine:
hedge_with = flat_methods.hedge_with
loc = flat_methods.loc
slice = flat_methods.slice
reindex = flat_methods.reindex
__getitem__ = flat_methods.__getitem__
# __bool__ => on child classes
__eq__ = flat_methods.__eq__
Expand All @@ -283,6 +292,7 @@ class NestedPfLine(children.ChildFunctionality):
hedge_with = nested_methods.hedge_with
loc = nested_methods.loc
slice = nested_methods.slice
reindex = nested_methods.reindex
__bool__ = nested_methods.__bool__
__eq__ = nested_methods.__eq__

Expand Down Expand Up @@ -418,6 +428,12 @@ def asfreq(self, freq: str = "MS") -> FlatCompletePfLine:
newdf["p"] = newdf["r"] / newdf["q"]
return FlatCompletePfLine(newdf)

def reindex(self, index: pd.DatetimeIndex) -> FlatCompletePfLine:
    """Return a new FlatCompletePfLine that has ``index`` as its index.

    The columns ``w``, ``q`` and ``r`` are reindexed, using 0 for timestamps that
    are not present in this portfolio line. The price column ``p`` is not filled
    but recalculated as ``r / q`` afterwards.

    Raises
    ------
    AssertionError
        If ``index`` is not compatible with the index of this portfolio line
        (checked by ``tools.testing.assert_indices_compatible``).
    """
    tools.testing.assert_indices_compatible(self.index, index)
    reindexed = self.df[["w", "q", "r"]].reindex(index, fill_value=0)
    with_price = reindexed.assign(p=reindexed["r"] / reindexed["q"])
    return FlatCompletePfLine(with_price)

def __bool__(self) -> bool:
return not (
np.allclose(self.df["w"].pint.magnitude, 0.0)
Expand Down
6 changes: 6 additions & 0 deletions portfolyo/core/pfline/flat_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ def slice(self: FlatPfLine) -> SliceIndexer:
return SliceIndexer(self)


def reindex(self: FlatPfLine, index: pd.DatetimeIndex) -> FlatPfLine:
    """Return a new portfolio line of the same class that has ``index`` as its index.

    The dataframe of this portfolio line is reindexed; timestamps that are not
    present in it are filled with 0.

    Raises
    ------
    AssertionError
        If ``index`` is not compatible with the index of this portfolio line
        (checked by ``tools.testing.assert_indices_compatible``).
    """
    tools.testing.assert_indices_compatible(self.index, index)
    cls = self.__class__
    return cls(self.df.reindex(index, fill_value=0))


class LocIndexer:
"""Helper class to obtain FlatPfLine instance, whose index is subset of original index."""

Expand Down
5 changes: 5 additions & 0 deletions portfolyo/core/pfline/nested_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ def slice(self: NestedPfLine) -> SliceIndexer:
return SliceIndexer(self)


def reindex(self: NestedPfLine, index: pd.DatetimeIndex) -> NestedPfLine:
    """Return a new nested portfolio line in which every child is reindexed to ``index``.

    Parameters
    ----------
    index : pd.DatetimeIndex
        Index that is passed to the ``reindex`` method of each child.

    Returns
    -------
    NestedPfLine
        New instance of the same class, built from the reindexed children under
        their original names.
    """
    # This function is bound as a method on the nested portfolio line itself, so the
    # children are reached through `self` directly; there is no wrapping `.pfl` here.
    newchildren = {name: child.reindex(index) for name, child in self.items()}
    return self.__class__(newchildren)


class LocIndexer:
"""Helper class to obtain NestedPfLine instance, whose index is subset of original index."""

Expand Down
2 changes: 1 addition & 1 deletion portfolyo/core/shared/ndframelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def slice(self):

@abc.abstractmethod
def dataframe(
self, cols: Iterable[str] = None, has_units: bool = True, *args, **kwargs
self, cols: Iterable[str] | None = None, has_units: bool = True, *args, **kwargs
) -> pd.DataFrame:
"""DataFrame for portfolio line in default units.
Expand Down
9 changes: 6 additions & 3 deletions portfolyo/tools/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,17 @@ def assert_series_equal(left: pd.Series, right: pd.Series, *args, **kwargs):


def assert_indices_compatible(left: pd.DatetimeIndex, right: pd.DatetimeIndex) -> None:
    """Assert that indices are compatible, i.e., with equal frequency, start-of-day, and timezone.

    Parameters
    ----------
    left, right : pd.DatetimeIndex
        Indices to compare. Both must be non-empty, because the start-of-day is
        taken from the first timestamp of each index.

    Raises
    ------
    AssertionError
        If the frequencies, the times of the first timestamps, or the timezones differ.
    """
    if (lf := left.freq) != (rf := right.freq):
        raise AssertionError(f"Indices have unequal frequency: {lf} and {rf}.")
    # Start-of-day is compared on the first timestamp only.
    if (lt := left[0].time()) != (rt := right[0].time()):
        raise AssertionError(f"Indices that have unequal start-of-day; {lt} and {rt}.")
    if (lz := left.tz) != (rz := right.tz):
        raise AssertionError(f"Indices that have unequal timezone; {lz} and {rz}.")


def assert_w_q_compatible(freq: str, w: pd.Series, q: pd.Series):
"""Assert if timeseries with power- and energy-values are consistent."""
"""Assert that timeseries with power- and energy-values are consistent."""
if freq == "15min":
assert_series_equal(q, w * tools_unit.Q_(0.25, "h"), check_names=False)
elif freq == "h":
Expand Down
138 changes: 138 additions & 0 deletions tests/core/pfline/test_pfline_reindex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import portfolyo as pf
import numpy as np
import pytest
import pandas as pd

# Input index: three month-start timestamps, beginning at 2020-01.
idx = pd.date_range("2020", freq="MS", periods=3)
# Target index: same frequency and length, but starting one month later (2020-02).
idx2 = pd.date_range("2020-02", freq="MS", periods=3)
# Input values on the input index.
s = pd.Series([10, 20, 30], idx)
# Values expected after reindexing `s` to `idx2`: the first value is dropped, and the
# one timestamp that is new gets 0.
s2 = pd.Series([20, 30, 0], idx2)


@pytest.mark.parametrize(
    "pfl,idx,expected",
    [
        (
            pf.PfLine(s.astype(f"pint[{unit}]")),
            idx2,
            pf.PfLine(s2.astype(f"pint[{unit}]")),
        )
        for unit in ("MWh", "MW", "Wh")
    ],
)
def test_pfl_reindex(pfl: pf.PfLine, idx: pd.DatetimeIndex, expected: pf.PfLine):
    """Reindexing a portfolio line built from `s` to `idx2` gives the one built from `s2`."""
    result = pfl.reindex(idx)
    assert result == expected


# Two alternative units per (non-complete) kind; the `units` fixture selects one of
# them by position (0 or 1).
KIND_AND_UNITS = {
    pf.Kind.VOLUME: ["MWh", "tce/h"],
    pf.Kind.PRICE: ["Eur/MWh", "ctEur/tce"],
    pf.Kind.REVENUE: ["Eur", "kEur"],
}


@pytest.fixture(scope="session", params=["h", "15min", "D", "MS", "QS", "YS"])
def freq(request):
    """Frequency string of the indices under test (one per fixture parameter)."""
    return request.param


@pytest.fixture(scope="session", params=[None, "Europe/Berlin", "Asia/Kolkata"])
def tz(request):
    """Timezone of the indices under test; None gives timezone-naive indices."""
    return request.param


@pytest.fixture(scope="session", params=[((10, 20, 30), (20, 30, 0))])
def floats_inout(request):
    """Pair of (input values, expected output values).

    NOTE(review): not requested by any fixture or test in this file - candidate for removal.
    """
    return request.param


@pytest.fixture(scope="session", params=pf.Kind)
def kind(request):
    """Kind of the portfolio line under test; parametrized over every member of pf.Kind."""
    return request.param


@pytest.fixture(scope="session", params=[0, 1])
def units(request, kind):
    """Tuple of units for `kind`: 1 unit (non-complete) or 2 units (complete).

    The fixture parameter is the position in the unit lists of KIND_AND_UNITS. For a
    complete portfolio line, the volume unit and the revenue unit at that position
    are returned, in that order.
    """
    position = request.param
    if kind is pf.Kind.COMPLETE:
        source_kinds = (pf.Kind.VOLUME, pf.Kind.REVENUE)
    else:
        source_kinds = (kind,)
    return tuple(KIND_AND_UNITS[k][position] for k in source_kinds)


@pytest.fixture
def valuecount(freq):
    """Number of timestamps in the input index for the given frequency."""
    # For the sub-daily frequencies the count must be a full number of days.
    count_by_freq = {"h": 96, "15min": 96 * 4}
    return count_by_freq.get(freq, 3)


@pytest.fixture
def diff(freq):
    """Number of timestamps by which the output index is shifted relative to the input index."""
    # For the sub-daily frequencies the shift must be a full number of days.
    shift_by_freq = {"h": 24, "15min": 96}
    return shift_by_freq.get(freq, 1)


@pytest.fixture
def index_in(freq, tz, valuecount):
    """Index of the input portfolio line: `valuecount` timestamps starting at 2020."""
    return pd.date_range("2020", freq=freq, periods=valuecount, tz=tz)


@pytest.fixture
def index_out(freq, tz, valuecount, diff):
    """Index to reindex to: as long as the input index, but starting `diff` timestamps later."""
    extended = pd.date_range("2020", freq=freq, periods=valuecount + diff, tz=tz)
    return extended[diff:]


@pytest.fixture
def floats_in(valuecount):
    """Values of the input portfolio line, one per input timestamp."""
    return np.arange(1, valuecount + 1)  # 1, 2, 3, ..., valuecount


@pytest.fixture
def floats_out(valuecount, diff):
    """Values expected after reindexing: the input values minus the first `diff`, then `diff` zeros."""
    kept = np.arange(diff + 1, valuecount + 1)  # 1+diff, 2+diff, ..., valuecount
    filled = np.zeros(diff)  # 0, 0, ..., 0
    return np.concatenate([kept, filled])


@pytest.fixture
def pfl_in(floats_in, index_in, units):
    """Input portfolio line: one series per unit, on the input index.

    The series for the unit at position i holds the input values multiplied by (i * 10 + 1).
    """
    series = []
    for i, unit in enumerate(units):
        scaled = floats_in * (i * 10 + 1)
        series.append(pd.Series(scaled, index_in).astype(f"pint[{unit}]"))
    return pf.PfLine(*series)


@pytest.fixture
def pfl_out(floats_out, index_out, units):
    """Expected portfolio line: one series per unit, on the output index.

    The series for the unit at position i holds the expected values multiplied by (i * 10 + 1).
    """
    series = []
    for i, unit in enumerate(units):
        scaled = floats_out * (i * 10 + 1)
        series.append(pd.Series(scaled, index_out).astype(f"pint[{unit}]"))
    return pf.PfLine(*series)


@pytest.fixture
def pfl(pfl_in):
    """Alias for `pfl_in`: the portfolio line that gets reindexed in the test."""
    return pfl_in


@pytest.fixture
def index(index_out):
    """Alias for `index_out`: the index that the test reindexes to."""
    return index_out


@pytest.fixture
def expected(pfl_out):
    """Alias for `pfl_out`: the portfolio line that the test expects as result."""
    return pfl_out


def test_reindex_flatpfline(
    pfl: pf.PfLine, index: pd.DatetimeIndex, expected: pf.PfLine
):
    """Reindexing the input portfolio line to the shifted index gives the expected portfolio line."""
    assert pfl.reindex(index) == expected

0 comments on commit 3807266

Please sign in to comment.