Skip to content

Commit

Permalink
added general query methods (#2)
Browse files Browse the repository at this point in the history
* fixed typo

* general_pfl method

Co-authored-by: Ruud Wijtvliet <[email protected]>
  • Loading branch information
rwijtvliet and rwijtvliet-lb authored Sep 21, 2022
1 parent 8210793 commit 88eb277
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 19 deletions.
2 changes: 1 addition & 1 deletion belvys/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from . import _version, adjustment
from .api import Api
from .structure import Structure
from .structure import Structure, TsTree, Ts
from .tenant import Tenant
from .example import (
example_structure,
Expand Down
70 changes: 66 additions & 4 deletions belvys/api.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
"""Get data from Belvis rest api."""

from __future__ import annotations
from dataclasses import dataclass

import datetime as dt
import json
import pathlib
import urllib
from dataclasses import dataclass
from typing import Callable, Dict, List, Union

import numpy as np
import pandas as pd
import requests
import yaml

from .common import print_status


Expand Down Expand Up @@ -339,11 +340,13 @@ def find_tsid(self, pfid: str, tsname: str) -> int:
# Raise error if 0 or > 1 found.
if len(hits) == 0:
raise ValueError(
f"No timeseries with exact name '{tsname}' found in portfolio '{pfid}'. Check if ``pfid`` and ``tsname`` are correct."
f"No timeseries with exact name '{tsname}' found in portfolio '{pfid}'."
" Check if ``pfid`` and ``tsname`` are correct."
)
elif len(hits) > 1:
raise ValueError(
f"Multiple timeseries with exact name '{tsname}' found in portfolio '{pfid}'. Check your Belvis instance."
f"Multiple timeseries with exact name '{tsname}' found in portfolio '{pfid}'."
" Check your Belvis instance."
)
return next(iter(hits.values()))

Expand All @@ -358,7 +361,7 @@ def series(
missing2zero: bool = True,
blocking: bool = True,
) -> pd.Series:
"""Return timeseries in given delivery time interval.
"""Return timeseries in given delivery time interval using its id.
Parameters
----------
Expand Down Expand Up @@ -411,3 +414,62 @@ def series(
s = pd.Series(df["v"].to_list(), pd.DatetimeIndex(df["ts"]), f"pint[{unit}]")
s.index.freq = pd.infer_freq(s.index)
return s

def series_from_tsname(
    self,
    pfid: str,
    tsname: str,
    ts_left: Union[pd.Timestamp, dt.datetime],
    ts_right: Union[pd.Timestamp, dt.datetime],
    *,
    leftrange: str = "exclusive",
    rightrange: str = "inclusive",
    missing2zero: bool = True,
    blocking: bool = True,
) -> pd.Series:
    """Return timeseries in given delivery time interval, identified by portfolio id
    and timeseries name instead of by timeseries id.

    Convenience wrapper: looks up the timeseries id with ``.find_tsid()`` and then
    delegates to ``.series()``, forwarding all other arguments unchanged.

    Parameters
    ----------
    pfid : str
        ID (=short name) of portfolio in Belvis.
    tsname : str
        Name of the timeseries. Must be exact.
    ts_left : Union[pd.Timestamp, dt.datetime]
    ts_right : Union[pd.Timestamp, dt.datetime]
        Start and end of the delivery time interval.
    leftrange : str, optional (default: 'exclusive')
        'inclusive' ('exclusive') to get values with timestamp that is >= (>) ts_left.
        Default: 'exclusive' because timestamps in Belvis are *usually* right-bound.
    rightrange : str, optional (default: 'inclusive')
        'inclusive' ('exclusive') to get values with timestamp that is <= (<) ts_right.
        Default: 'inclusive' because timestamps in Belvis are *usually* right-bound.
    missing2zero : bool, optional (default: True)
        What to do with values that are flagged as 'missing'. True to replace with 0,
        False to replace with nan.
    blocking : bool, optional (default: True)
        If True, recalculates data that is not up-to-date before returning; might take
        long time or result in internal-server-error. If False, return most up-to-date
        data that is available without recalculating.

    Returns
    -------
    pd.Series
        with resulting information.

    Raises
    ------
    ValueError
        Propagated from ``.find_tsid()`` if no timeseries, or more than one
        timeseries, with the exact name ``tsname`` is found in portfolio ``pfid``.

    Notes
    -----
    - Returns series with data as found in Belvis; no correction (e.g. for right-bounded
      timestamps) done.
    - If not yet cached, the ``.series()`` method is potentially a lot faster.
    """
    # Resolve the name to an id first; a failed lookup raises before any data request.
    resolved_id = self.find_tsid(pfid, tsname)
    # Keyword-only options are handed to ``.series()`` exactly as received.
    fetch_options = {
        "leftrange": leftrange,
        "rightrange": rightrange,
        "missing2zero": missing2zero,
        "blocking": blocking,
    }
    return self.series(resolved_id, ts_left, ts_right, **fetch_options)
43 changes: 43 additions & 0 deletions belvys/tenant.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,49 @@ def _pfline(self, ts_tree: TsTree) -> pf.PfLine:
pfl = pf.PfLine(data)
return pfl.asfreq(self.structure.freq)

def general_pfl(
    self,
    pfid: str,
    tsname: str,
    ts_left: Union[str, pd.Timestamp, dt.datetime] = None,
    ts_right: Union[str, pd.Timestamp, dt.datetime] = None,
    missing2zero: bool = True,
    debug: bool = False,
) -> pf.PfLine:
    """Retrieve a portfolio line with portfolio-specific volume and/or price data
    from Belvis, without using the structure.

    Use if the wanted timeseries is not specified in the structure file.

    Parameters
    ----------
    pfid : str
        Id of portfolio as found in Belvis. Must be original.
    tsname : str
        Name of the timeseries. Must be exact.
    ts_left : Union[str, pd.Timestamp, dt.datetime], optional
    ts_right : Union[str, pd.Timestamp, dt.datetime], optional
        Start and end of delivery period. If both omitted, uses the front year. If
        one omitted, uses the start of the (same or following) year.
    missing2zero : bool, optional (default: True)
        What to do with values that are flagged as 'missing'. True to replace with 0,
        False to replace with nan.
    debug : bool, optional (default: False)
        If True, return the ``Ts`` object directly after the api call, instead of
        turning it into a portfolio line.

    Returns
    -------
    pf.PfLine
        (or the ``Ts`` object, if ``debug`` is True.)
    """
    # Normalise the delivery period boundaries.
    start, end = pf.ts_leftright(ts_left, ts_right)
    # A single timeseries, described by portfolio id and name only.
    leaf = Ts(pfid, tsname)
    # NOTE(review): ``Api.series_from_tsname`` hands ``Api.series`` a timeseries id as
    # first argument, whereas here a ``Ts`` object is passed and the returned value is
    # discarded. Presumably the fetched data should end up on ``leaf`` before it is
    # converted below -- TODO confirm that this call does what is intended.
    self.api.series(leaf, start, end, missing2zero=missing2zero)
    # In debug mode, hand back the raw object; otherwise build the portfolio line.
    return leaf if debug else self._pfline(leaf)

def portfolio_pfl(
self,
pfid: str,
Expand Down
11 changes: 8 additions & 3 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,20 @@ A few things to note here about the data as it is returned by the Belvis API:

* Timestamps are localized to the UTC timezone. A conversion to the correct (in this case "Europe/Berlin") timezone is necessary.

* Timestamps are right-bound. The final timestamp is ``2022-09-29 23:00:00+00:00``, which is the same as ``2022-09-30 01:00:00+02:00`` in the Europe/Berlin timezone: the first hour of 2022-09-30 is denoted with the 01:00 o'clock timestamp, which is when that hour *ends*, not when it starts.
* Timestamps are right-bound. In the example above, the first timestamp is ``2022-09-29 23:00:00+00:00``, which is the same as ``2022-09-30 01:00:00+02:00`` in the Europe/Berlin timezone. The first hour of 2022-09-30 is thus denoted with the 01:00 o'clock timestamp, which is when that hour *ends*, not when it starts.

* A peculiarity of the gas market can also be seen: daily values do not apply from midnight to midnight, but rather from 06:00 to 06:00. The values change with the timestamp ``2022-10-01 05:00:00+00:00``, which is ``2022-10-01 07:00:00+02:00``, which in Belvis denotes the time period from 06:00 to 07:00 (see previous point).
* A peculiarity of the gas market can also be seen: daily values do not apply from midnight to midnight, but rather from 06:00 to 06:00. The values change with the timestamp ``2022-10-01 05:00:00+00:00``, which is ``2022-10-01 07:00:00+02:00`` in the Europe/Berlin timezone, which in Belvis denotes the hour starting at 06:00 (see previous point).

More series
-----------

For convenience, the method ``.series_from_tsname()`` combines looking up the timeseries id with fetching the data. It is a thin wrapper around ``.series()``.

--------------
Timeseries IDs
--------------

For the methods above, the timeseries ID is needed. This is a number uniquely identifying a timeseries (including the portfolio that contains it) in the Belvis database.
For most methods above, the timeseries ID is needed. This is a number uniquely identifying a timeseries (including the portfolio that contains it) in the Belvis database.

In order to find the ``tsid``, several methods are available, depending on how much information is known about the timeseries.

Expand Down
44 changes: 33 additions & 11 deletions docs/tenant.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,7 @@ A tenant can be initialised using a ``Structure`` and an ``Api`` instance. Conti
Usage
-----

The most important methods are ``.portfolio_pfl()`` and ``.price_pfl()``. Both return a ``portfolyo.PfLine`` (see `here <portfolyo.readthedocs.io>`_) instance. Here we see how they are used to get data for the 3-day (left-closed) time period from midnight 2022-09-05 until midnight 2022-09-08:

.. code-block:: python
# Continuation of previous example.
import pandas as pd
ts_left = pd.Timestamp('2022-09-05')
ts_right = pd.Timestamp('2022-09-08')
The most important methods are ``.portfolio_pfl()``, ``.price_pfl()``, and ``.general_pfl()``. All return a ``portfolyo.PfLine`` (see `here <https://portfolyo.readthedocs.io>`_) instance. Here we see how they are used to get data for the 3-day (left-closed) time period from midnight 2022-09-05 until midnight 2022-09-08.

Portfolio data
--------------
Expand All @@ -48,7 +40,7 @@ Portfolio data
.. code-block:: python
# Continuation of previous example.
offtake = tenant.portfolio_pfl("B2C_household", "current_offtake", ts_left, ts_right)
offtake = tenant.portfolio_pfl("B2C_household", "current_offtake", "2022-09-05", "2022-09-08")
offtake
.. code-block:: text
Expand Down Expand Up @@ -78,7 +70,7 @@ Prices
.. code-block:: python
# Continuation of previous example.
prices = tenant.price_pfl("fwc_monthly_DE", ts_left, ts_right)
prices = tenant.price_pfl("fwc_monthly_DE", "2022-09-05", "2022-09-08")
prices
.. code-block:: text
Expand All @@ -101,6 +93,36 @@ Prices
2022-09-07 23:45:00 +0200 295.72
General
-------

For convenience, the method ``.general_pfl()`` exists. It can be used to fetch data using the timeseries name. This is useful if data is needed from a timeseries that is *not* specified in the ``Structure`` instance.

.. code-block:: python
# Continuation of previous example.
churn = tenant.general_pfl("B2C_Household", "Expected churn in MW", "2022-09-05", "2022-09-08")
churn
.. code-block:: text
PfLine object with volume information.
. Timestamps: first: 2022-09-05 00:00:00+02:00 timezone: Europe/Berlin
last: 2022-09-07 23:45:00+02:00 freq: <15 * Minutes> (288 datapoints)
w q
MW MWh
2022-09-05 00:00:00 +0200 -9.9 -2.5
2022-09-05 00:15:00 +0200 -9.8 -2.5
2022-09-05 00:30:00 +0200 -9.7 -2.4
2022-09-05 00:45:00 +0200 -9.5 -2.4
2022-09-05 01:00:00 +0200 -9.5 -2.4
.. .. ..
2022-09-07 23:00:00 +0200 -11.2 -2.8
2022-09-07 23:15:00 +0200 -10.9 -2.7
2022-09-07 23:30:00 +0200 -10.7 -2.7
2022-09-07 23:45:00 +0200 -10.5 -2.6
-----
Cache
Expand Down

0 comments on commit 88eb277

Please sign in to comment.