Skip to content

Commit

Permalink
Merge pull request #254 from google/variable-length-windows
Browse files Browse the repository at this point in the history
Variable window length
  • Loading branch information
ianspektor authored Sep 12, 2023
2 parents cc13aa4 + 76c55cd commit 2c4893d
Show file tree
Hide file tree
Showing 24 changed files with 1,214 additions and 513 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
run: poetry install --no-interaction

- name: Run tests
run: poetry run bazel test //temporian/...:all //docs/...:all //tools/...:all --test_output=errors
run: poetry run bazel test //temporian/...:all //docs/...:all //tools/...:all --test_output=errors --compilation_mode=dbg
# Note: Since mid-August 2023, using "//...:all" incorrectly tries to
# compile the "build" directory created during the installation of one
# of the dependencies during "poetry install".
5 changes: 4 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
"prettier.tabWidth": 2,
"editor.detectIndentation": true,
"files.associations": {
"*.excalidraw": "json",
"*.excalidrawlib": "json",
"system_error": "cpp",
"type_traits": "cpp",
"xtr1common": "cpp",
Expand Down Expand Up @@ -92,7 +94,8 @@
"random": "cpp",
"string_view": "cpp",
"numbers": "cpp",
"any": "cpp"
"any": "cpp",
"bitset": "cpp"
},
"python.analysis.typeCheckingMode": "basic",
"jupyter.notebookFileRoot": "${fileDirname}",
Expand Down
2 changes: 2 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ bazel test //...:all

You can use the Bazel test flag `--test_output=streamed` to see the test logs in realtime.

If developing and testing C++ code, the `--compilation_mode=dbg` flag enables additional assertions that are otherwise disabled.

Note that these tests also include docstring examples, using the builtin `doctest` module.
See the [Adding code examples](#adding-code-examples) section for more information.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: temporian.core.typing.WindowLength
151 changes: 103 additions & 48 deletions temporian/core/event_set_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,19 @@

# pylint: disable=import-outside-toplevel


from __future__ import annotations
from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING

from temporian.core.data.duration import Duration


if TYPE_CHECKING:
from temporian.core.typing import EventSetOrNode, TypeOrDType, IndexKeyList
from temporian.core.typing import (
EventSetOrNode,
TypeOrDType,
IndexKeyList,
WindowLength,
)

T_SCALAR = (int, float)

Expand Down Expand Up @@ -1379,17 +1383,20 @@ def leak(self: EventSetOrNode, duration: Duration) -> EventSetOrNode:

def moving_count(
self: EventSetOrNode,
window_length: Duration,
window_length: WindowLength,
sampling: Optional[EventSetOrNode] = None,
) -> EventSetOrNode:
"""Gets the number of events in a sliding window.
Create a tp.int32 feature containing the number of events in the time
window (t - window_length, t].
If the `sampling` argument is not provided, outputs a timestamp for
each timestamp in `input`. If the `sampling` argument is provided,
outputs a timestamp for each timestamp in `sampling`.
`sampling` can't be specified if a variable `window_length` is
specified (i.e. if `window_length` is an EventSet).
If `sampling` is specified or `window_length` is an EventSet, the moving
window is sampled at each timestamp in them, else it is sampled on the
input's.
Example without sampling:
```python
Expand Down Expand Up @@ -1418,6 +1425,27 @@ def moving_count(
```
Example with variable window length:
```python
>>> a = tp.event_set(timestamps=[0, 1, 2, 5])
>>> b = tp.event_set(
... timestamps=[0, 3, 3, 3, 9],
... features={
... "w": [1, 0.5, 3.5, 2.5, 5],
... },
... )
>>> c = a.moving_count(window_length=b)
>>> c
indexes: []
features: [('count', int32)]
events:
(5 events):
timestamps: [0. 3. 3. 3. 9.]
'count': [1 0 3 2 1]
...
```
Example with index:
```python
>>> a = tp.event_set(
Expand Down Expand Up @@ -1459,7 +1487,7 @@ def moving_count(

def moving_max(
self: EventSetOrNode,
window_length: Duration,
window_length: WindowLength,
sampling: Optional[EventSetOrNode] = None,
) -> EventSetOrNode:
"""Computes the maximum in a sliding window over an
Expand All @@ -1469,11 +1497,16 @@ def moving_max(
returns at time t the max of non-nan values for the feature in the window
(t - window_length, t].
If `sampling` is provided samples the moving window's value at each
timestamp in `sampling`, else samples it at each timestamp in the input.
`sampling` can't be specified if a variable `window_length` is
specified (i.e. if `window_length` is an EventSet).
If the window does not contain any values (e.g., all the values are missing,
or the window does not contain any sampling), outputs missing values.
If `sampling` is specified or `window_length` is an EventSet, the moving
window is sampled at each timestamp in them, else it is sampled on the
input's.
If the window does not contain any values (e.g., all the values are
missing, or the window does not contain any sampling), outputs missing
values.
Example:
```python
Expand Down Expand Up @@ -1509,7 +1542,7 @@ def moving_max(

def moving_min(
self: EventSetOrNode,
window_length: Duration,
window_length: WindowLength,
sampling: Optional[EventSetOrNode] = None,
) -> EventSetOrNode:
"""Computes the minimum of values in a sliding window over an
Expand All @@ -1519,11 +1552,16 @@ def moving_min(
returns at time t the minimum of non-nan values for the feature in the window
(t - window_length, t].
If `sampling` is provided samples the moving window's value at each
timestamp in `sampling`, else samples it at each timestamp in the input.
`sampling` can't be specified if a variable `window_length` is
specified (i.e. if `window_length` is an EventSet).
If `sampling` is specified or `window_length` is an EventSet, the moving
window is sampled at each timestamp in them, else it is sampled on the
input's.
If the window does not contain any values (e.g., all the values are missing,
or the window does not contain any sampling), outputs missing values.
If the window does not contain any values (e.g., all the values are
missing, or the window does not contain any sampling), outputs missing
values.
Example:
```python
Expand All @@ -1542,8 +1580,8 @@ def moving_min(
```
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for examples
of moving window operations with external sampling and indices.
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for
examples of moving window operations with external sampling and indices.
Args:
window_length: Sliding window's length.
Expand All @@ -1559,23 +1597,28 @@ def moving_min(

def moving_standard_deviation(
self: EventSetOrNode,
window_length: Duration,
window_length: WindowLength,
sampling: Optional[EventSetOrNode] = None,
) -> EventSetOrNode:
"""Computes the standard deviation of values in a sliding window over an
[`EventSet`][temporian.EventSet].
For each t in sampling, and for each feature independently, returns at time
t the standard deviation for the feature in the window
For each t in sampling, and for each feature independently, returns at
time t the standard deviation for the feature in the window
(t - window_length, t].
If `sampling` is provided samples the moving window's value at each
timestamp in `sampling`, else samples it at each timestamp in the input.
`sampling` can't be specified if a variable `window_length` is
specified (i.e. if `window_length` is an EventSet).
If `sampling` is specified or `window_length` is an EventSet, the moving
window is sampled at each timestamp in them, else it is sampled on the
input's.
Missing values (such as NaNs) are ignored.
If the window does not contain any values (e.g., all the values are missing,
or the window does not contain any sampling), outputs missing values.
If the window does not contain any values (e.g., all the values are
missing, or the window does not contain any sampling), outputs missing
values.
Example:
```python
Expand All @@ -1594,8 +1637,8 @@ def moving_standard_deviation(
```
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for examples of moving window
operations with external sampling and indices.
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for
examples of moving window operations with external sampling and indices.
Args:
window_length: Sliding window's length.
Expand All @@ -1616,22 +1659,27 @@ def moving_standard_deviation(

def moving_sum(
self: EventSetOrNode,
window_length: Duration,
window_length: WindowLength,
sampling: Optional[EventSetOrNode] = None,
) -> EventSetOrNode:
"""Computes the sum of values in a sliding window over an
[`EventSet`][temporian.EventSet].
For each t in sampling, and for each feature independently, returns at time
t the sum of the feature in the window (t - window_length, t].
For each t in sampling, and for each feature independently, returns at
time t the sum of the feature in the window (t - window_length, t].
If `sampling` is provided samples the moving window's value at each
timestamp in `sampling`, else samples it at each timestamp in the input.
`sampling` can't be specified if a variable `window_length` is
specified (i.e. if `window_length` is an EventSet).
If `sampling` is specified or `window_length` is an EventSet, the moving
window is sampled at each timestamp in them, else it is sampled on the
input's.
Missing values (such as NaNs) are ignored.
If the window does not contain any values (e.g., all the values are missing,
or the window does not contain any sampling), outputs missing values.
If the window does not contain any values (e.g., all the values are
missing, or the window does not contain any sampling), outputs missing
values.
Example:
```python
Expand All @@ -1650,8 +1698,8 @@ def moving_sum(
```
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for examples of moving window
operations with external sampling and indices.
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for
examples of moving window operations with external sampling and indices.
Args:
window_length: Sliding window's length.
Expand Down Expand Up @@ -2149,37 +2197,44 @@ def set_index(
Args:
indexes: List of index / feature names (strings) used as
the new indexes. These names should be either indexes or features in
the input.
the new indexes. These names should be either indexes or
features in the input.
Returns:
EventSet with the updated indexes.
Raises:
KeyError: If any of the specified `indexes` are not found in the input.
KeyError: If any of the specified `indexes` are not found in the
input.
"""
from temporian.core.operators.add_index import set_index

return set_index(self, indexes=indexes)

def simple_moving_average(
self: EventSetOrNode,
window_length: Duration,
window_length: WindowLength,
sampling: Optional[EventSetOrNode] = None,
) -> EventSetOrNode:
"""Computes the average of values in a sliding window over an
[`EventSet`][temporian.EventSet].
For each t in sampling, and for each feature independently, returns at time
t the average value of the feature in the window (t - window_length, t].
For each t in sampling, and for each feature independently, returns at
time t the average value of the feature in the window
(t - window_length, t].
`sampling` can't be specified if a variable `window_length` is
specified (i.e. if `window_length` is an EventSet).
If `sampling` is provided samples the moving window's value at each
timestamp in `sampling`, else samples it at each timestamp in the input.
If `sampling` is specified or `window_length` is an EventSet, the moving
window is sampled at each timestamp in them, else it is sampled on the
input's.
Missing values (such as NaNs) are ignored.
If the window does not contain any values (e.g., all the values are missing,
or the window does not contain any sampling), outputs missing values.
If the window does not contain any values (e.g., all the values are
missing, or the window does not contain any timestamp), outputs missing
values.
Example:
```python
Expand All @@ -2198,8 +2253,8 @@ def simple_moving_average(
```
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for examples of moving window
operations with external sampling and indices.
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for
examples of moving window operations with external sampling and indices.
Args:
window_length: Sliding window's length.
Expand Down
1 change: 1 addition & 0 deletions temporian/core/operators/window/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ py_library(
srcs = ["base.py"],
srcs_version = "PY3",
deps = [
"//temporian/core:typing",
"//temporian/core/data:dtype",
"//temporian/core/data:duration_utils",
"//temporian/core/data:node",
Expand Down
Loading

0 comments on commit 2c4893d

Please sign in to comment.