Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH1055 Add pandas.api.typing to pandas-stubs #1058

Merged
merged 9 commits into from
Dec 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions pandas-stubs/api/typing/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pandas.core.groupby import (
DataFrameGroupBy as DataFrameGroupBy,
SeriesGroupBy as SeriesGroupBy,
)
from pandas.core.resample import (
DatetimeIndexResamplerGroupby as DatetimeIndexResamplerGroupby,
PeriodIndexResamplerGroupby as PeriodIndexResamplerGroupby,
Resampler as Resampler,
TimedeltaIndexResamplerGroupby as TimedeltaIndexResamplerGroupby,
TimeGrouper as TimeGrouper,
)
from pandas.core.window import (
Expanding as Expanding,
ExpandingGroupby as ExpandingGroupby,
ExponentialMovingWindow as ExponentialMovingWindow,
ExponentialMovingWindowGroupby as ExponentialMovingWindowGroupby,
Rolling as Rolling,
RollingGroupby as RollingGroupby,
Window as Window,
)

from pandas._libs import NaTType as NaTType
from pandas._libs.missing import NAType as NAType

from pandas.io.json._json import JsonReader as JsonReader

# SASReader is not defined, so its import is commented out for now.
# from pandas.io.sas.sasreader import SASReader as SASReader
from pandas.io.stata import StataReader as StataReader
218 changes: 218 additions & 0 deletions tests/test_api_typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
"""Test module for classes in pandas.api.typing."""

import numpy as np
import pandas as pd
from pandas._testing import ensure_clean
from pandas.api.typing import (
DataFrameGroupBy,
DatetimeIndexResamplerGroupby,
Expanding,
ExpandingGroupby,
ExponentialMovingWindow,
ExponentialMovingWindowGroupby,
JsonReader,
NaTType,
NAType,
PeriodIndexResamplerGroupby,
Resampler,
Rolling,
RollingGroupby,
SeriesGroupBy,
StataReader,
TimedeltaIndexResamplerGroupby,
TimeGrouper,
Window,
)
import pytest
from typing_extensions import (
TypeAlias,
assert_type,
)

from tests import check

from pandas.io.json._json import read_json

# Union of the three resampler-groupby classes imported from pandas.api.typing.
# It is used in this module as the parameter annotation of the nested helper
# in each of the resampler-groupby tests.
ResamplerGroupBy: TypeAlias = (
DatetimeIndexResamplerGroupby
| PeriodIndexResamplerGroupby
| TimedeltaIndexResamplerGroupby
)


def test_dataframegroupby() -> None:
    """Pass the result of ``DataFrame.groupby`` through a ``DataFrameGroupBy`` helper.

    The nested helper's parameter annotation is what a static type checker
    compares the groupby result against; ``check`` is then called with the
    same object and ``DataFrameGroupBy``.
    """
    # The explicit ``-> None`` matches the annotated tests in this module.
    # By default mypy does not type-check the body of a function that has no
    # annotations at all, so without it the ``f1(group)`` call could go
    # unverified.
    df = pd.DataFrame({"a": [1, 2, 3]})
    group = df.groupby("a")

    def f1(gb: DataFrameGroupBy) -> None:
        check(gb, DataFrameGroupBy)

    f1(group)


def test_seriesgroupby() -> None:
    """Pass the result of ``Series.groupby(level=0)`` through a ``SeriesGroupBy`` helper.

    The nested helper's parameter annotation is what a static type checker
    compares the groupby result against; ``check`` is then called with the
    same object and ``SeriesGroupBy``.
    """
    # The explicit ``-> None`` matches the annotated tests in this module.
    # By default mypy does not type-check the body of a function that has no
    # annotations at all, so without it the ``f1(...)`` call could go
    # unverified.
    sr = pd.Series([1, 2, 3], index=pd.Index(["a", "b", "a"]))

    def f1(gb: SeriesGroupBy) -> None:
        check(gb, SeriesGroupBy)

    f1(sr.groupby(level=0))


def tests_datetimeindexersamplergroupby() -> None:
    """Resample a groupby over a daily ``date_range`` index with ``"ME"``.

    The resampled object is handed to a helper annotated with
    ``ResamplerGroupBy`` and to ``check`` with
    ``DatetimeIndexResamplerGroupby``.
    """

    def _verify(resampled: ResamplerGroupBy) -> None:
        check(resampled, DatetimeIndexResamplerGroupby)

    n_days = 365
    daily_index = pd.date_range("1999-1-1", periods=n_days, freq="D")
    values = np.random.standard_normal((n_days, 2))
    frame = pd.DataFrame(values, index=daily_index, columns=["col1", "col2"])

    grouped = frame.groupby("col2")
    _verify(grouped.resample("ME"))


def test_timedeltaindexresamplergroupby() -> None:
    """Resample a groupby over a five-entry ``TimedeltaIndex`` frame with ``"1D"``.

    The resampled object is handed to a helper annotated with
    ``ResamplerGroupBy`` and to ``check`` with
    ``TimedeltaIndexResamplerGroupby``.
    """

    def _verify(resampled: ResamplerGroupBy) -> None:
        check(resampled, TimedeltaIndexResamplerGroupby)

    day_offsets = pd.TimedeltaIndex(
        ["0 days", "1 days", "2 days", "3 days", "4 days"]
    )
    values = np.random.standard_normal((5, 2))
    frame = pd.DataFrame(values, index=day_offsets, columns=["col1", "col2"])

    grouped = frame.groupby("col2")
    _verify(grouped.resample("1D"))


@pytest.mark.skip("Resampling with a PeriodIndex is deprecated.")
def test_periodindexresamplergroupby() -> None:
    """Resample a groupby over a ``period_range`` index frame with ``"3min"``.

    Skipped via the decorator above. When run, the resampled object is handed
    to a helper annotated with ``ResamplerGroupBy`` and to ``check`` with
    ``PeriodIndexResamplerGroupby``.
    """

    def _verify(resampled: ResamplerGroupBy) -> None:
        check(resampled, PeriodIndexResamplerGroupby)

    periods = pd.period_range("2020-01-28 09:00", periods=4, freq="D")
    frame = pd.DataFrame(data=4 * [range(2)], index=periods, columns=["a", "b"])

    grouped = frame.groupby("a")
    _verify(grouped.resample("3min"))


def test_natype() -> None:
    """Assert that ``Int64Dtype().na_value`` is typed, and checked, as ``NAType``."""
    na_value = pd.Int64Dtype().na_value
    check(assert_type(na_value, NAType), NAType)


def test_nattype() -> None:
    """Assert that ``Timedelta + NaT`` is typed, and checked, as ``NaTType``."""
    one_day = pd.Timedelta("1 day")
    total = one_day + pd.NaT
    check(assert_type(total, NaTType), NaTType)


def test_expanding() -> None:
    """Pass ``DataFrame.expanding()`` to an ``Expanding``-annotated helper and ``check``."""

    def _verify(window: Expanding) -> None:
        check(window, Expanding)

    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    expanding_window = frame.expanding()
    _verify(expanding_window)


def test_expanding_groubpy() -> None:
    """Pass ``groupby("B").expanding()`` to an ``ExpandingGroupby``-annotated helper and ``check``."""

    def _verify(window: ExpandingGroupby) -> None:
        check(window, ExpandingGroupby)

    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    grouped = frame.groupby("B")
    _verify(grouped.expanding())


def test_ewm() -> None:
    """Pass ``DataFrame.ewm(2)`` to an ``ExponentialMovingWindow``-annotated helper and ``check``."""

    def _verify(window: ExponentialMovingWindow) -> None:
        check(window, ExponentialMovingWindow)

    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    ewm_window = frame.ewm(2)
    _verify(ewm_window)


def test_ewm_groubpy() -> None:
    """Pass ``groupby("B").ewm(2)`` to an ``ExponentialMovingWindowGroupby``-annotated helper and ``check``."""

    def _verify(window: ExponentialMovingWindowGroupby) -> None:
        check(window, ExponentialMovingWindowGroupby)

    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    grouped = frame.groupby("B")
    _verify(grouped.ewm(2))


def test_json_reader() -> None:
    """Pass the reader returned by a chunked ``read_json`` to a ``JsonReader`` helper.

    The frame is written with ``to_json`` to a temporary path from
    ``ensure_clean`` and re-opened with ``chunksize=1, lines=True``. The
    resulting object is handed to a helper annotated with ``JsonReader`` and
    to ``check`` with ``JsonReader``.
    """
    df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def f1(gb: JsonReader) -> None:
        check(gb, JsonReader)

    with ensure_clean() as path:
        check(assert_type(df.to_json(path), None), type(None))
        json_reader = read_json(path, chunksize=1, lines=True)
        try:
            f1(json_reader)
        finally:
            # Close the reader even if the check above raises, so it is not
            # left open when ensure_clean() exits.
            json_reader.close()


def test_resampler() -> None:
    """Pass ``Series.resample("3min")`` to a ``Resampler``-annotated helper and ``check``."""

    def _verify(resampled: Resampler) -> None:
        check(resampled, Resampler)

    per_second = pd.date_range("20130101", periods=5, freq="s")
    series = pd.Series([1, 2, 3, 4, 5], index=per_second)
    _verify(series.resample("3min"))


def test_rolling() -> None:
    """Pass ``DataFrame.rolling(2)`` to a ``Rolling``-annotated helper and ``check``."""

    def _verify(window: Rolling) -> None:
        check(window, Rolling)

    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    rolling_window = frame.rolling(2)
    _verify(rolling_window)


def test_rolling_groupby() -> None:
    """Pass ``groupby("B").rolling(2)`` to a ``RollingGroupby``-annotated helper and ``check``."""

    def _verify(window: RollingGroupby) -> None:
        check(window, RollingGroupby)

    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    grouped = frame.groupby("B")
    _verify(grouped.rolling(2))


def test_timegrouper() -> None:
    """Pass a ``pd.Grouper`` built with ``key`` and ``freq`` to a ``TimeGrouper`` helper and ``check``."""

    def _verify(time_grouper: TimeGrouper) -> None:
        check(time_grouper, TimeGrouper)

    weekly = pd.Grouper(key="Publish date", freq="1W")
    _verify(weekly)


def test_window() -> None:
    """Pass ``Series.rolling(2, win_type="gaussian")`` to a ``Window`` helper and ``check``."""

    def _verify(window: Window) -> None:
        check(window, Window)

    series = pd.Series([0, 1, 5, 2, 8])
    weighted = series.rolling(2, win_type="gaussian")
    _verify(weighted)


def test_statereader() -> None:
    """Write a frame with ``to_stata`` and re-open it with ``StataReader``.

    The file lives at a temporary path from ``ensure_clean``. The reader,
    used as a context manager, is handed to a helper annotated with
    ``StataReader`` and to ``check`` with ``StataReader``.
    """

    def _verify(stata_reader: StataReader) -> None:
        check(stata_reader, StataReader)

    frame = pd.DataFrame([[1, 2], [3, 4]], columns=["col_1", "col_2"])
    written_at = pd.Timestamp(2000, 2, 29, 14, 21)
    labels = {"col_1": "This is an example"}

    with ensure_clean() as path:
        frame.to_stata(
            path,
            time_stamp=written_at,
            variable_labels=labels,
            version=None,
        )
        with StataReader(path) as opened:
            _verify(opened)