Rmspe test stat (#24)

* RMSPE WIP * Rmspe test stat (#22) * Minor change in README to fix guidance for developers (#18) * Noise transform (#19) * Add noise transformation that apply perturbations on treatment * Formatting * Add docstring * Fix linting * Add tests to check perturbation impact and randomness over timepoints * bump version (#20) * Initial implementation of RMSPE * Add TestResultFrame parent class for test results * Add test for RMSPE * Add doc string * Fix linting * Update src/causal_validation/validation/rmspe.py Co-authored-by: Thomas Pinder <[email protected]> * Fix typo --------- Co-authored-by: Thomas Pinder <[email protected]> --------- Co-authored-by: Thomas Pinder <[email protected]> Co-authored-by: Semih Akbayrak <[email protected]>
amazon-science · Sep 24, 2024 · 8cfdcfa · 8cfdcfa
1 parent 8b44c71
commit 8cfdcfa
Show file tree

Hide file tree

Showing 11 changed files with 512 additions and 34 deletions.
diff --git a/docs/examples/placebo_test.ipynb b/docs/examples/placebo_test.ipynb
@@ -207,6 +207,22 @@
     "datasets = DatasetContainer([data, complex_data], names=[\"Simple\", \"Complex\"])\n",
     "PlaceboTest([model, did_model], datasets).execute().summary()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

diff --git a/src/causal_validation/data.py b/src/causal_validation/data.py
@@ -27,6 +27,7 @@ class Dataset:
     yte: Float[np.ndarray, "M 1"]
     _start_date: dt.date
     counterfactual: tp.Optional[Float[np.ndarray, "M 1"]] = None
+    synthetic: tp.Optional[Float[np.ndarray, "M 1"]] = None
     _name: str = None
 
     def to_df(
@@ -151,7 +152,13 @@ def drop_unit(self, idx: int) -> Dataset:
         Xtr = np.delete(self.Xtr, [idx], axis=1)
         Xte = np.delete(self.Xte, [idx], axis=1)
         return Dataset(
-            Xtr, Xte, self.ytr, self.yte, self._start_date, self.counterfactual
+            Xtr,
+            Xte,
+            self.ytr,
+            self.yte,
+            self._start_date,
+            self.counterfactual,
+            self.synthetic,
         )
 
     def to_placebo_data(self, to_treat_idx: int) -> Dataset:
@@ -204,4 +211,6 @@ def reassign_treatment(
 ) -> Dataset:
     Xtr = data.Xtr
     Xte = data.Xte
-    return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
+    return Dataset(
+        Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
+    )
diff --git a/src/causal_validation/models.py b/src/causal_validation/models.py
@@ -1,17 +1,29 @@
 from dataclasses import dataclass
 import typing as tp
 
+from azcausal.core.effect import Effect
 from azcausal.core.error import Error
 from azcausal.core.estimator import Estimator
-from azcausal.core.result import Result
+from azcausal.core.result import Result as _Result
+from jaxtyping import Float
 
 from causal_validation.data import Dataset
+from causal_validation.types import NPArray
+
+
+@dataclass
+class Result:
+    effect: Effect
+    counterfactual: Float[NPArray, "N 1"]
+    synthetic: Float[NPArray, "N 1"]
+    observed: Float[NPArray, "N 1"]
 
 
 @dataclass
 class AZCausalWrapper:
     model: Estimator
     error_estimator: tp.Optional[Error] = None
+    _az_result: _Result = None
 
     def __post_init__(self):
         self._model_name = self.model.__class__.__name__
@@ -21,4 +33,30 @@ def __call__(self, data: Dataset, **kwargs) -> Result:
         result = self.model.fit(panel, **kwargs)
         if self.error_estimator:
             self.model.error(result, self.error_estimator)
-        return result
+        self._az_result = result
+
+        res = Result(
+            effect=result.effect,
+            counterfactual=self.counterfactual,
+            synthetic=self.synthetic,
+            observed=self.observed,
+        )
+        return res
+
+    @property
+    def counterfactual(self) -> Float[NPArray, "N 1"]:
+        df = self._az_result.effect.by_time
+        c_factual = df.loc[:, "CF"].values.reshape(-1, 1)
+        return c_factual
+
+    @property
+    def synthetic(self) -> Float[NPArray, "N 1"]:
+        df = self._az_result.effect.by_time
+        synth_control = df.loc[:, "C"].values.reshape(-1, 1)
+        return synth_control
+
+    @property
+    def observed(self) -> Float[NPArray, "N 1"]:
+        df = self._az_result.effect.by_time
+        treated = df.loc[:, "T"].values.reshape(-1, 1)
+        return treated
diff --git a/src/causal_validation/transforms/base.py b/src/causal_validation/transforms/base.py
@@ -74,7 +74,9 @@ def apply_values(
         ytr = ytr + pre_intervention_vals[:, :1]
         Xte = Xte + post_intervention_vals[:, 1:]
         yte = yte + post_intervention_vals[:, :1]
-        return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
+        return Dataset(
+            Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
+        )
 
 
 @dataclass(kw_only=True)
@@ -91,4 +93,6 @@ def apply_values(
         ytr = ytr * pre_intervention_vals
         Xte = Xte * post_intervention_vals
         yte = yte * post_intervention_vals
-        return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
+        return Dataset(
+            Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
+        )
diff --git a/src/causal_validation/types.py b/src/causal_validation/types.py
@@ -1,5 +1,6 @@
 import typing as tp
 
+import numpy as np
 from scipy.stats._distn_infrastructure import (
     rv_continuous,
     rv_discrete,
@@ -10,3 +11,4 @@
 InterventionTypes = tp.Literal["pre-intervention", "post-intervention", "both"]
 RandomVariable = tp.Union[rv_continuous, rv_discrete]
 Number = tp.Union[float, int]
+NPArray = np.ndarray
diff --git a/src/causal_validation/validation/placebo.py b/src/causal_validation/validation/placebo.py
@@ -9,20 +9,26 @@
     Column,
     DataFrameSchema,
 )
-from rich import box
 from rich.progress import (
     Progress,
     ProgressBar,
     track,
 )
-from rich.table import Table
 from scipy.stats import ttest_1samp
+from tqdm import (
+    tqdm,
+    trange,
+)
 
 from causal_validation.data import (
     Dataset,
     DatasetContainer,
 )
-from causal_validation.models import AZCausalWrapper
+from causal_validation.models import (
+    AZCausalWrapper,
+    Result,
+)
+from causal_validation.validation.testing import TestResultFrame
 
 PlaceboSchema = DataFrameSchema(
     {
@@ -39,13 +45,13 @@
 
 
 @dataclass
-class PlaceboTestResult:
-    effects: tp.Dict[tp.Tuple[str, str], tp.List[Effect]]
+class PlaceboTestResult(TestResultFrame):
+    effects: tp.Dict[tp.Tuple[str, str], tp.List[Result]]
 
     def _model_to_df(
-        self, model_name: str, dataset_name: str, effects: tp.List[Effect]
+        self, model_name: str, dataset_name: str, effects: tp.List[Result]
     ) -> pd.DataFrame:
-        _effects = [effect.value for effect in effects]
+        _effects = [e.effect.percentage().value for e in effects]
         _n_effects = len(_effects)
         expected_effect = np.mean(_effects)
         stddev_effect = np.std(_effects)
@@ -71,21 +77,6 @@ def to_df(self) -> pd.DataFrame:
         PlaceboSchema.validate(df)
         return df
 
-    def summary(self, precision: int = 4) -> Table:
-        table = Table(show_header=True, box=box.MARKDOWN)
-        df = self.to_df()
-        numeric_cols = df.select_dtypes(include=[np.number])
-        df.loc[:, numeric_cols.columns] = np.round(numeric_cols, decimals=precision)
-
-        for column in df.columns:
-            table.add_column(str(column), style="magenta")
-
-        for _, value_list in enumerate(df.values.tolist()):
-            row = [str(x) for x in value_list]
-            table.add_row(*row)
-
-        return table
-
 
 @dataclass
 class PlaceboTest:
@@ -109,7 +100,7 @@ def execute(self, verbose: bool = True) -> PlaceboTestResult:
         datasets = self.dataset_dict
         n_datasets = len(datasets)
         n_control = sum([d.n_units for d in datasets.values()])
-        with Progress() as progress:
+        with Progress(disable=not verbose) as progress:
             model_task = progress.add_task(
                 "[red]Models", total=len(self.models), visible=verbose
             )
@@ -130,7 +121,6 @@ def execute(self, verbose: bool = True) -> PlaceboTestResult:
                         progress.update(unit_task, advance=1)
                         placebo_data = dataset.to_placebo_data(i)
                         result = model(placebo_data)
-                        result = result.effect.percentage()
                         model_result.append(result)
                     results[(model._model_name, data_name)] = model_result
         return PlaceboTestResult(effects=results)
diff --git a/src/causal_validation/validation/rmspe.py b/src/causal_validation/validation/rmspe.py
@@ -0,0 +1,133 @@
+from dataclasses import dataclass
+import typing as tp
+
+from jaxtyping import Float
+import numpy as np
+import pandas as pd
+from pandera import (
+    Check,
+    Column,
+    DataFrameSchema,
+)
+from rich import box
+from rich.progress import (
+    Progress,
+    ProgressBar,
+    track,
+)
+
+from causal_validation.validation.placebo import PlaceboTest
+from causal_validation.validation.testing import (
+    RMSPETestStatistic,
+    TestResult,
+    TestResultFrame,
+)
+
+RMSPESchema = DataFrameSchema(
+    {
+        "Model": Column(str),
+        "Dataset": Column(str),
+        "Test statistic": Column(float, coerce=True),
+        "p-value": Column(
+            float,
+            checks=[
+                Check.greater_than_or_equal_to(0.0),
+                Check.less_than_or_equal_to(1.0),
+            ],
+            coerce=True,
+        ),
+    }
+)
+
+
+@dataclass
+class RMSPETestResult(TestResultFrame):
+    """
+    A subclass of TestResultFrame, RMSPETestResult stores test statistics and p-value
+    for the treated unit. Test statistics for pseudo treatment units are also stored.
+    """
+
+    treatment_test_results: tp.Dict[tp.Tuple[str, str], TestResult]
+    pseudo_treatment_test_statistics: tp.Dict[tp.Tuple[str, str], tp.List[Float]]
+
+    def to_df(self) -> pd.DataFrame:
+        dfs = []
+        for (model, dataset), test_results in self.treatment_test_results.items():
+            result = {
+                "Model": model,
+                "Dataset": dataset,
+                "Test statistic": test_results.test_statistic,
+                "p-value": test_results.p_value,
+            }
+            df = pd.DataFrame([result])
+            dfs.append(df)
+        df = pd.concat(dfs)
+        RMSPESchema.validate(df)
+        return df
+
+
+@dataclass
+class RMSPETest(PlaceboTest):
+    """
+    A subclass of PlaceboTest calculates RMSPE as test statistic for all units.
+    Given the RMSPE test stats, p-value for actual treatment is calculated.
+    """
+
+    def execute(self, verbose: bool = True) -> RMSPETestResult:
+        treatment_results, pseudo_treatment_results = {}, {}
+        datasets = self.dataset_dict
+        n_datasets = len(datasets)
+        n_control = sum([d.n_units for d in datasets.values()])
+        rmspe = RMSPETestStatistic()
+        with Progress(disable=not verbose) as progress:
+            model_task = progress.add_task(
+                "[red]Models", total=len(self.models), visible=verbose
+            )
+            data_task = progress.add_task(
+                "[blue]Datasets", total=n_datasets, visible=verbose
+            )
+            unit_task = progress.add_task(
+                f"[green]Treatment and Control Units",
+                total=n_control + 1,
+                visible=verbose,
+            )
+            for data_name, dataset in datasets.items():
+                progress.update(data_task, advance=1)
+                for model in self.models:
+                    progress.update(unit_task, advance=1)
+                    treatment_result = model(dataset)
+                    treatment_idx = dataset.ytr.shape[0]
+                    treatment_test_stat = rmspe(
+                        dataset,
+                        treatment_result.counterfactual,
+                        treatment_result.synthetic,
+                        treatment_idx,
+                    )
+                    progress.update(model_task, advance=1)
+                    placebo_test_stats = []
+                    for i in range(dataset.n_units):
+                        progress.update(unit_task, advance=1)
+                        placebo_data = dataset.to_placebo_data(i)
+                        result = model(placebo_data)
+                        placebo_test_stats.append(
+                            rmspe(
+                                placebo_data,
+                                result.counterfactual,
+                                result.synthetic,
+                                treatment_idx,
+                            )
+                        )
+                    pval_idx = 1
+                    for p_stat in placebo_test_stats:
+                        pval_idx += 1 if treatment_test_stat < p_stat else 0
+                    pval = pval_idx / (n_control + 1)
+                    treatment_results[(model._model_name, data_name)] = TestResult(
+                        p_value=pval, test_statistic=treatment_test_stat
+                    )
+                    pseudo_treatment_results[(model._model_name, data_name)] = (
+                        placebo_test_stats
+                    )
+        return RMSPETestResult(
+            treatment_test_results=treatment_results,
+            pseudo_treatment_test_statistics=pseudo_treatment_results,
+        )