Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.6.17 #252

Merged
merged 16 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 3 additions & 28 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,9 @@
* Forecasts are desired for the future immediately following the most recent data.
* trimmed_mean to AverageValueNaive

# 0.6.16 🇺🇦 🇺🇦 🇺🇦
* export_template added focus_models option
* added OneClassSVM and GaussianMixture anomaly model options
* added plot_unpredictability_score
* added a few more NeuralForecast search options
* bounds_only to Constraint transformer
* updates for deprecated upstream args
* FIRFilter transformer added
* mle and imle downscaled to reduce score imbalance issues with these two in generate score
* SectionalMotif now more robust to forecast lengths longer than history
* new transformer and metric options for SectionalMotif
* NaN robustness to matse
* 'round' option to Constraint
* minor change to mosaic min style ensembles to remove edge case errors
* 'mosaic-profile', 'filtered', 'unpredictability_adjusted' and 'median' style mosaics added
* updated profiler, and improved feature generation for horizontal generalization
* changepoint style trend as an option to GLM and GLS
* added ShiftFirstValue which is only a minor nuance on PositiveShift transformer
* added BasicLinearModel model
* datepart_method, scale, and fourier encoding to WindowRegression
* trimmed_mean and more date part options to SeasonalityMotif
* some additional options to MultivariateRegression
* added ThetaTransformer
* added TVVAR model (time varying VAR)
* added ChangepointDetrend transformer
* added MeanPercentSplitter transformer
* updated load_daily with more recent history
* added support for passing a custom metric
# 0.6.17 🇺🇦 🇺🇦 🇺🇦
* minor adjustments and bug fixes for scalability
* added BallTreeRegressionMotif

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from autots.models.cassandra import Cassandra


__version__ = '0.6.16'
__version__ = '0.6.17'

TransformTS = GeneralTransformer

Expand Down
70 changes: 52 additions & 18 deletions autots/evaluator/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
BallTreeMultivariateMotif,
BasicLinearModel,
TVVAR,
BallTreeRegressionMotif,
)
from autots.models.statsmodels import (
GLS,
Expand Down Expand Up @@ -732,6 +733,17 @@ def ModelMonster(
n_jobs=n_jobs,
**parameters,
)
elif model == 'BallTreeRegressionMotif':
return BallTreeRegressionMotif(
frequency=frequency,
prediction_interval=prediction_interval,
holiday_country=holiday_country,
random_seed=random_seed,
verbose=verbose,
forecast_length=forecast_length,
n_jobs=n_jobs,
**parameters,
)
elif model == "":
raise AttributeError(
("Model name is empty. Likely this means AutoTS has not been fit.")
Expand Down Expand Up @@ -1669,6 +1681,7 @@ def _eval_prediction_for_template(
).reset_index(drop=True)

ps_metric = model_error.per_series_metrics
ps_metric["ValidationRound"] = validation_round
ps_metric.index.name = "autots_eval_metric"
ps_metric = ps_metric.reset_index(drop=False)
ps_metric.index = [model_id] * ps_metric.shape[0]
Expand Down Expand Up @@ -2016,6 +2029,27 @@ def _eval_prediction_for_template(
},
},
},
{ # best on VPV, 19.7 smape
"fillna": "quadratic",
"transformations": {"0": "AlignLastValue", "1": "ChangepointDetrend"},
"transformation_params": {
"0": {
"rows": 1,
"lag": 1,
"method": "multiplicative",
"strength": 1.0,
"first_value_only": False,
"threshold": None,
"threshold_method": "mean",
},
"1": {
"model": "Linear",
"changepoint_spacing": 180,
"changepoint_distance_end": 360,
"datepart_method": None,
},
},
},
]


Expand Down Expand Up @@ -2360,58 +2394,58 @@ def virtual_memory():
)
ps = template_result.per_series_metrics
template_result.per_series_mae = ps[ps['autots_eval_metric'] == 'mae'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_made = ps[ps['autots_eval_metric'] == 'made'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_contour = ps[
ps['autots_eval_metric'] == 'contour'
].drop(columns='autots_eval_metric')
].drop(columns=['autots_eval_metric', "ValidationRound"])
template_result.per_series_rmse = ps[ps['autots_eval_metric'] == 'rmse'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_spl = ps[ps['autots_eval_metric'] == 'spl'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_mle = ps[ps['autots_eval_metric'] == 'mle'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_imle = ps[ps['autots_eval_metric'] == 'imle'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_maxe = ps[ps['autots_eval_metric'] == 'maxe'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_oda = ps[ps['autots_eval_metric'] == 'oda'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_mqae = ps[ps['autots_eval_metric'] == 'mqae'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_dwae = ps[ps['autots_eval_metric'] == 'dwae'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_ewmae = ps[ps['autots_eval_metric'] == 'ewmae'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_uwmse = ps[ps['autots_eval_metric'] == 'uwmse'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_smoothness = ps[
ps['autots_eval_metric'] == 'smoothness'
].drop(columns='autots_eval_metric')
].drop(columns=['autots_eval_metric', "ValidationRound"])
template_result.per_series_mate = ps[ps['autots_eval_metric'] == 'mate'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_matse = ps[ps['autots_eval_metric'] == 'matse'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
template_result.per_series_wasserstein = ps[
ps['autots_eval_metric'] == 'wasserstein'
].drop(columns='autots_eval_metric')
].drop(columns=['autots_eval_metric', "ValidationRound"])
template_result.per_series_dwd = ps[ps['autots_eval_metric'] == 'dwd'].drop(
columns='autots_eval_metric'
columns=['autots_eval_metric', "ValidationRound"]
)
else:
template_result.per_series_metrics = pd.DataFrame()
Expand Down
30 changes: 15 additions & 15 deletions autots/evaluator/auto_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,9 +428,9 @@ def __init__(

full_params['transformations'] = transformations
full_params['transformation_params'] = transformation_params
self.initial_template.loc[
index, 'TransformationParameters'
] = json.dumps(full_params)
self.initial_template.loc[index, 'TransformationParameters'] = (
json.dumps(full_params)
)

self.regressor_used = False
self.subset_flag = False
Expand Down Expand Up @@ -1974,10 +1974,10 @@ def _run_template(
self.model_count = template_result.model_count
# capture results from lower-level template run
if "TotalRuntime" in template_result.model_results.columns:
template_result.model_results[
'TotalRuntime'
] = template_result.model_results['TotalRuntime'].fillna(
pd.Timedelta(seconds=60)
template_result.model_results['TotalRuntime'] = (
template_result.model_results['TotalRuntime'].fillna(
pd.Timedelta(seconds=60)
)
)
else:
# trying to catch a rare and sneaky bug (perhaps some variety of beetle?)
Expand Down Expand Up @@ -2094,9 +2094,9 @@ def _run_validations(
frac=0.8, random_state=self.random_seed
).reindex(idx)
nan_frac = val_df_train.shape[1] / num_validations
val_df_train.iloc[
-2:, int(nan_frac * y) : int(nan_frac * (y + 1))
] = np.nan
val_df_train.iloc[-2:, int(nan_frac * y) : int(nan_frac * (y + 1))] = (
np.nan
)

# run validation template on current slice
result = self._run_template(
Expand Down Expand Up @@ -3557,9 +3557,9 @@ def plot_validations(
if subset is None:
series = random.choice(df_wide.columns)
else:
scores = self.best_model_per_series_mape().index.tolist()
scores = self.best_model_per_series_score().index.tolist()
scores = [x for x in scores if "_lltmicro" not in str(x)]
mapes = self.best_model_per_series_score().index.tolist()
mapes = self.best_model_per_series_mape().index.tolist()
mapes = [x for x in mapes if "_lltmicro" not in str(x)]
if str(subset).lower() == "best":
series = mapes[-1]
Expand Down Expand Up @@ -4753,9 +4753,9 @@ def diagnose_params(self, target='runtime', waterfall_plots=True):
)
y = pd.json_normalize(json.loads(row["ModelParameters"]))
y.index = [row['ID']]
y[
'Model'
] = x # might need to remove this and do analysis independently for each
y['Model'] = (
x # might need to remove this and do analysis independently for each
)
res.append(
pd.DataFrame(
{
Expand Down
8 changes: 4 additions & 4 deletions autots/evaluator/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ def extract_seasonal_val_periods(validation_method):


def validate_num_validations(
validation_method,
num_validations,
df_wide_numeric,
forecast_length,
validation_method="backwards",
num_validations=2,
df_wide_numeric=None,
forecast_length=None,
min_allowed_train_percent=0.5,
verbose=0,
):
Expand Down
12 changes: 6 additions & 6 deletions autots/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,18 +490,18 @@ def long_form_results(
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_upper[
interval_name
] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
upload_upper[interval_name] = (
f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
)
upload_lower = pd.melt(
self.lower_forecast.rename_axis(index='datetime').reset_index(),
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_lower[
interval_name
] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
upload_lower[interval_name] = (
f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
)

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
Expand Down
Loading
Loading