winedarksea · winedarksea · Jan 18, 2024 · Jan 3, 2024 · Jan 4, 2024 · Jan 5, 2024
diff --git a/TODO.md b/TODO.md
@@ -12,11 +12,10 @@
 * The most recent data will generally be the most important
 * Forecasts are desired for the future immediately following the most recent data.
 
-# 0.6.7 🇺🇦 🇺🇦 🇺🇦
-* Cassandra bug fix
-* isolated_only to anomaly methods
-* matse metric is possibly temporary and not added to per series weighting options
-* added HistoricValues transformer
+# 0.6.8 🇺🇦 🇺🇦 🇺🇦
+* bug fixes, including more robust NaN handling to avoid kernel crashes with OpenBLAS
+* added BKBandpassFilter
+* added expand_horizontal for scaling mosaics
 
 ### Unstable Upstream Pacakges (those that are frequently broken by maintainers)
 * Pytorch-Forecasting

diff --git a/autots/__init__.py b/autots/__init__.py
@@ -26,7 +26,7 @@
 from autots.models.cassandra import Cassandra
 
 
-__version__ = '0.6.7'
+__version__ = '0.6.8'
 
 TransformTS = GeneralTransformer
 

diff --git a/autots/evaluator/auto_model.py b/autots/evaluator/auto_model.py
@@ -785,7 +785,10 @@ def __init__(
         if self.transformation_dict is None:
             self.transformation_dict = {}
         self.transformer_object = GeneralTransformer(
-            **self.transformation_dict, n_jobs=n_jobs, holiday_country=holiday_country
+            **self.transformation_dict,
+            n_jobs=n_jobs,
+            holiday_country=holiday_country,
+            verbose=self.verbose,
         )
         self.model = ModelMonster(
             model_str,

diff --git a/autots/evaluator/auto_ts.py b/autots/evaluator/auto_ts.py
diff --git a/autots/models/base.py b/autots/models/base.py
@@ -511,24 +511,24 @@ def long_form_results(
         """
         upload = pd.melt(
             self.forecast.rename_axis(index='datetime').reset_index(),
-            var_name="SeriesID",
-            value_name="Value",
+            var_name=id_name,
+            value_name=value_name,
             id_vars="datetime",
         ).set_index("datetime")
         upload[interval_name] = "50%"
         upload_upper = pd.melt(
             self.upper_forecast.rename_axis(index='datetime').reset_index(),
-            var_name="SeriesID",
-            value_name="Value",
+            var_name=id_name,
+            value_name=value_name,
             id_vars="datetime",
         ).set_index("datetime")
         upload_upper[
             interval_name
         ] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
         upload_lower = pd.melt(
             self.lower_forecast.rename_axis(index='datetime').reset_index(),
-            var_name="SeriesID",
-            value_name="Value",
+            var_name=id_name,
+            value_name=value_name,
             id_vars="datetime",
         ).set_index("datetime")
         upload_lower[
@@ -537,7 +537,8 @@ def long_form_results(
 
         upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
         if datetime_column is not None:
-            upload = upload.reset_index(drop=False, names=datetime_column)
+            upload.index.name = str(datetime_column)
+            upload = upload.reset_index(drop=False)
         if update_datetime_name is not None:
             upload[update_datetime_name] = datetime.datetime.utcnow()
         return upload

diff --git a/autots/models/matrix_var.py b/autots/models/matrix_var.py
@@ -25,7 +25,7 @@ def rrvar(data, R, pred_step, maxiter=100):
     X1 = data[:, :-1]
     X2 = data[:, 1:]
     V = np.random.randn(R, N)
-    X1_pinv = np.linalg.pinv(X1)
+    X1_pinv = np.linalg.pinv(np.nan_to_num(X1))
     for it in range(maxiter):
         W = X2 @ np.linalg.pinv((V @ X1))
         V = np.linalg.pinv(W) @ X2 @ X1_pinv

diff --git a/autots/models/model_list.py b/autots/models/model_list.py
@@ -58,7 +58,7 @@
     'GLM': 1,
     'ETS': 1,
     'FBProphet': 0.5,
-    'GluonTS': 0.5,
+    # 'GluonTS': 0.5,
     'UnobservedComponents': 1,
     'VAR': 1,
     'VECM': 1,
@@ -75,7 +75,7 @@
     'ARDL': 1,
     'ARCH': 1,
     'MetricMotif': 1,
-    # 'SeasonalityMotif': 1,
+    'SeasonalityMotif': 1,
 }
 # fastest models at any scale
 superfast = [
@@ -86,6 +86,7 @@
     'SeasonalNaive',
     # 'MetricMotif',
     'SeasonalityMotif',
+    'SectionalMotif',  # not entirely sure but so far this is pretty fast
 ]
 # relatively fast
 fast = {
@@ -105,13 +106,13 @@
     'SectionalMotif': 1,
     'NVAR': 0.3,
     'MAR': 0.25,
-    'RRVAR': 1,
+    'RRVAR': 0.4,
     'KalmanStateSpace': 0.4,
     'MetricMotif': 1,
     'Cassandra': 0.6,
     'SeasonalityMotif': 1.5,
     'FFT': 0.8,
-    "BallTreeMultivariateMotif": 1,  # keep an eye on RAM
+    "BallTreeMultivariateMotif": 0.4,  # keep an eye on RAM, not the fastest at scale but works...
 }
 # models that can scale well if many CPU cores are available
 parallel = {

diff --git a/autots/models/sklearn.py b/autots/models/sklearn.py
@@ -675,18 +675,28 @@ def retrieve_classifier(
 }
 # these are models that are relatively fast with large multioutput Y, small n obs
 datepart_model_dict: dict = {
-    # 'RandomForest': 0.05,  # crashes sometimes at scale for unclear reasons
     'ElasticNet': 0.1,
-    'xgboost': 0.001,  # excess memory at scale
     'MLP': 0.05,
     'DecisionTree': 0.02,
     'Adaboost': 0.05,
     'SVM': 0.01,
     'KerasRNN': 0.02,
     'Transformer': 0.02,  # slow
-    'ExtraTrees': 0.00001,  # some params cause RAM crash?
     'RadiusNeighbors': 0.1,
-    'MultioutputGPR': 0.00001,
+}
+datepart_model_dict_deep = {
+    'RandomForest': 0.05,  # crashes sometimes at scale for unclear reasons
+    'ElasticNet': 0.1,
+    'xgboost': 0.05,
+    'MLP': 0.05,
+    'DecisionTree': 0.02,
+    'Adaboost': 0.05,
+    'SVM': 0.01,
+    'KerasRNN': 0.02,
+    'Transformer': 0.02,  # slow
+    'ExtraTrees': 0.01,  # some params cause RAM crash?
+    'RadiusNeighbors': 0.1,
+    'MultioutputGPR': 0.001,
 }
 gpu = ['Transformer', 'KerasRNN', 'MLP']  # or more accurately, no dnn
 gradient_boosting = {
@@ -788,7 +798,7 @@ def generate_classifier_params(
             }
         else:
             model_dict = {
-                'xgboost': 1,
+                'xgboost': 0.5,
                 'ExtraTrees': 0.2,
                 'RandomForest': 0.1,
                 'KNN': 1,
@@ -2335,7 +2345,7 @@ def predict(
             )
         except Exception as e:
             raise ValueError(
-                f"Datepart prediction with params {self.get_params()} failed"
+                f"Datepart prediction with params {self.get_params()} failed. This is often due to an improperly indexed future_regressor (with drop_most_recent especially)"
             ) from e
 
         if just_point_forecast:
@@ -2366,7 +2376,12 @@ def predict(
 
     def get_new_params(self, method: str = 'random'):
         """Return dict of new parameters for parameter tuning."""
-        model_choice = generate_regressor_params(model_dict=datepart_model_dict)
+        if method == 'deep':
+            model_choice = generate_regressor_params(
+                model_dict=datepart_model_dict_deep
+            )
+        else:
+            model_choice = generate_regressor_params(model_dict=datepart_model_dict)
         datepart_choice = random.choices(
             [
                 "recurring",

diff --git a/autots/models/statsmodels.py b/autots/models/statsmodels.py
@@ -298,7 +298,7 @@ def predict(
             parallel = False
         # joblib multiprocessing to loop through series
         if parallel:
-            df_list = Parallel(n_jobs=self.n_jobs, verbose=pool_verbose)(
+            df_list = Parallel(n_jobs=self.n_jobs, verbose=pool_verbose, timeout=3600)(
                 delayed(glm_forecast_by_column)(
                     current_series=df[col],
                     X=X,
@@ -1432,23 +1432,24 @@ def predict(
             ), "regressor row count not equal to forecast length"
 
         # LinAlgError: SVD did not converge (occurs when NaN in train data)
+        # NaN and inf are replaced with 0 before fitting: some BLAS packages will otherwise kill the kernel
         if self.regression_type in ["User", "Holiday", 'user']:
             maModel = VECM(
-                self.df_train,
+                self.df_train.replace([np.inf, -np.inf], np.nan).fillna(0),
                 freq=self.frequency,
-                exog=np.array(self.regressor_train),
+                exog=np.nan_to_num(np.array(self.regressor_train)),
                 deterministic=self.deterministic,
                 k_ar_diff=self.k_ar_diff,
                 coint_rank=self.coint_rank,
                 seasons=self.seasons,
             ).fit()
             # don't ask me why it is exog_fc here and not exog like elsewhere
             forecast = maModel.predict(
-                steps=forecast_length, exog_fc=np.array(future_regressor)
+                steps=forecast_length, exog_fc=np.nan_to_num(np.array(future_regressor))
             )
         else:
             maModel = VECM(
-                self.df_train,
+                self.df_train.replace([np.inf, -np.inf], np.nan).fillna(0),
                 freq=self.frequency,
                 deterministic=self.deterministic,
                 k_ar_diff=self.k_ar_diff,

diff --git a/autots/tools/fast_kalman.py b/autots/tools/fast_kalman.py
@@ -185,7 +185,7 @@ def holt_winters_damped_matrices(M, alpha, beta, gamma, phi=1.0):
     return F, Q, H, R
 
 
-def new_kalman_params(method=None):
+def new_kalman_params(method=None, allow_auto=True):
     if method in ['fast']:
         em_iter = random.choices([None, 10], [0.8, 0.2])[0]
     elif method == "superfast":
@@ -575,6 +575,9 @@ def new_kalman_params(method=None):
             'observation_noise': 0.04,
         }
     params['em_iter'] = em_iter
+    if not allow_auto:
+        if params['observation_noise'] == 'auto':
+            params['observation_noise'] = 0.1
     return params
 
 
@@ -1314,7 +1317,9 @@ def douter(a, b):
 def dinv(A):
     "Matrix inverse applied to last two axes"
     try:
-        res = np.linalg.inv(A)
+        res = np.linalg.inv(
+            np.nan_to_num(A)
+        )  # can cause kernel death in OpenBLAS with NaN
     except Exception:
         res = np.linalg.pinv(A)  # slower but more robust
     return res
@@ -1609,7 +1614,7 @@ def ensure_matrix(x, dim=1):
     # pylint: disable=W0702,W0104,E1136
     try:
         y = np.array(x)
-        y.shape[0]
+        y.shape[0]  # critical: raises for 0-d (scalar) input so the except branch builds the matrix
         x = y
     except Exception:
         x = np.eye(dim) * x