Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.6.8 #226

Merged
merged 17 commits into from
Jan 18, 2024
Merged

0.6.8 #226

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
* The most recent data will generally be the most important
* Forecasts are desired for the future immediately following the most recent data.

# 0.6.7 🇺🇦 🇺🇦 🇺🇦
* Cassandra bug fix
* isolated_only to anomaly methods
* matse metric is possibly temporary and not added to per series weighting options
* added HistoricValues transformer
# 0.6.8 🇺🇦 🇺🇦 🇺🇦
* bug fixes, more robust against kernel failures caused by OpenBLAS NaN handling
* added BKBandpassFilter
* added expand_horizontal for scaling mosaics

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from autots.models.cassandra import Cassandra


__version__ = '0.6.7'
__version__ = '0.6.8'

TransformTS = GeneralTransformer

Expand Down
5 changes: 4 additions & 1 deletion autots/evaluator/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,10 @@ def __init__(
if self.transformation_dict is None:
self.transformation_dict = {}
self.transformer_object = GeneralTransformer(
**self.transformation_dict, n_jobs=n_jobs, holiday_country=holiday_country
**self.transformation_dict,
n_jobs=n_jobs,
holiday_country=holiday_country,
verbose=self.verbose,
)
self.model = ModelMonster(
model_str,
Expand Down
181 changes: 151 additions & 30 deletions autots/evaluator/auto_ts.py

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions autots/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,24 +511,24 @@ def long_form_results(
"""
upload = pd.melt(
self.forecast.rename_axis(index='datetime').reset_index(),
var_name="SeriesID",
value_name="Value",
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload[interval_name] = "50%"
upload_upper = pd.melt(
self.upper_forecast.rename_axis(index='datetime').reset_index(),
var_name="SeriesID",
value_name="Value",
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_upper[
interval_name
] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
upload_lower = pd.melt(
self.lower_forecast.rename_axis(index='datetime').reset_index(),
var_name="SeriesID",
value_name="Value",
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_lower[
Expand All @@ -537,7 +537,8 @@ def long_form_results(

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
upload = upload.reset_index(drop=False, names=datetime_column)
upload.index.name = str(datetime_column)
upload = upload.reset_index(drop=False)
if update_datetime_name is not None:
upload[update_datetime_name] = datetime.datetime.utcnow()
return upload
Expand Down
2 changes: 1 addition & 1 deletion autots/models/matrix_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def rrvar(data, R, pred_step, maxiter=100):
X1 = data[:, :-1]
X2 = data[:, 1:]
V = np.random.randn(R, N)
X1_pinv = np.linalg.pinv(X1)
X1_pinv = np.linalg.pinv(np.nan_to_num(X1))
for it in range(maxiter):
W = X2 @ np.linalg.pinv((V @ X1))
V = np.linalg.pinv(W) @ X2 @ X1_pinv
Expand Down
9 changes: 5 additions & 4 deletions autots/models/model_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
'GLM': 1,
'ETS': 1,
'FBProphet': 0.5,
'GluonTS': 0.5,
# 'GluonTS': 0.5,
'UnobservedComponents': 1,
'VAR': 1,
'VECM': 1,
Expand All @@ -75,7 +75,7 @@
'ARDL': 1,
'ARCH': 1,
'MetricMotif': 1,
# 'SeasonalityMotif': 1,
'SeasonalityMotif': 1,
}
# fastest models at any scale
superfast = [
Expand All @@ -86,6 +86,7 @@
'SeasonalNaive',
# 'MetricMotif',
'SeasonalityMotif',
'SectionalMotif', # not entirely sure but so far this is pretty fast
]
# relatively fast
fast = {
Expand All @@ -105,13 +106,13 @@
'SectionalMotif': 1,
'NVAR': 0.3,
'MAR': 0.25,
'RRVAR': 1,
'RRVAR': 0.4,
'KalmanStateSpace': 0.4,
'MetricMotif': 1,
'Cassandra': 0.6,
'SeasonalityMotif': 1.5,
'FFT': 0.8,
"BallTreeMultivariateMotif": 1, # keep an eye on RAM
"BallTreeMultivariateMotif": 0.4, # keep an eye on RAM, not the fastest at scale but works...
}
# models that can scale well if many CPU cores are available
parallel = {
Expand Down
29 changes: 22 additions & 7 deletions autots/models/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,18 +675,28 @@ def retrieve_classifier(
}
# these are models that are relatively fast with large multioutput Y, small n obs
datepart_model_dict: dict = {
# 'RandomForest': 0.05, # crashes sometimes at scale for unclear reasons
'ElasticNet': 0.1,
'xgboost': 0.001, # excess memory at scale
'MLP': 0.05,
'DecisionTree': 0.02,
'Adaboost': 0.05,
'SVM': 0.01,
'KerasRNN': 0.02,
'Transformer': 0.02, # slow
'ExtraTrees': 0.00001, # some params cause RAM crash?
'RadiusNeighbors': 0.1,
'MultioutputGPR': 0.00001,
}
datepart_model_dict_deep = {
'RandomForest': 0.05, # crashes sometimes at scale for unclear reasons
'ElasticNet': 0.1,
'xgboost': 0.05,
'MLP': 0.05,
'DecisionTree': 0.02,
'Adaboost': 0.05,
'SVM': 0.01,
'KerasRNN': 0.02,
'Transformer': 0.02, # slow
'ExtraTrees': 0.01, # some params cause RAM crash?
'RadiusNeighbors': 0.1,
'MultioutputGPR': 0.001,
}
gpu = ['Transformer', 'KerasRNN', 'MLP'] # or more accurately, no dnn
gradient_boosting = {
Expand Down Expand Up @@ -788,7 +798,7 @@ def generate_classifier_params(
}
else:
model_dict = {
'xgboost': 1,
'xgboost': 0.5,
'ExtraTrees': 0.2,
'RandomForest': 0.1,
'KNN': 1,
Expand Down Expand Up @@ -2335,7 +2345,7 @@ def predict(
)
except Exception as e:
raise ValueError(
f"Datepart prediction with params {self.get_params()} failed"
f"Datepart prediction with params {self.get_params()} failed. This is often due to an improperly indexed future_regressor (with drop_most_recent especially)"
) from e

if just_point_forecast:
Expand Down Expand Up @@ -2366,7 +2376,12 @@ def predict(

def get_new_params(self, method: str = 'random'):
"""Return dict of new parameters for parameter tuning."""
model_choice = generate_regressor_params(model_dict=datepart_model_dict)
if method == 'deep':
model_choice = generate_regressor_params(
model_dict=datepart_model_dict_deep
)
else:
model_choice = generate_regressor_params(model_dict=datepart_model_dict)
datepart_choice = random.choices(
[
"recurring",
Expand Down
11 changes: 6 additions & 5 deletions autots/models/statsmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def predict(
parallel = False
# joblib multiprocessing to loop through series
if parallel:
df_list = Parallel(n_jobs=self.n_jobs, verbose=pool_verbose)(
df_list = Parallel(n_jobs=self.n_jobs, verbose=pool_verbose, timeout=3600)(
delayed(glm_forecast_by_column)(
current_series=df[col],
X=X,
Expand Down Expand Up @@ -1432,23 +1432,24 @@ def predict(
), "regressor row count not equal to forecast length"

# LinAlgError: SVD did not converge (occurs when NaN in train data)
# NaN must be removed for some BLAS packages else they will kill the kernel
if self.regression_type in ["User", "Holiday", 'user']:
maModel = VECM(
self.df_train,
self.df_train.replace([np.inf, -np.inf], np.nan).fillna(0),
freq=self.frequency,
exog=np.array(self.regressor_train),
exog=np.nan_to_num(np.array(self.regressor_train)),
deterministic=self.deterministic,
k_ar_diff=self.k_ar_diff,
coint_rank=self.coint_rank,
seasons=self.seasons,
).fit()
# don't ask me why it is exog_fc here and not exog like elsewhere
forecast = maModel.predict(
steps=forecast_length, exog_fc=np.array(future_regressor)
steps=forecast_length, exog_fc=np.nan_to_num(np.array(future_regressor))
)
else:
maModel = VECM(
self.df_train,
self.df_train.replace([np.inf, -np.inf], np.nan).fillna(0),
freq=self.frequency,
deterministic=self.deterministic,
k_ar_diff=self.k_ar_diff,
Expand Down
11 changes: 8 additions & 3 deletions autots/tools/fast_kalman.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def holt_winters_damped_matrices(M, alpha, beta, gamma, phi=1.0):
return F, Q, H, R


def new_kalman_params(method=None):
def new_kalman_params(method=None, allow_auto=True):
if method in ['fast']:
em_iter = random.choices([None, 10], [0.8, 0.2])[0]
elif method == "superfast":
Expand Down Expand Up @@ -575,6 +575,9 @@ def new_kalman_params(method=None):
'observation_noise': 0.04,
}
params['em_iter'] = em_iter
if not allow_auto:
if params['observation_noise'] == 'auto':
params['observation_noise'] = 0.1
return params


Expand Down Expand Up @@ -1314,7 +1317,9 @@ def douter(a, b):
def dinv(A):
"Matrix inverse applied to last two axes"
try:
res = np.linalg.inv(A)
res = np.linalg.inv(
np.nan_to_num(A)
) # can cause kernel death in OpenBLAS with NaN
except Exception:
res = np.linalg.pinv(A) # slower but more robust
return res
Expand Down Expand Up @@ -1609,7 +1614,7 @@ def ensure_matrix(x, dim=1):
# pylint: disable=W0702,W0104,E1136
try:
y = np.array(x)
y.shape[0]
y.shape[0] # for reasons I don't understand, this line is critical
x = y
except Exception:
x = np.eye(dim) * x
Expand Down
Loading
Loading