0.6.4 #212

Merged · 26 commits · Dec 11, 2023
Changes from all commits
12 changes: 6 additions & 6 deletions TODO.md
@@ -12,12 +12,12 @@
* The most recent data will generally be the most important
* Forecasts are desired for the future immediately following the most recent data.

# 0.6.3 🎐🎐🎐
* added energy datasets to load_live_daily
* improved the 'Scalable' transformer_list to reduce memory issues on larger datasets
* memory improvements to KalmanSmoother, HolidayTransformer, LocalLinearTrend
* added DiffSmoother
* added force_gc arg which can be tried if memory is in short supply relative to data (probably won't help much)
# 0.6.4 🔜🔜🔜
* adjusted n_jobs back to n_jobs - 1 for MultivariateMotif
* fixed bug with plot_validations not working with some frequencies
* force_validation added to import_template
* model_list now enforced in new generations
* added NeuralForecast (see the usage sketch below)
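
A hedged usage sketch of the new user-facing options; the template filename and model subset here are illustrative, not defaults:

```python
from autots import AutoTS, load_daily

df = load_daily(long=False)  # example wide-format daily dataset

model = AutoTS(
    forecast_length=14,
    frequency='infer',
    # new generations now stay within this list (0.6.4)
    model_list=['NeuralForecast', 'ETS', 'LastValueNaive'],
    max_generations=2,
)
# force_validation=True sends every imported model to full cross validation,
# regardless of how it scores on the first evaluation
model = model.import_template(
    "previous_run_template.csv",  # hypothetical export from a prior run
    method="addon",
    force_validation=True,
)
model = model.fit(df)
```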

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
5 changes: 3 additions & 2 deletions autots/__init__.py
@@ -19,14 +19,14 @@
from autots.evaluator.auto_ts import AutoTS
from autots.evaluator.event_forecasting import EventRiskForecast
from autots.tools.transform import GeneralTransformer, RandomTransform
from autots.tools.shaping import long_to_wide
from autots.tools.shaping import long_to_wide, infer_frequency
from autots.tools.regressor import create_lagged_regressor, create_regressor
from autots.evaluator.auto_model import model_forecast
from autots.evaluator.anomaly_detector import AnomalyDetector, HolidayDetector
from autots.models.cassandra import Cassandra


__version__ = '0.6.3'
__version__ = '0.6.4'

TransformTS = GeneralTransformer

@@ -53,4 +53,5 @@
'AnomalyDetector',
'HolidayDetector',
'Cassandra',
'infer_frequency',
]
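
A minimal sketch of the newly exported helper, assuming `df` is a wide DataFrame with a pd.DatetimeIndex:

```python
import pandas as pd
from autots import infer_frequency

df = pd.DataFrame(
    {"series_1": range(30)},
    index=pd.date_range("2023-01-01", periods=30, freq="D"),
)
print(infer_frequency(df))  # expected: 'D'
```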
43 changes: 35 additions & 8 deletions autots/evaluator/auto_model.py
@@ -683,6 +683,19 @@ def ModelMonster(
n_jobs=n_jobs,
**parameters,
)
elif model in ["NeuralForecast", "neuralforecast"]:
from autots.models.neural_forecast import NeuralForecast

return NeuralForecast(
frequency=frequency,
forecast_length=forecast_length,
prediction_interval=prediction_interval,
holiday_country=holiday_country,
random_seed=random_seed,
verbose=verbose,
n_jobs=n_jobs,
**parameters,
)
else:
raise AttributeError(
("Model String '{}' not a recognized model type").format(model)
@@ -1007,6 +1020,11 @@ def concat(self, another_eval):
ignore_index=True,
sort=False,
).reset_index(drop=True)
self.per_series_metrics = pd.concat(
[self.per_series_metrics, another_eval.per_series_metrics],
axis=0,
sort=False,
)
self.per_series_mae = pd.concat(
[self.per_series_mae, another_eval.per_series_mae], axis=0, sort=False
)
@@ -2142,6 +2160,7 @@ def NewGeneticTemplate(
models_mode: str = "default",
score_per_series=None,
recursive_count=0,
model_list=None,
# UPDATE RECURSIVE section if adding or removing params
):
"""
@@ -2155,6 +2174,8 @@

"""
new_template_list = []
if model_list is None:
model_list = model_results['Model'].unique().tolist()

# filter existing templates
sorted_results = model_results[
@@ -2210,8 +2231,11 @@
sidx = {name: i for i, name in enumerate(list(sorted_results), start=1)}
for row in sorted_results.itertuples(name=None):
n = n_list[counter]
counter += 1
model_type = row[sidx["Model"]]
# skip models not in the model_list
if model_type not in model_list:
continue
counter += 1
model_params = row[sidx["ModelParameters"]]
try:
trans_params = json.loads(row[sidx["TransformationParameters"]])
@@ -2356,6 +2380,7 @@
models_mode=models_mode,
score_per_series=score_per_series,
recursive_count=recursive_count,
model_list=model_list,
)
# enjoy the privilege
elif new_template.shape[0] < max_results:
@@ -2373,15 +2398,18 @@
)


def validation_aggregation(validation_results, df_train=None):
"""Aggregate a TemplateEvalObject."""
groupby_cols = [
def validation_aggregation(
validation_results,
df_train=None,
groupby_cols=[
'ID',
'Model',
'ModelParameters',
'TransformationParameters',
'Ensemble',
]
],
):
"""Aggregate a TemplateEvalObject."""
col_aggs = {
'Runs': 'sum',
'smape': 'mean',
@@ -2443,9 +2471,8 @@ def validation_aggregation(validation_results, df_train=None):
validation_results.model_results = validation_results.model_results.replace(
[np.inf, -np.inf], np.nan
)
validation_results.model_results = validation_results.model_results.groupby(
groupby_cols
).agg(col_aggs)
grouped = validation_results.model_results.groupby(groupby_cols)
validation_results.model_results = grouped.agg(col_aggs)
validation_results.model_results = validation_results.model_results.reset_index(
drop=False
)
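
With groupby_cols now exposed, callers can aggregate at a coarser grain than the default per-template grouping; a hedged sketch, assuming `model` is a fitted AutoTS instance whose initial_results holds the TemplateEvalObject:

```python
from autots.evaluator.auto_model import validation_aggregation

# group by model family only, pooling all parameter variants together;
# assumed here to return the object with an aggregated model_results frame
agg = validation_aggregation(
    model.initial_results,
    groupby_cols=['Model'],
)
print(agg.model_results[['Model', 'smape']].head())
```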
38 changes: 35 additions & 3 deletions autots/evaluator/auto_ts.py
@@ -17,6 +17,7 @@
NumericTransformer,
clean_weights,
infer_frequency,
freq_to_timedelta,
)
from autots.tools.transform import GeneralTransformer, RandomTransform
from autots.evaluator.auto_model import (
@@ -238,6 +239,7 @@ def __init__(
self.models_mode = models_mode
self.current_model_file = current_model_file
self.force_gc = force_gc
self.validate_import = None
random.seed(self.random_seed)
if self.max_generations is None and self.generation_timeout is not None:
self.max_generations = 99999
@@ -725,9 +727,19 @@ def get_new_params(method='random'):
},
},
},
{
"fillna": None,
"transformations": {"0": "CenterSplit"},
"transformation_params": {
"0": {
'fillna': 'ffill',
'center': 'zero',
},
},
},
'random',
],
[0.9, 0.1, 0.05, 0.1, 0.1, 0.1, 0.1, 0.05, 0.15, 0.1],
[0.9, 0.1, 0.05, 0.1, 0.1, 0.1, 0.1, 0.05, 0.15, 0.05, 0.1],
)[0]
if preclean_choice == "random":
preclean_choice = RandomTransform(
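
The same CenterSplit recipe added to the random preclean menu above can also be supplied explicitly; a sketch, with values copied from the new choice:

```python
from autots import AutoTS

model = AutoTS(
    forecast_length=14,
    preclean={
        "fillna": None,
        "transformations": {"0": "CenterSplit"},
        "transformation_params": {
            "0": {"fillna": "ffill", "center": "zero"},
        },
    },
)
```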
@@ -1100,6 +1112,7 @@ def fit(
"""
self.model = None
self.grouping_ids = grouping_ids
self.fitStart = pd.Timestamp.now()

# convert class variables to local variables (makes testing easier)
if self.validation_method == "custom":
@@ -1117,7 +1130,6 @@
else:
self.validation_indexes = []

prediction_interval = self.prediction_interval
random_seed = self.random_seed
metric_weighting = self.metric_weighting
verbose = self.verbose
@@ -1285,6 +1297,7 @@
transformer_max_depth=self.transformer_max_depth,
models_mode=self.models_mode,
score_per_series=self.score_per_series,
model_list=self.model_list,
)
submitted_parameters = pd.concat(
[submitted_parameters, new_template],
@@ -1406,6 +1419,12 @@
subset=['Model', 'ModelParameters', 'TransformationParameters']
)
self.validation_template = validation_template[self.template_cols]
if self.validate_import is not None:
self.validation_template = pd.concat(
[self.validation_template, self.validate_import]
).drop_duplicates(
subset=['Model', 'ModelParameters', 'TransformationParameters']
)

# run validations
if self.num_validations > 0:
@@ -1740,6 +1759,7 @@

# clean up any remaining print statements
sys.stdout.flush()
self.fitRuntime = pd.Timestamp.now() - self.fitStart
return self

def validation_agg(self):
@@ -2449,6 +2469,7 @@ def import_template(
enforce_model_list: bool = True,
include_ensemble: bool = False,
include_horizontal: bool = False,
force_validation: bool = False,
):
"""Import a previously exported template of model parameters.
Must be done before the AutoTS object is .fit().
@@ -2459,6 +2480,7 @@
enforce_model_list (bool): if True, remove model types not in model_list
include_ensemble (bool): if enforce_model_list is True, this specifies whether to allow ensembles anyway (otherwise they are unpacked and parts kept)
include_horizontal (bool): if enforce_model_list is True, this specifies whether to allow ensembles except horizontal (overridden by include_ensemble)
force_validation (bool): if True, all models imported here will automatically get sent to full cross validation (regardless of first eval performance)
"""
if method.lower() in ['add on', 'addon', 'add_on']:
addon_flag = True
@@ -2496,6 +2518,14 @@
else:
raise ValueError("method must be 'addon' or 'only'")

if force_validation:
if self.validate_import is None:
self.validate_import = import_template
else:
self.validate_import = pd.concat(
[self.validate_import, import_template]
)

return self

def export_best_model(self, filename, **kwargs):
Expand Down Expand Up @@ -3136,7 +3166,9 @@ def plot_validations(
used_freq = self.used_frequency
start_date = plot_df[plot_df.columns.difference(['actuals'])].dropna(
how='all', axis=0
).index.min() - (pd.to_timedelta(used_freq) * int(self.forecast_length * 3))
).index.min() - (
freq_to_timedelta(used_freq) * int(self.forecast_length * 3)
)
if end_date == "auto":
end_date = plot_df[plot_df.columns.difference(['actuals'])].dropna(
how='all', axis=0
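
The switch from pd.to_timedelta to freq_to_timedelta is what fixes plot_validations on calendar-anchored frequencies, which pandas cannot convert directly; a sketch (the helper is assumed to return an approximate pd.Timedelta):

```python
import pandas as pd
from autots.tools.shaping import freq_to_timedelta

print(pd.to_timedelta("1D"))   # Timedelta('1 days 00:00:00')
# pd.to_timedelta("M") raises a ValueError (months are not a fixed-length unit)
print(freq_to_timedelta("M"))  # assumed: an approximate one-month Timedelta
```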
52 changes: 29 additions & 23 deletions autots/models/base.py
@@ -295,9 +295,12 @@ def extract_single_transformer(
)
return ", ".join(allz)
else:
trans_dict = transformation_params.get("transformations")
if isinstance(trans_dict, dict):
return ", ".join(list(trans_dict.values()))
if isinstance(transformation_params, dict):
trans_dict = transformation_params.get("transformations")
if isinstance(trans_dict, dict):
return ", ".join(list(trans_dict.values()))
else:
return "None"
else:
return "None"

@@ -333,6 +336,7 @@ def plot_distributions(
y_col='TotalRuntimeSeconds',
xlim=None,
xlim_right=None,
title_suffix="",
):
import matplotlib.pyplot as plt
import seaborn as sns
@@ -391,7 +395,7 @@
plt.legend(handles, labels, title=group_col) # , bbox_to_anchor=(1.05, 1), loc=2

# Adding titles and labels
plt.title(f'Distribution of {y_col} by {group_col}', fontsize=16)
plt.title(f'Distribution of {y_col} by {group_col}{title_suffix}', fontsize=16)
plt.xlabel(f'{y_col}', fontsize=14)
plt.ylabel('Density', fontsize=14)

@@ -491,48 +495,49 @@ def long_form_results(
value_name="Value",
interval_name='PredictionInterval',
update_datetime_name=None,
datetime_column=None,
):
"""Export forecasts (including upper and lower) as single 'long' format output

Args:
id_name (str): name of column containing ids
value_name (str): name of column containing numeric values
interval_name (str): name of column telling you what is upper/lower
datetime_column (str): if None, is index, otherwise, name of column for datetime
update_datetime_name (str): if not None, adds column with current timestamp and this name

Returns:
pd.DataFrame
"""
try:
upload = pd.melt(
self.forecast,
var_name=id_name,
value_name=value_name,
ignore_index=False,
)
except Exception:
raise ImportError("Requires pandas>=1.1.0")
upload = pd.melt(
self.forecast.reset_index(names='datetime'),
var_name="SeriesID",
value_name="Value",
id_vars="datetime",
).set_index("datetime")
upload[interval_name] = "50%"
upload_upper = pd.melt(
self.upper_forecast,
var_name=id_name,
value_name=value_name,
ignore_index=False,
)
self.upper_forecast.reset_index(names='datetime'),
var_name="SeriesID",
value_name="Value",
id_vars="datetime",
).set_index("datetime")
upload_upper[
interval_name
] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
upload_lower = pd.melt(
self.lower_forecast,
var_name=id_name,
value_name=value_name,
ignore_index=False,
)
self.lower_forecast.reset_index(names='datetime'),
var_name="SeriesID",
value_name="Value",
id_vars="datetime",
).set_index("datetime")
upload_lower[
interval_name
] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
upload = upload.reset_index(drop=False, names=datetime_column)
if update_datetime_name is not None:
upload[update_datetime_name] = datetime.datetime.utcnow()
return upload
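
A hedged usage sketch of the reworked method, called on the prediction object returned by predict(); datetime_column is the new argument:

```python
prediction = model.predict()  # model: a fitted AutoTS instance (assumed)
long_df = prediction.long_form_results(datetime_column="datetime")
print(long_df.head())
# expected long-format columns: datetime, SeriesID, Value, PredictionInterval
```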
@@ -570,6 +575,7 @@ def plot_ensemble_runtimes(self, xlim_right=None):
y_col='TotalRuntimeSeconds',
xlim=0,
xlim_right=xlim_right,
title_suffix=" in Chosen Ensemble",
)

def plot_df(
2 changes: 1 addition & 1 deletion autots/models/basics.py
@@ -1269,7 +1269,7 @@ def predict(

# joblib multiprocessing to loop through series
if self.parallel:
df_list = Parallel(n_jobs=(self.n_jobs))(
df_list = Parallel(n_jobs=(self.n_jobs - 1))(
delayed(looped_motif)(
Xa=x.reshape(-1, x.shape[-1]) if self.multivariate else x[:, i],
Xb=self.df.iloc[-self.window :, i].to_numpy().reshape(1, -1),