Merge pull request #238 from winedarksea/dev

0.6.11

winedarksea committed Apr 8, 2024
2 parents e0e8e9c + 9555557 commit a2a464c
Showing 58 changed files with 866 additions and 257 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -106,6 +106,7 @@ Also take a look at the [production_example.py](https://github.com/winedarksea/A
* `superfast` (simple naive models) and `fast` (more complex but still faster models, optimized for many series)
* `fast_parallel` (a combination of `fast` and `parallel`) or `parallel`, given many CPU cores are available
* `n_jobs='auto'` usually comes pretty close, but adjust as necessary for the environment
* `scalable` is the best model list for avoiding crashes when many series are present; there is also a `transformer_list = 'scalable'`
* see a dict of predefined lists (some defined for internal use) with `from autots.models.model_list import model_lists`
* Use the `subset` parameter when there are many similar series; `subset=100` will often generalize well for tens of thousands of similar series.
* if using `subset`, passing `weights` for series will weight subset selection towards higher priority series (see the sketch below)
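
A minimal sketch combining these options, assuming a wide-format DataFrame `wide_df` (values are illustrative, not recommendations):

```python
from autots import AutoTS

# 'scalable' model and transformer lists are the safer choice for very large
# numbers of series; subset=100 runs the model search on a sample of series
model = AutoTS(
    forecast_length=21,
    frequency='infer',
    model_list='scalable',
    transformer_list='scalable',
    n_jobs='auto',
    subset=100,
)
# weights bias subset selection toward higher-priority series
model = model.fit(wide_df, weights={'high_priority_series': 10})
prediction = model.predict()
```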
@@ -121,6 +122,7 @@ Also take a look at the [production_example.py](https://github.com/winedarksea/A
* this can be done by adjusting `frequency` and `aggfunc` but is probably best done before passing data into AutoTS.
* It will be faster if NaNs are already filled. If a search for the optimal NaN fill method is not required, fill any NaNs with a satisfactory method before passing data to the class.
* Set `runtime_weighting` in `metric_weighting` to a higher value (see the sketch after this list). This will guide the search towards faster models, although it may come at the expense of accuracy.
* Memory shortage is the most common cause of random process/kernel crashes. If issues occur, try testing a subset of the data and a different model list. Please report crashes that can be linked to a specific set of model parameters (the underlying forecasting model's params, not AutoTS parameters). Crashes also vary significantly by setup, such as the underlying LINPACK/BLAS, so differences between environments are to be expected.
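
A hedged sketch of the `runtime_weighting` tip (key names follow the documented `metric_weighting` format; values are illustrative):

```python
from autots import AutoTS

metric_weighting = {
    'smape_weighting': 5,
    'mae_weighting': 2,
    'rmse_weighting': 2,
    'runtime_weighting': 2,  # raised well above the small default to favor faster models
}
model = AutoTS(forecast_length=21, metric_weighting=metric_weighting)
```

Note that passing a partial dict replaces the full default weighting, so only the listed metrics influence model selection here.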

## How to Contribute:
* Give feedback on where you find the documentation confusing
10 changes: 6 additions & 4 deletions TODO.md
@@ -13,10 +13,12 @@
* Forecasts are desired for the future immediately following the most recent data.
* trimmed_mean to AverageValueNaive

# 0.6.10 🇺🇦 🇺🇦 🇺🇦
* assorted minor bug fixes
* bug in mosaic model selection fixed
* added crosshair_lite mosaic
# 0.6.11 🇺🇦 🇺🇦 🇺🇦
* bug fixes
* continually trying to keep up with the Pandas maintainers, who keep breaking things for no good reason
* updated RollingMeanTransformer and RegressionFilter; RegressionFilter should now be less memory intensive
* EIA data call added to load_live_daily
* horizontal_ensemble_validation arg for more complete validation on these ensembles (see the sketch below)
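
A hedged sketch of the new option (assumed here to be an `AutoTS` constructor argument, per this changelog entry; see the EIA example further below for `load_live_daily`):

```python
from autots import AutoTS

model = AutoTS(
    forecast_length=30,
    ensemble="horizontal-max",
    horizontal_ensemble_validation=True,  # more complete validation of horizontal ensembles
)
```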

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
6 changes: 4 additions & 2 deletions autots/__init__.py
@@ -3,6 +3,7 @@
https://github.com/winedarksea/AutoTS
"""

from autots.datasets import (
load_hourly,
load_daily,
@@ -21,12 +22,12 @@
from autots.tools.transform import GeneralTransformer, RandomTransform
from autots.tools.shaping import long_to_wide, infer_frequency
from autots.tools.regressor import create_lagged_regressor, create_regressor
from autots.evaluator.auto_model import model_forecast
from autots.evaluator.auto_model import model_forecast, ModelPrediction
from autots.evaluator.anomaly_detector import AnomalyDetector, HolidayDetector
from autots.models.cassandra import Cassandra


__version__ = '0.6.10'
__version__ = '0.6.11'

TransformTS = GeneralTransformer

@@ -54,4 +55,5 @@
'HolidayDetector',
'Cassandra',
'infer_frequency',
'ModelPrediction',
]
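
With `ModelPrediction` now exported at the package root, a hedged sketch of direct use (keyword names are taken from the `auto_model.py` diff below; `fit`/`predict` are assumed to mirror other AutoTS model objects):

```python
from autots import ModelPrediction

model = ModelPrediction(
    forecast_length=14,
    model_str="ETS",
    parameter_dict={},       # an empty JSON string "" is now also tolerated
    transformation_dict={},  # likewise treated as an empty dict
    frequency="infer",
    prediction_interval=0.9,
)
model = model.fit(wide_df)  # wide_df: a wide-format DataFrame of series
prediction = model.predict()
```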
1 change: 1 addition & 0 deletions autots/datasets/__init__.py
@@ -1,6 +1,7 @@
"""
Tools for Importing Sample Data
"""

from autots.datasets._base import (
load_daily,
load_live_daily,
97 changes: 96 additions & 1 deletion autots/datasets/_base.py
@@ -1,8 +1,10 @@
"""Loading example datasets."""

from os.path import dirname, join
import time
import datetime
import io
import json
import numpy as np
import pandas as pd

@@ -236,7 +238,9 @@ def load_live_daily(
wikipedia_pages: list = ['Microsoft_Office', "List_of_highest-grossing_films"],
wiki_language: str = "en",
weather_event_types=["%28Z%29+Winter+Weather", "%28Z%29+Winter+Storm"],
caiso_query: str = "ENE_SLRS",
caiso_query: str = None,
eia_key: str = None,
eia_respondents: list = ["MISO", "PJM", "TVA", "US48"],
timeout: float = 300.05,
sleep_seconds: int = 2,
**kwargs,
@@ -498,6 +502,7 @@ def load_live_daily(
except Exception as e:
print(f"pytrends data failed: {repr(e)}")

# note: this data source was partially broken when last checked
if caiso_query is not None:
try:
n_chunks = (364 * weather_years) / 30
@@ -537,6 +542,96 @@ def load_live_daily(
except Exception as e:
print(f"caiso download failed with error: {repr(e)}")

if eia_key is not None and eia_respondents is not None:
api_url = 'https://api.eia.gov/v2/electricity/rto/daily-region-data/data/' # ?api_key={eia-key}
for respond in eia_respondents:
try:
params = {
"frequency": "daily",
"data": ["value"],
"facets": {
"type": ["D"],
"respondent": [respond],
"timezone": ["Eastern"],
},
"start": None, # "start": "2018-06-30",
"end": None, # "end": "2023-11-01",
"sort": [{"column": "period", "direction": "desc"}],
"offset": 0,
"length": 5000,
}
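                # the EIA v2 API reads the JSON-encoded query (facets, sort,
                # paging) from the X-Params header rather than the URL query string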

res = s.get(
api_url,
params={
"api_key": eia_key,
},
headers={"X-Params": json.dumps(params)},
)
eia_df = pd.json_normalize(res.json()['response']['data'])
eia_df['datetime'] = pd.to_datetime(eia_df['period'])
eia_df['value'] = eia_df['value'].astype('float')
eia_df['ID'] = (
eia_df['respondent']
+ "_"
+ eia_df['type']
+ "_"
+ eia_df['timezone']
)
temp = eia_df.pivot(columns='ID', index='datetime', values='value')
dataset_lists.append(temp)
time.sleep(sleep_seconds)
except Exception as e:
print(f"eia download failed with error {repr(e)}")
try:
api_url_mix = (
"https://api.eia.gov/v2/electricity/rto/daily-fuel-type-data/data/"
)
params = {
"frequency": "daily",
"data": ["value"],
"facets": {
"respondent": [respond],
"timezone": ["Eastern"],
"fueltype": [
"COL",
"NG",
"NUC",
"SUN",
"WAT",
"WND",
],
},
"start": None,
"end": None,
"sort": [{"column": "period", "direction": "desc"}],
"offset": 0,
"length": 5000,
}
res = s.get(
api_url_mix,
params={
"api_key": eia_key,
},
headers={"X-Params": json.dumps(params)},
)
eia_df = pd.json_normalize(res.json()['response']['data'])
eia_df['datetime'] = pd.to_datetime(eia_df['period'])
eia_df['value'] = eia_df['value'].astype('float')
eia_df['type-name'] = eia_df['type-name'].str.replace(" ", "_")
eia_df['ID'] = (
eia_df['respondent']
+ "_"
+ eia_df['type-name']
+ "_"
+ eia_df['timezone']
)
temp = eia_df.pivot(columns='ID', index='datetime', values='value')
dataset_lists.append(temp)
time.sleep(1)
except Exception as e:
print(f"eia download failed with error {repr(e)}")

### End of data download
if len(dataset_lists) < 1:
raise ValueError("No data successfully downloaded!")
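
A hedged usage sketch of the new parameters (the key is a placeholder; `caiso_query=None` is the new default and skips the CAISO pull):

```python
from autots.datasets import load_live_daily

df = load_live_daily(
    eia_key="YOUR_EIA_API_KEY",       # placeholder; obtain a key from api.eia.gov
    eia_respondents=["MISO", "PJM"],  # one demand and one fuel-mix pull per respondent
    caiso_query=None,
)
```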
1 change: 1 addition & 0 deletions autots/datasets/fred.py
@@ -4,6 +4,7 @@
requires API key from FRED
and pip install fredapi
"""

import time
import pandas as pd

6 changes: 3 additions & 3 deletions autots/evaluator/anomaly_detector.py
@@ -318,9 +318,9 @@ def detect(self, df):
splash_threshold=self.splash_threshold,
threshold=self.threshold,
actuals=df if self.output != "univariate" else None,
anomaly_scores=self.anomaly_model.scores
if self.output != "univariate"
else None,
anomaly_scores=(
self.anomaly_model.scores if self.output != "univariate" else None
),
use_dayofmonth_holidays=self.use_dayofmonth_holidays,
use_wkdom_holidays=self.use_wkdom_holidays,
use_wkdeom_holidays=self.use_wkdeom_holidays,
81 changes: 56 additions & 25 deletions autots/evaluator/auto_model.py
@@ -1,4 +1,5 @@
"""Mid-level helper functions for AutoTS."""

import sys
import gc
import traceback as tb
@@ -697,10 +698,12 @@ def ModelMonster(
n_jobs=n_jobs,
**parameters,
)
else:
elif model == "":
raise AttributeError(
("Model String '{}' not a recognized model type").format(model)
("Model name is empty. Likely this means AutoTS has not been fit.")
)
else:
raise AttributeError((f"Model String '{model}' not a recognized model type"))


class ModelPrediction(ModelObject):
@@ -768,11 +771,17 @@ def __init__(
self.force_gc = force_gc
# handle still in JSON form
if isinstance(transformation_dict, str):
self.transformation_dict = json.loads(transformation_dict)
if transformation_dict == "":
self.transformation_dict = {}
else:
self.transformation_dict = json.loads(transformation_dict)
else:
self.transformation_dict = transformation_dict
if isinstance(parameter_dict, str):
self.parameter_dict = json.loads(parameter_dict)
if parameter_dict == "":
self.parameter_dict = {}
else:
self.parameter_dict = json.loads(parameter_dict)
else:
self.parameter_dict = parameter_dict
if model_str == "PreprocessingRegression":
@@ -786,26 +795,31 @@ def __init__(
self.transformation_dict = {}
self.transformer_object = GeneralTransformer(
**self.transformation_dict,
n_jobs=n_jobs,
holiday_country=holiday_country,
n_jobs=self.n_jobs,
holiday_country=self.holiday_country,
verbose=self.verbose,
)
self.model = ModelMonster(
model_str,
parameters=self.parameter_dict,
frequency=frequency,
prediction_interval=prediction_interval,
holiday_country=holiday_country,
random_seed=random_seed,
verbose=verbose,
forecast_length=forecast_length,
n_jobs=n_jobs,
random_seed=self.random_seed,
)
self.name = "ModelPrediction"
self._fit_complete = False

def fit(self, df, future_regressor=None):
self.df = df
if self.frequency == "infer":
self.inferred_frequency = infer_frequency(df)
else:
self.inferred_frequency = self.frequency
self.model = ModelMonster(
self.model_str,
parameters=self.parameter_dict,
frequency=self.inferred_frequency,
prediction_interval=self.prediction_interval,
holiday_country=self.holiday_country,
random_seed=self.random_seed,
verbose=self.verbose,
forecast_length=self.forecast_length,
n_jobs=self.n_jobs,
)
transformationStartTime = datetime.datetime.now()
if self.current_model_file is not None:
try:
@@ -1266,9 +1280,15 @@ def model_forecast(
full_model_created = False  # make at least one full model, horizontal only
# handle JSON inputs of the dicts
if isinstance(model_param_dict, str):
model_param_dict = json.loads(model_param_dict)
if model_param_dict == "":
model_param_dict = {}
else:
model_param_dict = json.loads(model_param_dict)
if isinstance(model_transform_dict, str):
model_transform_dict = json.loads(model_transform_dict)
if model_transform_dict == "":
model_transform_dict = {}
else:
model_transform_dict = json.loads(model_transform_dict)
if frequency == "infer":
frequency = infer_frequency(df_train)
# handle "auto" n_jobs to an integer of local count
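
The empty-string guard now appears in both `ModelPrediction.__init__` and `model_forecast`; it could be factored into a small helper, sketched here (the helper name is hypothetical, not part of the codebase):

```python
import json

def load_json_dict(value):
    """Return a dict from JSON input, treating the empty string as {}."""
    if isinstance(value, str):
        return {} if value == "" else json.loads(value)
    return value
```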
@@ -1610,6 +1630,7 @@ def virtual_memory():
cumsum_A=cumsum_A,
diff_A=diff_A,
last_of_array=last_of_array,
column_names=df_train.columns,
)
if validation_round >= 1 and verbose > 0:
round_smape = model_error.avg_metrics['smape'].round(2)
@@ -1626,16 +1647,26 @@
print(validation_accuracy_print)
else:
print(validation_accuracy_print)
model_id = create_model_id(
df_forecast.model_name,
df_forecast.model_parameters,
df_forecast.transformation_parameters,
)
# for horizontal ensemble, use requested ID and params
if ensemble_input == 2:
model_id = create_model_id(
model_str, parameter_dict, transformation_dict
)
# it's already json
deposit_params = row['ModelParameters']
else:
# for non-horizontal ensembles, recreate the ID from what the model actually used (some models change their parameters)
model_id = create_model_id(
df_forecast.model_name,
df_forecast.model_parameters,
df_forecast.transformation_parameters,
)
deposit_params = json.dumps(df_forecast.model_parameters)
result = pd.DataFrame(
{
'ID': model_id,
'Model': df_forecast.model_name,
'ModelParameters': json.dumps(df_forecast.model_parameters),
'ModelParameters': deposit_params,
'TransformationParameters': json.dumps(
df_forecast.transformation_parameters
),