<a href="https://colab.research.google.com/github/zee404-code/DartsTimeSeries/blob/main/dart07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell command: install the darts package that the cells below import.
!pip install darts

In [None]:
from darts.datasets import AirPassengersDataset     
# Load the AirPassengers series; `data` is the series the later cells analyse.
data = AirPassengersDataset().load()
# Quick visual check of the raw series.
data.plot()

In [3]:
from darts.utils.statistics import check_seasonality, extract_trend_and_seasonality,remove_seasonality, remove_trend, stationarity_test_adf, stationarity_test_kpss, stationarity_tests
from darts.utils.model_selection import train_test_split
from darts.utils.utils import ModelMode, SeasonalityMode
from darts.models import AutoARIMA, ARIMA, VARIMA

In [None]:
# Stationarity check on the raw series, passing 0.05 for both threshold arguments.
# Author's notes on how the result steers model choice:
#   - stationarity depends on the data, and the forecasting model depends on it
#   - remove the non-stationary attributes, then apply ARIMA
#   - if seasonality exists, use SARIMA (seasonal ARIMA)
#   - VARIMA is used for multivariate data
#   - AutoARIMA is for finding optimal p, d, q values
# False means the data is not stationary; True means it is stationary.
# In this notebook stationarity_tests is only used to check for trend, and
# check_seasonality is used to check for seasonality.
check1 = stationarity_tests(data, 0.05, 0.05)
print(check1)

# Try every candidate period m from 2 through 24 and report the seasonal ones.
for m in range(2, 25):
    is_seasonal, period = check_seasonality(data, m=m, alpha=0.05)
    print('running {}'.format(m))
    if not is_seasonal:
        continue
    print("There is seasonality of order {}.".format(period))

### Author's finding: the current data has both seasonality and trend.

In [5]:
# Split the raw series into train/test with test_size=0.25 (simple split on axis 0).
train, test = train_test_split(
    data,
    test_size=0.25,
    axis=0,
    input_size=0,
    horizon=0,
    vertical_split_type='simple',
    lazy=False,
)

In [None]:
# Baseline for comparison: how a forecast from the original data looks versus
# a forecast from the stationary data built in a later cell.
### Original (untransformed) data
model = ARIMA()  # default ARIMA configuration
model.fit(train)
# Forecast as many steps as there are points in the held-out test series.
pred = model.predict(len(test))
# Overlay train, test and forecast, each with its own label.
train.plot(label = 'train')
test.plot(label = 'test')
pred.plot(label = 'pred')

In [9]:
def vis(train3, test3, pred3):
    """Visualization helper: plot the three series labelled 'train', 'test', 'pred'.

    Each argument only needs a ``plot(label=...)`` method; the series are
    plotted in the order train, test, prediction. Returns None.
    """
    labelled_series = (
        ('train', train3),
        ('test', test3),
        ('pred', pred3),
    )
    for tag, series in labelled_series:
        series.plot(label=tag)

In [None]:
# Forecast the series, first making it stationary when the test says it is not.
#
# Reads `data` and `vis` from earlier cells. Leaves behind `data4` (the series
# that is actually modelled), `train2` / `test2` (its split), `model2` and
# `pred2` (the forecast) for the cells that follow.
if stationarity_tests(data, 0.05, 0.05):
  print('this runs 1')
  # Already stationary: nothing has to be removed, so the series modelled
  # below is the original one. Without this assignment `data4` stayed
  # undefined on this path and the split further down raised NameError.
  data4 = data
  # ARIMA for a univariate series, VARIMA for a multivariate one.
  if data.is_univariate:
    print('this runs 2')
    model = ARIMA()
  else:
    print('this runs 3')
    model = VARIMA()
  train, test = train_test_split(data, test_size=0.25, axis=0, input_size=0, horizon=0, vertical_split_type='simple', lazy=False)
  model.fit(train)
  pred = model.predict(len(test))
  vis(train, test, pred)
else:
  print('this runs 4')
  # The branches below treat data2 as the trend component and data3 as the
  # seasonal component; the data has trend, seasonality, or both.
  data2, data3 = extract_trend_and_seasonality(data, freq=12, model=ModelMode.MULTIPLICATIVE, method='naive')
  # Run the test on the trend component once and reuse the answer in both branches.
  trend_not_stationary = not stationarity_tests(data2, 0.05, 0.05)
  if trend_not_stationary and (not stationarity_tests(data3, 0.05, 0.05)):
    print('this runs 5')
    # Trend and seasonality both exist.
    # Author's rule of thumb:
    #   method='naive' with a multiplicative model if only seasonal,
    #   method='STL' with an additive model if both seasonal and trend.
    # NOTE(review): only remove_seasonality is called here, no remove_trend;
    # confirm that this is enough to remove the trend as well.
    data4 = remove_seasonality(data, freq=None, model=SeasonalityMode.ADDITIVE, method='STL')
  elif trend_not_stationary:
    print('this runs 6')
    # Only trend exists: remove it.
    data4 = remove_trend(data, model=ModelMode.MULTIPLICATIVE, method='naive')
  else:
    print('this runs 7')
    # Only seasonality exists: remove it.
    data4 = remove_seasonality(data, freq=None, model=SeasonalityMode.MULTIPLICATIVE, method='naive')

print('this runs 9')
# Model the (now stationary) series with the same split parameters as before.
train2, test2 = train_test_split(data4, test_size=0.25, axis=0, input_size=0, horizon=0, vertical_split_type='simple', lazy=False)
if data.is_univariate:
  print('this runs 10')
  model2 = ARIMA()
else:
  print('this runs 11')
  model2 = VARIMA()

print('this runs 12')
model2.fit(train2)
# Forecast as many steps as there are points in the held-out test series.
pred2 = model2.predict(len(test2))
vis(train2, test2, pred2)


In [None]:
### Confirmation test: re-run the stationarity check on `data4`, the series
### produced by the previous cell, to confirm that it is stationary.
check = stationarity_tests(data4, 0.05, 0.05)
print(check)

In [None]:
######## Box-Cox, differencing and pipelines #################
# Author's notes:
#   - Box-Cox removes both seasonality and trend.
#     NOTE(review): the original sentence "It works on" was left unfinished.
#   - Box-Cox suits data whose variation increases or decreases with the level
#     of the series; it applies log and power transformations.
#   - lambda = 0: a log transformation is used.
#   - lambda != 0: a power transformation is used, followed by simple scaling.
#   - Differencing only removes seasonality; it does not remove trend.
# Scan candidate periods 2 through 24 on the raw series, reporting only the hits.
for m in range(2, 25):
    is_seasonal, period = check_seasonality(data, m=m, alpha=0.05)
    if not is_seasonal:
        continue
    print("There is seasonality of order {}.".format(period))

In [None]:
# Difference with periods=12, since the seasonality found is of order 12.
# Author's note: n (the number of differencing steps) = 2 instead created
# seasonality of order 23.
dat = data.diff(periods=12)

# Repeat the period scan on the differenced series.
for m in range(2, 25):
    is_seasonal, period = check_seasonality(dat, m=m, alpha=0.05)
    print('running {}'.format(m))
    if not is_seasonal:
        continue
    print("There is seasonality of order {}.".format(period))

# Seasonality has been removed through differencing; check whether `dat` is stationary.
c = stationarity_tests(dat, 0.05, 0.05)
print(c)  # author's result: it is not, so check for trend

# Remove an additive trend and test once more.
dat3 = remove_trend(dat, model=ModelMode.ADDITIVE, method='naive')
c2 = stationarity_tests(dat3, 0.05, 0.05)
print(c2)  # author's result: it is; both trend and seasonality have been removed

In [None]:
##### Box-Cox
# Author's notes: when lmbda is None, lmbda values in the range -5 to 5 are
# tested to find the optimum value for the dataset. The optim_method used for
# finding lmbda can be switched between 'mle' and 'pearsonr'.
from darts.dataprocessing.transformers import BoxCox

transformer = BoxCox(name='BoxCox', lmbda=None)
series_transformed = transformer.fit_transform(data)
print(series_transformed.head())

In [None]:
# Final verdict: use the stationarity tests to detect trend and
# check_seasonality to detect seasonality. Remove trend with remove_trend;
# remove seasonality with either remove_seasonality or
# diff(periods=<order of seasonality>).
from darts.dataprocessing.pipeline import Pipeline

# Planned steps: (1) remove trend, (2) remove seasonality,
# (3) check stationarity and seasonality.
# The pipeline below contains a single Box-Cox step.
pipeline = Pipeline(
    [BoxCox(name='BoxCox', lmbda=None, optim_method='mle', n_jobs=1, verbose=True)],
    copy=False,
    verbose=True,
    n_jobs=None,
)
series_transformed = pipeline.fit_transform(data)