# Time Series Test Plan Demo

In [1]:
# Uncomment the %env line below to silence the summarized output of test results (for testing purposes).
#
# %env VM_SUMMARIZE_TEST_PLANS = False

import pandas as pd

In [2]:
import dotenv
dotenv.load_dotenv()

import validmind as vm

# Connect the ValidMind client to the tracking API for the target project.
# NOTE(review): credentials are presumably supplied by the .env file loaded
# via dotenv.load_dotenv() — confirm.
vm.init(
    api_host="http://localhost:3000/api/v1/tracking",
    project="clhhzo21s006wl9rl0swhv40h",
)

2023-07-21 17:10:57,311 - INFO(validmind.api_client): Connected to ValidMind. Project: Stock Price Prediction Model - Initial Validation (clhhzo21s006wl9rl0swhv40h)


In [3]:
# Load the Lending Club loan-rate CSV (path is relative to the notebook's directory).
df = pd.read_csv("../datasets/lending_club_loan_rates.csv")

In [4]:
# Preview the first rows to check the columns that were read from the CSV.
df.head()

Unnamed: 0,DATE,loan_rate_A,loan_rate_B,loan_rate_C,loan_rate_D
0,2007-08-01,7.766667,9.497692,10.9475,12.267
1,2007-09-01,7.841429,9.276667,10.829167,12.436667
2,2007-10-01,7.83,9.433333,10.825926,12.737368
3,2007-11-01,7.779091,9.467778,10.967037,12.609444
4,2007-12-01,7.695833,9.3875,10.805,12.478889


In [5]:
# Promote the DATE column to a datetime index named "DATE" and remove it from
# the columns, leaving only the loan_rate_* series as data.
#
# The guard keeps this cell safe to re-run: once DATE has moved into the index
# it is no longer a column, so an unguarded second run would raise KeyError on
# df["DATE"]. The chain also avoids inplace=True, so no frame is mutated in place.
if "DATE" in df.columns:
    df = df.set_index(pd.to_datetime(df["DATE"])).drop(columns=["DATE"])
df

Unnamed: 0_level_0,loan_rate_A,loan_rate_B,loan_rate_C,loan_rate_D
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-08-01,7.766667,9.497692,10.947500,12.267000
2007-09-01,7.841429,9.276667,10.829167,12.436667
2007-10-01,7.830000,9.433333,10.825926,12.737368
2007-11-01,7.779091,9.467778,10.967037,12.609444
2007-12-01,7.695833,9.387500,10.805000,12.478889
...,...,...,...,...
2018-08-01,7.218997,11.161286,15.142618,19.857603
2018-09-01,7.201281,11.191918,15.139769,19.748459
2018-10-01,7.228498,11.208418,15.129105,19.792163
2018-11-01,7.536897,11.390483,15.126869,19.632697


In [6]:
# Wrap the pandas DataFrame in a ValidMind dataset object; this is what gets
# passed as `dataset=` when running a test plan.
vm_dataset = vm.init_dataset(dataset=df)

2023-07-21 17:10:57,334 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...


In [7]:
# List the available test plans (ID, name and description).
vm.test_plans.list_plans()

ID,Name,Description
binary_classifier_metrics,BinaryClassifierMetrics,Test plan for sklearn classifier metrics
binary_classifier_validation,BinaryClassifierPerformance,Test plan for sklearn classifier models
binary_classifier_model_diagnosis,BinaryClassifierDiagnosis,Test plan for sklearn classifier model diagnosis tests
tabular_dataset_description,TabularDatasetDescription,Test plan to extract metadata and descriptive  statistics from a tabular dataset
tabular_data_quality,TabularDataQuality,Test plan for data quality on tabular datasets
time_series_data_quality,TimeSeriesDataQuality,Test plan for data quality on time series datasets
time_series_univariate,TimeSeriesUnivariate,Test plan to perform time series univariate analysis.
time_series_multivariate,TimeSeriesMultivariate,Test plan to perform time series multivariate analysis.
time_series_forecast,TimeSeriesForecast,Test plan to perform time series forecast tests.
time_series_sensitivity,TimeSeriesSensitivity,Test plan to perform time series forecast tests.


In [8]:
# Show the required context and the individual tests of the univariate time series plan.
vm.test_plans.describe_plan("time_series_univariate")

ID,Name,Description,Required Context,Tests
time_series_univariate,TimeSeriesUnivariate,Test plan to perform time series univariate analysis.,['dataset'],TimeSeriesLinePlot (Metric) TimeSeriesHistogram (Metric) ACFandPACFPlot (Metric) SeasonalDecompose (Metric) AutoSeasonality (Metric) AutoStationarity (Metric) RollingStatsPlot (Metric) AutoAR (Metric) AutoMA (Metric)


In [9]:
# Series to analyse: one loan-rate column per grade.
loan_rate_columns = [
    "loan_rate_A",
    "loan_rate_B",
    "loan_rate_C",
    "loan_rate_D",
]

# Test plan configuration: the columns to analyse plus per-test parameters.
# NOTE(review): the per-test keys presumably map to the tests listed by
# describe_plan("time_series_univariate") — confirm against the ValidMind docs.
test_plan_config = {
    "columns": loan_rate_columns,
    "seasonal_decompose": {
        "seasonal_model": "additive",
    },
    "auto_seasonality": {
        "min_period": 1,
        "max_period": 4,
    },
    "auto_stationarity": {
        "max_order": 5,
        "threshold": 0.05,
    },
    "rolling_stats_plot": {
        "window_size": 12,
    },
    "auto_ar": {
        "max_ar_order": 3,
    },
    "auto_ma": {
        "max_ma_order": 3,
    },
}

# Run the univariate time series plan against the ValidMind dataset.
plan = vm.run_test_plan(
    "time_series_univariate",
    config=test_plan_config,
    dataset=vm_dataset,
)

HBox(children=(Label(value='Running test plan...'), IntProgress(value=0, max=18)))

2023-07-21 17:10:58,140 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_A: MS
2023-07-21 17:10:58,140 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_A: MS
2023-07-21 17:10:58,415 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_B: MS
2023-07-21 17:10:58,415 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_B: MS
2023-07-21 17:10:58,571 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_C: MS
2023-07-21 17:10:58,571 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_C: MS
2023-07-21 17:10:58,809 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_D: MS
2023-07-21 17:10:58,809 - INFO(validmind.tests.data_validation.SeasonalDecompose): Frequency of loan_rate_D: MS
No artists with labels found to put in legend.  Note that artists whose label start with an underscore a

VBox(children=(HTML(value='<h2>Results for <i>Time Series Univariate</i> Test Plan:</h2><hr>'), HTML(value='<d…