# Measuring Drift
<!--- @wandbcode{decisionopt-nb4b} -->

In [None]:
import os
import pandas as pd
from matplotlib import pyplot as plt
import wandb
from pathlib import Path

# Notebook-wide configuration: the W&B project name handed to wandb.init,
# W&B environment settings, and the matplotlib theme used for plots.
wandb_project = "bimbo_drift_check"
os.environ.update(
    {
        "WANDB_QUIET": "true",
        "WANDB_NOTEBOOK_NAME": "measure_drift.ipynb",
    }
)
plt.style.use("fivethirtyeight")

In [None]:
# Fetch the course dataset as a W&B artifact; the run exists only to record
# the artifact usage and is closed as soon as the download returns.
with wandb.init(project=wandb_project) as run:
    artifact = run.use_artifact("danbecker/edu-decision-opt-course/course-data:v0")
    data_dir = Path(artifact.download())

# Read the training CSV from the downloaded artifact directory.
train_csv_path = data_dir / "bimbo/train.csv"
all_data = pd.read_csv(train_csv_path)
all_data.head()

In [None]:
# Restrict the analysis to the rows of a single product (Producto_ID 1238).
single_product_filter = "Producto_ID == 1238"
product_data = all_data.query(single_product_filter)
product_data.head()

In [None]:
# Number of rows per value of Semana, shown in ascending Semana order.
rows_per_semana = product_data["Semana"].value_counts()
rows_per_semana.sort_index()

In [None]:
# Columns whose combination defines one group of rows for this product.
store_product_group_cols = [
    'Agencia_ID',
    'Canal_ID',
    'Ruta_SAK',
    'Cliente_ID',
]
# Row count of every group, indexed by the grouping columns.
grouped_by_store = product_data.groupby(store_product_group_cols)
store_product_value_counts = grouped_by_store.size()
store_product_value_counts.describe()

In [None]:
# Boolean mask over groups: True where the group has exactly 7 rows.
full_filled_cases = store_product_value_counts.eq(7)

# Index the product rows by the grouping columns so the per-group mask can
# select them, then turn the grouping columns back into ordinary columns.
indexed_product_data = product_data.set_index(store_product_group_cols)
full_filled_data = indexed_product_data.loc[full_filled_cases].reset_index()

# Report how many rows the filter removed.
original_rows = len(product_data)
new_rows = len(full_filled_data)
frac_data_dropped = 1 - (new_rows / original_rows)
print(f'Went from {original_rows} to {new_rows} rows, removing {(frac_data_dropped*100):.1f}% of rows')

In [None]:
# Mean of Demanda_uni_equil for each Semana, drawn as a line plot.
# The trailing semicolon keeps the notebook from echoing the Axes repr.
mean_demand_by_semana = full_filled_data.groupby(['Semana'])['Demanda_uni_equil'].mean()
mean_demand_by_semana.plot();

In [None]:
# Log the per-Semana mean of Demanda_uni_equil to a W&B run of type "log_stats",
# one log call per week.
weekly_mean_demand = full_filled_data.groupby(['Semana']).Demanda_uni_equil.mean()

# The context manager finishes the run even if logging raises part-way
# through, matching how the data-loading cell opens its run.
with wandb.init(project=wandb_project, job_type="log_stats") as run:
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
    for semana, mean_demand in weekly_mean_demand.items():
        run.log({'week': semana, 'mean_demand': mean_demand})

In [None]:
from utils.time_series_split_modeling import make_models

# make_models is a project helper (not shown here); it is called on the
# filtered rows and its result is unpacked into a models object and an encoder.
# NOTE(review): the exact types of `models` and `encoder` are defined by the
# helper - confirm against utils/time_series_split_modeling.
models_and_encoder = make_models(full_filled_data)
models, encoder = models_and_encoder