In [None]:
import pandas as pd
from pyprojroot import here
from tqdm import tqdm

from lightgbm import LGBMClassifier

In [None]:
# Load the melted interim dataset; the 'Datetime' column is parsed as dates
# and used as the row index so later cells can filter by timestamp.
melted_csv = here() / 'data/interim/melted.csv'
data = pd.read_csv(
    melted_csv,
    index_col='Datetime',
    parse_dates=True,
)

In [None]:
# Distinct index values of `data` (one entry per timestamp, duplicates removed).
datetime_index = data.index
ui = datetime_index.unique()

In [None]:
# Timestamps on or after the cut-off form the evaluation period; each one is
# scored in turn by the walk-forward loop.
test_start = pd.Timestamp('2019-01-01')
test_indices = ui[ui >= test_start]

In [None]:
# Walk-forward evaluation: for every test timestamp, fit a fresh classifier on
# all rows strictly earlier than that timestamp, then score the rows at it.
# One small frame of predictions per timestamp is collected in `results`.
results = []

for ts in tqdm(test_indices):
    is_past = data.index < ts
    is_now = data.index == ts

    history = data[is_past]
    current = data[is_now]

    # 'Crowding' is the target; every remaining column is used as a feature.
    clf = LGBMClassifier(class_weight='balanced', verbose=-1)
    clf.fit(history.drop(columns='Crowding'), history['Crowding'])

    features_now = current.drop(columns='Crowding')
    # Column 1 of predict_proba is the probability of the second class.
    # NOTE(review): assumes 'Crowding' is binary -- confirm.
    second_class_prob = clf.predict_proba(features_now)[:, 1]

    scored = features_now.assign(y_pred=second_class_prob)
    results.append(scored[['Origin', 'Subgroup', 'y_pred']])

In [None]:
# Stack the per-timestamp prediction frames row-wise into one long table.
df = pd.concat(objs=results, axis=0)

In [None]:
# Replace the integer subgroup codes 0..3 with their short text labels;
# values that are not keys of the mapping are left untouched by `replace`.
conversion = dict(enumerate(['bed', 'med', 'sur', 'cri']))

df['Subgroup'] = df['Subgroup'].replace(conversion)

In [None]:
# Write one CSV of predicted probabilities per (Subgroup, Origin) pair.
# Each file holds the 'y_pred' column (with its index) for that pair; pairs
# with no matching rows still produce a header-only file, as before.
out_dir = here() / 'data/processed/matrices/prob'

# to_csv does not create missing parent directories and raises OSError on a
# fresh checkout, so make sure the target directory exists first.
out_dir.mkdir(parents=True, exist_ok=True)

# Loop-invariant: the set of origins does not change between subgroups.
origins = df.Origin.unique()

for s in df.Subgroup.unique():
    for o in origins:
        d = df[(df.Subgroup==s) & (df.Origin==o)]
        d['y_pred'].to_csv(out_dir / f'{s}-melt-{o}-0-0.csv')