In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.metrics import (
    f1_score,
    precision_recall_fscore_support,
    roc_auc_score,
    roc_curve,
    auc,
    precision_recall_curve,
    confusion_matrix
)
from catboost import Pool, CatBoostClassifier
import shap
shap.initjs()
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from var import DATA_OUT, IMAGE_OUT, FORECAST_HOURS_IN_ADVANCE

In [None]:
# Load the prepared dataset from the project's output directory.
# NOTE: unpickling executes arbitrary code, so only load files this project produced.
dataset_path = Path(DATA_OUT) / 'df_dataset.pickle'
df = pd.read_pickle(dataset_path)

In [None]:
# Predictor columns fed to the classifier; the commented-out entries are
# candidate features that are currently disabled.
feature_columns = [
    'f_107_adj',
    'solar_zenith_angle',
    'newell',
    # 'bz',
    # 'vx',
    # 'rho',
]

# The target column name depends on the configured forecast horizon.
target_column = f'tid_within_{FORECAST_HOURS_IN_ADVANCE}h'

# Work on copies so later modifications never write back into `df`.
X = df[feature_columns].copy()
y = df[target_column].copy()

In [None]:
# CatBoost settings shared by every model trained in this notebook.
static_params = dict(
    eval_metric='F1',                   # metric monitored on the eval set
    random_seed=42,                     # fixed seed for reproducible training
    auto_class_weights="SqrtBalanced",  # automatic class re-weighting
    od_type="Iter",                     # overfitting detector: iteration-count based
    use_best_model=True,                # keep the trees up to the best eval iteration
    has_time=True,                      # respect the row order of the input data
    od_wait=200,                        # iterations to wait after the best one before stopping
)

In [None]:
# Chronological split by index label: everything from 2014 through June 2021
# is used for training, everything from July 2021 onwards for testing.
# NOTE(review): presumably the index is a sorted DatetimeIndex — confirm.
train_period = slice('2014', '2021-06')
test_period = slice('2021-07', None)

X_train = X.loc[train_period].copy()
y_train = y.loc[train_period].copy()

X_test = X.loc[test_period].copy()
y_test = y.loc[test_period].copy()

In [None]:
# `use_best_model` and the overfitting detector (`od_type` / `od_wait`) choose the
# final number of trees from the eval set. Passing the test period as eval set
# would tune the model on the very data used for the reported metrics and make
# them optimistically biased. Instead, the last part of the training period is
# held out for model selection and the test period stays untouched until
# evaluation.
# NOTE(review): assumes the rows of X_train / y_train are in chronological order
# (consistent with `has_time=True`) — confirm.
VALIDATION_FRACTION = 0.15
n_validation = max(1, int(len(X_train) * VALIDATION_FRACTION))

X_fit, y_fit = X_train.iloc[:-n_validation], y_train.iloc[:-n_validation]
X_val, y_val = X_train.iloc[-n_validation:], y_train.iloc[-n_validation:]

model = CatBoostClassifier(
    loss_function='Logloss',
    iterations=2_000,  # upper bound; early stopping usually ends training sooner
    **static_params,
)

model = model.fit(
    X_fit,
    y_fit,
    eval_set=(X_val, y_val),
    silent=True,
)

In [None]:
# Hard class labels (not probabilities) for the held-out test period.
y_pred = model.predict(X_test, prediction_type='Class')

In [None]:
# Per-class precision, recall and F-score arrays; the per-class support is not used.
p, r, f, _ = precision_recall_fscore_support(y_true=y_test, y_pred=y_pred)

In [None]:
# Report the scores stored at index 1 of each per-class array
# (presumably the positive class — confirm the label order).
precision_1, recall_1, f1_1 = (scores[1].round(3) for scores in (p, r, f))
print(f'{precision_1} precision | {recall_1} recall | {f1_1} F1-score')

In [None]:
# Ask CatBoost for SHAP values on the test set.
test_pool = Pool(X_test, label=y_test)
shap_with_extra_column = model.get_feature_importance(test_pool, type="ShapValues")

# Drop the trailing column so only the per-feature contributions remain
# (per the CatBoost documentation the last column holds the expected value).
shap_values = shap_with_extra_column[:, :-1]

In [None]:
# Display the array shape as a sanity check after the trailing column was
# dropped; presumably (rows in X_test, number of features) — confirm.
shap_values.shape

In [None]:
# Summary plot of the SHAP values, using the test features for the feature values.
shap.summary_plot(shap_values, features=X_test)

In [None]:
# Build a SHAP tree explainer for the fitted model and store it under key `i`.
# NOTE(review): neither `explainer_dict` nor `i` is assigned in any cell above,
# so on a fresh kernel (Restart & Run All) this line raises NameError —
# TODO confirm where they are meant to be defined.
explainer_dict[i] = shap.TreeExplainer(model)