In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.metrics import (
    f1_score,
    precision_recall_fscore_support,
    roc_auc_score,
    roc_curve,
    auc,
    precision_recall_curve,
    confusion_matrix,
)
from catboost import Pool
import mlflow
from mlflow.types.schema import Schema, ColSpec
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

from var import DATA_OUT, IMAGE_OUT, FORECAST_HOURS_IN_ADVANCE
from src import ML_SERVER_URI, EXPERIMENT_NAME
from src.model import (
    get_or_create_experiment,
    start_crossvalidated_run,
    evaluate_crossvalidated_metrics,
)

# Point the MLflow client at the tracking server whose URI is exported by `src`.
mlflow.set_tracking_uri(uri=ML_SERVER_URI)

In [2]:
# Load the dataset pickle stored under DATA_OUT.
# NOTE(review): unpickling can execute arbitrary code - only load files produced by this project.
dataset_path = Path(DATA_OUT) / 'df_dataset.pickle'
df = pd.read_pickle(dataset_path)

In [3]:
def _columns_with_prefix(prefix):
    """Return the names of all `df` columns starting with `prefix`, in `df` column order."""
    return [name for name in df.columns if name.startswith(prefix)]


# Feature matrix: prefix-selected column groups interleaved with explicitly
# named columns. The order below is the column order of `X`.
feature_columns = [
    *_columns_with_prefix('ie_'),
    *_columns_with_prefix('iu_'),
    'hf',
    'hf_mav_2h',
    'f_107_adj',
    'hp_30',
    'smr',
    'solar_zenith_angle',
    'newell',
    *_columns_with_prefix('local_warning_'),
    *_columns_with_prefix('spectral_contribution_'),
    *_columns_with_prefix('azimuth_'),
    *_columns_with_prefix('velocity_'),
]
X = df[feature_columns].copy()

# Target column; its name depends on the configured forecast horizon.
target_column = f'tid_within_{FORECAST_HOURS_IN_ADVANCE}h'
y = df[target_column].copy()

In [4]:
# Columns declared as categorical: the two "variation" columns plus every
# `local_warning_*` column present in `df`.
cat_features = ['ie_variation', 'iu_variation'] + [
    name for name in df.columns if name.startswith('local_warning_')
]

# Fixed (non-tuned) model parameters.
# Note: "auto_class_weights": "SqrtBalanced" is currently disabled.
static_params = dict(
    eval_metric='F1',
    random_seed=42,
    cat_features=cat_features,
    od_type='Iter',
    use_best_model=True,
    has_time=True,
    od_wait=200,
)

In [5]:
# Time-ordered cross-validator shared by the run retrieval and the fold weighting below.
N_CV_SPLITS = 5
ts_cv = TimeSeriesSplit(n_splits=N_CV_SPLITS)

## Retrieve cross-validated model

In [6]:
# Resolve the experiment id for EXPERIMENT_NAME, then make it the active experiment.
# The trailing semicolon suppresses the cell output.
experiment_id = get_or_create_experiment(EXPERIMENT_NAME)
mlflow.set_experiment(experiment_id=experiment_id);

### Pre-trained model (retrieved by `run_id`)

In [7]:
# MLflow run that holds the pre-trained model.
# NOTE(review): presumably `start_crossvalidated_run` loads the model logged
# under this run instead of retraining - confirm in `src.model`.
PRETRAINED_RUN_ID = 'ed451d15e8094aa593f44d07bc77696d'

cat_model, fold_scores = start_crossvalidated_run(
    X=X,
    y=y,
    time_series_cross_validator=ts_cv,
    run_id=PRETRAINED_RUN_ID,
)

# Score collections, unpacked in the order F1, precision, recall.
f1s, prs, rcs = fold_scores

In [8]:
# Weight each fold by the share of all samples that its training split covers.
n_samples = len(X)
weights = [len(fold_train_idx) / n_samples for fold_train_idx, _ in ts_cv.split(X)]

# Summarise the cross-validated scores using those fold weights.
evaluate_crossvalidated_metrics(
    metrics=dict(zip(('F1-score', 'Precision', 'Recall'), (f1s, prs, rcs))),
    weights=weights,
)

Achieved F1-score (weighted):	0.58 ± 0.03
Achieved Precision (weighted):	0.63 ± 0.09
Achieved Recall (weighted):	0.55 ± 0.06


In [9]:
# Same summary as the weighted one, but with no fold weights.
evaluate_crossvalidated_metrics(
    metrics=dict(zip(('F1-score', 'Precision', 'Recall'), (f1s, prs, rcs))),
    weights=None,
)

Achieved F1-score:	0.58 ± 0.03
Achieved Precision:	0.66 ± 0.09
Achieved Recall:	0.53 ± 0.06


In [10]:
# Keep only the last split produced by the cross-validator.
train_idx, test_idx = list(ts_cv.split(X))[-1]

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

# Hard class predictions on the held-out part of that split.
y_pred = cat_model.predict(X_test)

## Evaluation of classification

In [19]:
# Evaluation frame for the last CV split: features plus true label, hard
# prediction and positive-class probability.
df_eval = X_test.copy(deep=True)
df_eval['true'] = y_test
df_eval['pred'] = cat_model.predict(X_test)
df_eval['score'] = cat_model.predict_proba(X_test)[:, 1]

# The histogram, ROC, PR, calibration-curve and Brier cells all read
# `score_cal`, so it has to exist before them for a top-to-bottom run.
# It is created here, with the same isotonic calibration settings used in
# the "Calibration" section.
# NOTE(review): the calibrator is fitted on the training split; if `cat_model`
# was itself fitted on those rows, a separate calibration set would give a
# less optimistic calibration - confirm.
calibrator = CalibratedClassifierCV(
    estimator=cat_model,
    method='isotonic',
    cv='prefit',
).fit(X_train, y_train)
df_eval['score_cal'] = calibrator.predict_proba(X_test)[:, 1]

In [20]:
# F1-score of the model's own hard predictions on the test split.
# `np.round` is used instead of the `.round` method because the method exists
# only on NumPy scalars; if `f1_score` returns a plain Python float the method
# call raises AttributeError, while `np.round` accepts both.
np.round(
    f1_score(
        df_eval['true'],
        df_eval['pred'],
    ),
    3,
)

0.558

In [42]:
# Overlaid histograms of the calibrated score, one colour per true class.
# Requires `df_eval['score_cal']` to exist already.
score_cal_histogram = px.histogram(
    df_eval,
    x=['score_cal'],
    color='true',
    barmode='overlay',
)
score_cal_histogram

### ROC curve

In [46]:
# ROC operating points computed from the calibrated score.
fpr, tpr, thresholds = roc_curve(
    y_true=df_eval['true'],
    y_score=df_eval['score_cal'],
)

In [47]:
# Area under the ROC curve for the calibrated score.
roc_auc = roc_auc_score(
    y_true=df_eval['true'],
    y_score=df_eval['score_cal'],
)

In [48]:
# Dashed reference diagonal from (0, 0) to (1, 1).
roc_reference_line = dict(
    type='line',
    x0=0,
    y0=0,
    x1=1,
    y1=1,
    line=dict(color='navy', width=2, dash='dash'),
)

# One marker per (FPR, TPR) operating point.
fig = px.scatter(x=fpr, y=tpr)

fig.update_layout(
    title=f'ROC Curve (ROC-AUC: <b>{roc_auc:.2f}</b>)',
    xaxis=dict(title='False Positive Rate'),
    yaxis=dict(title='True Positive Rate'),
    shapes=[roc_reference_line],
    autosize=False,
    width=800,
    height=700,
    template='ggplot2',
)

# Optional export (disabled):
# fig.write_html(Path(IMAGE_OUT, 'plot_roc_curve.html'))

fig.show()

### PR curve

In [68]:
# Precision (`p`), recall (`r`) and thresholds (`t`) for the calibrated score.
# Later cells index `p`, `r` and `t` with the same position.
eval_labels = df_eval['true']
eval_scores = df_eval['score_cal']

p, r, t = precision_recall_curve(
    eval_labels,
    eval_scores,
    drop_intermediate=True,
)

In [69]:
# Area under the precision-recall curve (recall on x, precision on y).
pr_auc = auc(x=r, y=p)

In [70]:
# F1 at every PR-curve point. Where precision + recall is 0 the plain formula
# gives 0/0 = NaN, and np.argmax would then select that NaN entry. Dividing by
# 1 at those points yields F1 = 0 because the numerator is 0 there as well.
f1_denominator = p + r
f1_scores = 2 * (p * r) / np.where(f1_denominator > 0, f1_denominator, 1.0)

# `p` and `r` have one more entry than `t` (the final point has no threshold),
# so the maximum is searched only among points that have a threshold.
idx_f1_max = int(np.argmax(f1_scores[:len(t)]))
thr_f1_max = t[idx_f1_max]

print(
    f'{np.round(thr_f1_max, 3)} is the threshold that maximises F1-score to {np.round(f1_scores[idx_f1_max], 3)}'
)

0.577 is the threshold that maximises F1-score to 0.568


In [84]:
# Positions on the PR curve whose precision lies in [0.80, 0.8276).
in_precision_band = (p >= 0.80) & (p < 0.8276)
np.where(in_precision_band)

(array([102], dtype=int64),)

In [85]:
# Derive the operating point from the curve instead of hard-coding an index
# copied from an earlier output: first PR-curve position (among those that
# have a threshold) whose precision lies in [0.80, 0.8276).
precision_band = np.where(
    np.logical_and(p[:len(t)] >= 0.80, p[:len(t)] < 0.8276)
)[0]
if precision_band.size == 0:
    raise ValueError('No PR-curve point with precision in [0.80, 0.8276)')

idx = int(precision_band[0])
thr_p_80 = t[idx]

print(
    f'{thr_p_80.round(3)} is the threshold that gives a precision of {p[idx].round(3)} (recall: {r[idx].round(3)} | F1-score: {f1_scores[idx].round(3)})'
)

0.856 is the threshold that gives a precision of 0.828 (recall: 0.332 | F1-score: 0.474)


In [90]:
# Positions on the PR curve whose recall lies in [0.60, 0.62).
in_recall_band = (r >= 0.60) & (r < 0.62)
np.where(in_recall_band)

(array([78, 79], dtype=int64),)

In [91]:
# Derive the operating point from the curve instead of hard-coding an index
# copied from an earlier output: first PR-curve position (among those that
# have a threshold) whose recall lies in [0.60, 0.62).
recall_band = np.where(
    np.logical_and(r[:len(t)] >= 0.60, r[:len(t)] < 0.62)
)[0]
if recall_band.size == 0:
    raise ValueError('No PR-curve point with recall in [0.60, 0.62)')

# NOTE: `idx` is reused by the PR-curve plot, which highlights this point.
idx = int(recall_band[0])
thr_r_60 = t[idx]

print(
    f'{thr_r_60.round(3)} is the threshold that gives a recall of {r[idx].round(3)} (precision: {p[idx].round(3)} | F1-score: {f1_scores[idx].round(3)})'
)

0.462 is the threshold that gives a recall of 0.614 (precision: 0.513 | F1-score: 0.559)


In [92]:
# The three selected thresholds (max F1, precision target, recall target), rounded for display.
tuple(threshold.round(3) for threshold in (thr_f1_max, thr_p_80, thr_r_60))

(0.577, 0.856, 0.462)

In [58]:
# `thr_f1_max` is taken from the PR curve of `score_cal`, so it must be applied
# to that same column rather than to the uncalibrated `score`.
# `precision_recall_curve` treats score >= threshold as positive, hence `.ge`
# (a strict `>` drops every sample whose score equals the threshold).
df_eval['pred_f1_max'] = df_eval['score_cal'].ge(thr_f1_max).astype(int)

In [59]:
# `thr_p_80` is taken from the PR curve of `score_cal`, so it must be applied
# to that same column rather than to the uncalibrated `score`.
# `precision_recall_curve` treats score >= threshold as positive, hence `.ge`
# (a strict `>` drops every sample whose score equals the threshold).
df_eval['pred_p_80'] = df_eval['score_cal'].ge(thr_p_80).astype(int)

In [60]:
# `thr_r_60` is taken from the PR curve of `score_cal`, so it must be applied
# to that same column rather than to the uncalibrated `score`.
# `precision_recall_curve` treats score >= threshold as positive, hence `.ge`
# (a strict `>` drops every sample whose score equals the threshold).
df_eval['pred_r_60'] = df_eval['score_cal'].ge(thr_r_60).astype(int)

In [None]:
# df_eval.to_pickle(Path(DATA_OUT, 'df_eval.pickle'))

In [93]:
# One marker per (recall, precision) point of the PR curve.
fig = px.scatter(x=r, y=p)

# Chance level of a PR curve: a classifier with no skill has a precision equal
# to the positive-class prevalence at every recall, i.e. a horizontal line.
# The anti-diagonal from (0, 1) to (1, 0) is not a meaningful baseline here.
# NOTE(review): assumes `true` holds 0/1 (or boolean) labels - confirm.
prevalence = float(df_eval['true'].mean())
fig.add_shape(
    type='line',
    x0=0,
    y0=prevalence,
    x1=1,
    y1=prevalence,
    line=dict(color='navy', width=2, dash='dash'),
)

# Highlight the operating point at position `idx`. `idx` is whichever value
# was assigned most recently by the threshold-selection cells.
op_recall, op_precision = r[idx], p[idx]
guide_line = dict(color='red', width=2, dash='dash')

# Vertical guide from the recall axis up to the operating point.
fig.add_shape(
    type='line',
    x0=op_recall,
    y0=0,
    x1=op_recall,
    y1=op_precision,
    line=guide_line,
)

# Horizontal guide from the precision axis across to the operating point.
fig.add_shape(
    type='line',
    x0=0,
    y0=op_precision,
    x1=op_recall,
    y1=op_precision,
    line=guide_line,
)

DOT_SIZE = 15
fig.add_trace(
    go.Scatter(
        x=[op_recall],
        y=[op_precision],
        mode='markers',
        marker=dict(color='red', size=DOT_SIZE),
        showlegend=False,
    )
)

fig.update_layout(
    height=700,
    width=800,
    autosize=False,
    title=f'PR Curve (PR-AUC: <b>{pr_auc:.2f}</b>)',
    xaxis=dict(title='Recall'),
    yaxis=dict(title='Precision'),
    template='ggplot2',
)

# Optional export (disabled):
# fig.write_html(Path(IMAGE_OUT, 'plot_pr_curve.html'))

fig.show()

## Calibration curve

In [37]:
# Reliability-diagram points for the calibrated score, using 10 bins.
prob_true, prob_pred = calibration_curve(
    y_true=df_eval['true'],
    y_prob=df_eval['score_cal'],
    n_bins=10,
)

In [38]:
# Reliability curve: mean predicted probability (x) against observed fraction
# of positives (y).
fig = px.line(x=prob_pred, y=prob_true, markers=True)

# Dashed diagonal labelled as the perfectly calibrated reference.
reference_line_style = dict(color='red', width=2, dash='dash')
fig.add_shape(
    type='line',
    x0=0,
    y0=0,
    x1=1,
    y1=1,
    line=reference_line_style,
    name='Perfectly calibrated',
    showlegend=True,
)

# Distribution of the calibrated scores, drawn against the secondary y-axis.
score_distribution = go.Histogram(
    x=df_eval['score_cal'],
    nbinsx=25,
    opacity=0.3,
    yaxis='y2',
    showlegend=False,
)
fig.add_trace(score_distribution)

# The secondary axis overlays the primary one and sits on the right-hand side.
secondary_axis = dict(
    title='Count of samples',
    overlaying='y',
    side='right',
)
fig.update_layout(
    xaxis_title='Mean predicted probability',
    yaxis_title='Fraction of positives',
    yaxis2=secondary_axis,
)

fig.show()

In [28]:
from sklearn.metrics import brier_score_loss

In [40]:
# Brier score of the calibrated probabilities on the test split.
# `np.round` is used instead of the `.round` method because the method exists
# only on NumPy scalars; if `brier_score_loss` returns a plain Python float the
# method call raises AttributeError, while `np.round` accepts both.
np.round(
    brier_score_loss(
        y_test,
        df_eval['score_cal'],
    ),
    3,
)

0.085

## Calibration

In [30]:
from sklearn.calibration import CalibratedClassifierCV

In [36]:
# Isotonic calibration on top of the already-fitted `cat_model` (cv='prefit'):
# the calibrator is fitted on the training split and applied to the test split.
# Cells earlier in this file read `df_eval['score_cal']`, so this column must
# exist before they are executed.
calibrator = CalibratedClassifierCV(
    estimator=cat_model,
    method='isotonic',
    cv='prefit',
)
calibrator.fit(X_train, y_train)

# Keep only the positive-class probability.
df_eval['score_cal'] = calibrator.predict_proba(X_test)[:, 1]