# Non-Operational Forecasts: Testing the Baselines

In [1]:
### prerequisites ###
# numpy, pandas, scipy, xgboost, sklearn, matplotlib (optional)

import numpy as np
import pandas as pd
import sys
import os
import joblib
import matplotlib.pyplot as plt
import xgboost as xgb
import scipy
%matplotlib inline

In [2]:
### load models ###
# minimal impact: use xgboost_multioutput_400, F1 score 0.81233, threshold 0.23225
# limited impact: use xgboost_multioutput_400, F1 score 0.79934, threshold 0.15819
# substantial impact: use xgboost_chain_400, F1 score 0.76786, threshold 0.28913
# direct strike: use xgboost_chain_400, F1 score 0.77698, threshold 0.23056

model_xgboost_multioutput_400 = joblib.load("./models/baseline_model_xgboost_multioutput_400_2021-12-14 20:26.skl")
model_xgboost_chain_400 = joblib.load("./models/baseline_model_xgboost_chain_400_2021-12-14 20:29.skl")

print(type(model_xgboost_multioutput_400))
print(type(model_xgboost_chain_400))

<class 'sklearn.multioutput.MultiOutputClassifier'>
<class 'sklearn.multioutput.ClassifierChain'>


In [3]:
### Prepare test data ###
# times are in UTC
mm = 12
dd = 20
hh = 6
# current pos
LAT00 = 18.3
LON00 = 111.8
WIND00 = 45
# six hours ago
LAT06 = 17.3
LON06 = 110.9
WIND06 = 70
LAT12 = 16.0
LON12 = 110.6
WIND12 = 90
LAT18 = 14.9
LON18 = 110.8
WIND18 = 105
LAT24 = 14.0
LON24 = 110.7
WIND24 = 120

data = pd.DataFrame(data=[
    (mm, dd, hh, LAT00, LON00, WIND00, LAT06, LON06, WIND06, LAT12, LON12, WIND12, LAT18, LON18, WIND18, LAT24, LON24, WIND24)
], columns=[
    "MM", "DD", "HH", "00LAT", "00LON", "00WIND","06LAT", "06LON", "06WIND","12LAT", "12LON", "12WIND","18LAT", "18LON", "18WIND","24LAT", "24LON", "24WIND"
])
print(data)

   MM  DD  HH  00LAT  00LON  00WIND  06LAT  06LON  06WIND  12LAT  12LON  \
0  12  20   6   18.3  111.8      45   17.3  110.9      70   16.0  110.6   

   12WIND  18LAT  18LON  18WIND  24LAT  24LON  24WIND  
0      90   14.9  110.8     105   14.0  110.7     120  


In [4]:
### make predictions
preds_xgboost_multioutput_400 = model_xgboost_multioutput_400.predict_proba(data)
preds_model_xgboost_chain_400 = model_xgboost_chain_400.predict_proba(data)

# ugly code to handle the formats of the preds
raw_low_impact = preds_xgboost_multioutput_400[0][0][1]
raw_mid_impact = preds_xgboost_multioutput_400[1][0][1]
raw_big_impact = preds_model_xgboost_chain_400[0][2]
raw_direct_strike = preds_model_xgboost_chain_400[0][3]

In [5]:
print("Raw probabilities:")
print("Minimal impact: {0:.5%}".format(raw_low_impact))
print("Limited impact: {0:.5%}".format(raw_mid_impact))
print("Substantial impact: {0:.5%}".format(raw_big_impact))
print("Direct strike: {0:.5%}".format(raw_direct_strike))

Raw probabilities:
Minimal impact: 0.01674%
Limited impact: 0.34018%
Substantial impact: 0.00056%
Direct strike: 0.00051%


In [6]:
### transform preds according to thresholds ###
# probably useless

# linear interpolation
def linear_interpolation(threshold, x):
    return 0.5 + (x - threshold) * (1 - 0.5) / (1 - threshold)
    if x >= threshold:
        reference_point = (1,1)
        return 0.5 + (x - threshold) * (1 - 0.5) / (1 - threshold)
    else:
        reference_point = (0,0)
        return 0 + (threshold - x) * (0.5 - 0) / (threshold - 0)

low_impact = linear_interpolation(0.23225, raw_low_impact)
mid_impact = linear_interpolation(0.15819, raw_mid_impact)
big_impact = linear_interpolation(0.28913, raw_big_impact)
direct_strike = linear_interpolation(0.23056, raw_direct_strike)
    
print("Linearly adjusted probabilities:")
print("Minimal impact: {0:.5%}".format(low_impact))
print("Limited impact: {0:.5%}".format(mid_impact))
print("Substantial impact: {0:.5%}".format(big_impact))
print("Direct strike: {0:.5%}".format(direct_strike))

Linearly adjusted probabilities:
Minimal impact: 34.87469%
Limited impact: 40.60430%
Substantial impact: 29.66394%
Direct strike: 35.01773%


In [8]:
preds_xgboost_multioutput_400 = model_xgboost_multioutput_400.predict(data)
preds_model_xgboost_chain_400 = model_xgboost_chain_400.predict(data)

# ugly code to handle the formats of the preds
deterministic_low_impact = preds_xgboost_multioutput_400[0][0]
deterministic_mid_impact = preds_xgboost_multioutput_400[0][1]
deterministic_big_impact = int(preds_model_xgboost_chain_400[0][2])
deterministic_direct_strike = int(preds_model_xgboost_chain_400[0][3])

In [9]:
print("Deterministic forecast (0 = no, 1= yes):")
print("Minimal impact: {0}".format(deterministic_low_impact))
print("Limited impact: {0}".format(deterministic_mid_impact))
print("Substantial impact: {0}".format(deterministic_big_impact))
print("Direct strike: {0}".format(deterministic_direct_strike))

Deterministic forecast (0 = no, 1= yes):
Minimal impact: 0
Limited impact: 0
Substantial impact: 0
Direct strike: 0


In [6]:
from scipy import interpolate

def f(x, threshold):
    x_points = [0, threshold, 1]
    y_points = [0, 0.5, 1]
    
    # tck = interpolate.splrep(x_points, y_points, k=2)
    # return interpolate.splev(x, tck)
    
    cs = interpolate.CubicSpline(x_points, y_points)
    return cs(x)

low_impact = f(raw_low_impact, 0.23225)
mid_impact = f(raw_mid_impact, 0.15819)
big_impact = f(raw_big_impact, 0.28913)
direct_strike = f(raw_direct_strike, 0.23056)
    
print("Spline adjusted probabilities:")
print("Minimal impact: {0:.5%}".format(low_impact))
print("Limited impact: {0:.5%}".format(mid_impact))
print("Substantial impact: {0:.5%}".format(big_impact))
print("Direct strike: {0:.5%}".format(direct_strike))

Spline adjusted probabilities:
Minimal impact: 0.04188%
Limited impact: 1.21039%
Substantial impact: 0.00113%
Direct strike: 0.00130%
