In [10]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import log_loss, classification_report

In [9]:
from problem import get_train_data, get_test_data

# Load the train and test splits through the helpers imported from `problem`;
# each helper's return value is unpacked into a (data, labels) pair.
data_train, labels_train = get_train_data()
data_test, labels_test = get_test_data()

# Model

In [49]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np


def compute_rolling_std(X_df, features, time_window, center=False):
    """Append a rolling standard-deviation column for each requested feature.

    For every name in ``features`` a column called
    ``"<feature>_<time_window>_std"`` is written into ``X_df`` itself: the
    frame is modified in place and the same object is returned. Missing values
    left by the rolling computation are filled forward, then backward, and the
    result is cast to the dtype of the source column (so an integer source
    column yields truncated values).

    Parameters
    ----------
    X_df : pandas.DataFrame
        Frame holding the columns listed in ``features``. Offset windows such
        as ``"2h"`` require a datetime-like index.
    features : iterable of str
        Names of the columns to process.
    time_window : str or int
        Window forwarded to ``Series.rolling``; either an offset string or a
        fixed number of rows.
    center : bool, default False
        Forwarded to ``Series.rolling``.

    Returns
    -------
    pandas.DataFrame
        ``X_df``, with one extra column per feature.
    """
    for feature in features:
        # str() keeps integer windows usable: joining a raw int raised TypeError.
        name = "_".join([feature, str(time_window), "std"])
        rolled = X_df[feature].rolling(time_window, center=center).std()
        X_df[name] = rolled.ffill().bfill().astype(X_df[feature].dtype)
    return X_df

def add_features(X_df):
    """Add the two derived columns ``AMach_number`` and ``raw_pressure``.

    Both columns are computed from the existing ``V``, ``Np`` and ``B``
    columns and written into ``X_df`` itself; the same frame is returned.

    NOTE(review): the numeric constants look like unit conversions together
    with a particle mass (1.7e-27) and a magnetic constant (4e-7 * pi) --
    confirm against the data documentation.
    """
    speed = X_df['V']
    density = X_df['Np']
    field = X_df['B']

    # Compute both quantities first, then attach them to the frame.
    mach = speed * 1e12 * np.sqrt(density * 1.7e-27 * 1e6) * np.sqrt(4e-7*np.pi) / field
    pressure = speed**2 * density * 1.7e-27 * 1e12 * 1e9

    X_df['AMach_number'] = mach
    X_df['raw_pressure'] = pressure
    return X_df

class FeatureExtractor(BaseEstimator):
    """Stateless pipeline step that appends engineered columns to a DataFrame.

    ``transform`` adds the columns produced by ``add_features`` and then a
    ``"2h"`` rolling standard deviation for ``Beta``, ``Vth``, ``B``, ``Bx``
    and ``Bz``.
    """

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the engineered columns appended.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain the columns read by ``add_features`` plus ``Beta``,
            ``Vth``, ``B``, ``Bx`` and ``Bz``.

        Returns
        -------
        pandas.DataFrame
            A new frame; the caller's ``X`` is left untouched.
        """
        # The helpers assign columns in place, so work on a copy to avoid
        # silently altering the frame the caller passed to fit/predict.
        X = add_features(X.copy())
        return compute_rolling_std(X, ["Beta", "Vth", "B", "Bx", "Bz"], "2h")
    
class PostProcessing(BaseEstimator):
    """Smooth the second probability column along the row axis.

    The smoothing is a centered rolling 0.90-quantile over 12 rows.
    """

    def fit(self, X, y):
        """Nothing to learn; return ``self``."""
        return self

    def transform(self, proba):
        """Return an ``(n_rows, 2)`` array ``[1 - s, s]``.

        ``s`` is the smoothed version of ``proba[:, 1]``.
        """
        positive = pd.Series(proba[:, 1])
        window = positive.rolling(12, min_periods=0, center=True)
        smoothed = window.quantile(0.90)
        # Build the (2, n_rows) stack, then flip it to (n_rows, 2).
        stacked = np.array([1 - smoothed, smoothed])
        return stacked.T

class MyClassifier(BaseEstimator, ClassifierMixin):
    """Logistic regression with positive-class probabilities smoothed over rows.

    The probability in column 1 is replaced by a centered rolling
    0.90-quantile over 12 consecutive rows, so rows must be supplied in their
    sequential order (presumably chronological -- confirm against the data
    loader). Only column 1 is read, i.e. a two-class problem is assumed.

    Note: a later notebook cell defines ``MyClassifier`` again; when both
    cells are run, that later definition is the one in effect.
    """

    def __init__(self):
        self.clf = LogisticRegression(max_iter=1000)

    def fit(self, X, y):
        """Fit the wrapped logistic regression and return ``self``."""
        self.clf.fit(X, y)
        # Expose the fitted labels the way scikit-learn classifiers do.
        self.classes_ = self.clf.classes_
        return self

    def _smoothed_proba(self, X):
        """Return ``(n_rows, 2)`` probabilities after rolling-quantile smoothing."""
        proba = self.clf.predict_proba(X)
        positive = pd.Series(proba[:, 1])
        smoothed = positive.rolling(12, min_periods=0, center=True).quantile(0.90)
        # np.array(...) is (2, n_rows); transpose to one row per sample.
        return np.array([1 - smoothed, smoothed]).T

    def predict(self, X):
        """Return one class label per row, taken from the smoothed probabilities."""
        # axis=1 selects the best class for each row. The previous axis=0
        # returned just two row positions, whatever the number of samples.
        winners = np.argmax(self._smoothed_proba(X), axis=1)
        return self.clf.classes_[winners]

    def predict_proba(self, X):
        """Return the smoothed probabilities, consistent with ``predict``."""
        return self._smoothed_proba(X)

def get_estimator():
    """Build the unfitted model pipeline.

    Steps, in order: ``FeatureExtractor`` -> ``StandardScaler`` ->
    ``MyClassifier``.
    """
    steps = (FeatureExtractor(), StandardScaler(), MyClassifier())
    return make_pipeline(*steps)

In [53]:
class MyClassifier(BaseEstimator, ClassifierMixin):
    """Logistic regression returning row-smoothed probabilities and labels.

    This cell redefines ``MyClassifier`` from the earlier model cell. Both
    ``predict`` and ``predict_proba`` are based on the same smoothing: the
    column-1 probability is replaced by a centered rolling 0.90-quantile over
    12 consecutive rows. Rows are therefore expected in sequential order
    (presumably chronological -- confirm), and a two-class problem is assumed.
    """

    def __init__(self):
        self.clf = LogisticRegression(max_iter=1000)

    def fit(self, X, y):
        """Fit the wrapped logistic regression and return ``self``."""
        self.clf.fit(X, y)
        # Expose the fitted labels the way scikit-learn classifiers do.
        self.classes_ = self.clf.classes_
        return self

    def predict_proba(self, X):
        """Return an ``(n_rows, 2)`` array ``[1 - s, s]`` of smoothed probabilities."""
        raw = self.clf.predict_proba(X)
        positive = pd.Series(raw[:, 1])
        smoothed = positive.rolling(12, min_periods=0, center=True).quantile(0.90)
        # np.array(...) is (2, n_rows); transpose to one row per sample.
        return np.array([1 - smoothed, smoothed]).T

    def predict(self, X):
        """Return one class label per row, derived from ``predict_proba``."""
        # axis=1 selects the best class for each row. The previous axis=0
        # returned just two row positions, whatever the number of samples.
        winners = np.argmax(self.predict_proba(X), axis=1)
        return self.clf.classes_[winners]

In [39]:
from sklearn.compose import TransformedTargetRegressor

def transform(proba):
    """Convert per-row class probabilities into smoothed hard class indices.

    Parameters
    ----------
    proba : numpy.ndarray
        Two-dimensional array whose column 1 holds the positive-class
        probability; rows are smoothed along their given order.

    Returns
    -------
    numpy.ndarray
        One class index (0 or 1) per input row.
    """
    positive = pd.Series(proba[:, 1])
    smoothed = positive.rolling(12, min_periods=0, center=True).quantile(0.90)
    # np.array(...) is (2, n_rows); transpose to one row per sample.
    proba_smoothed = np.array([1 - smoothed, smoothed]).T
    # axis=1 selects the best class for each row. The previous axis=0
    # returned just two row positions, whatever the number of samples.
    return np.argmax(proba_smoothed, axis=1)

# NOTE(review): this wraps a classifier in TransformedTargetRegressor so that
# `transform` is passed as inverse_func; `tt` is not referenced in any later
# cell visible here -- confirm whether this experiment is still needed.
tt = TransformedTargetRegressor(regressor=MyClassifier(),
                                func=None, inverse_func=transform)

In [54]:
# Build the pipeline returned by get_estimator(); it has not been fitted yet.
model = get_estimator()

In [55]:
# Fit the full pipeline on the training split.
model.fit(data_train, labels_train)

In [56]:
# Class-probability predictions of the fitted pipeline for the test split.
y_pred = model.predict_proba(data_test)

# Evaluate

In [58]:
# Shell escape: run the `ramp-test` command-line tool on the submission named "v2".
!ramp-test --submission v2

[38;5;178m[1mTesting Solar wind classification[0m
[38;5;178m[1mReading train and test files from ./data/ ...[0m
[38;5;178m[1mReading cv ...[0m
[38;5;178m[1mTraining submissions/v2 ...[0m
[38;5;178m[1mCV fold 0[0m
	[38;5;178m[1mscore  mixed  pw_ll  pw_prec  pw_rec  ev_prec  ev_rec      time[0m
	[38;5;10m[1mtrain[0m   [38;5;10m[1m[38;5;150m0.24[0m[0m   [38;5;150m0.16[0m     [38;5;150m0.78[0m    [38;5;150m0.43[0m     [38;5;10m[1m[38;5;150m0.24[0m[0m    [38;5;150m0.27[0m  [38;5;150m8.261065[0m
	[38;5;12m[1mvalid[0m   [38;5;12m[1m0.38[0m   [38;5;105m0.31[0m     [38;5;105m0.78[0m    [38;5;105m0.42[0m     [38;5;105m0.24[0m    [38;5;105m0.29[0m  [38;5;105m0.710674[0m
	[38;5;1m[1mtest[0m    [38;5;1m[1m0.23[0m   [38;5;218m0.15[0m     [38;5;218m0.91[0m    [38;5;218m0.22[0m     [38;5;218m0.17[0m    [38;5;218m0.09[0m  [38;5;218m0.326175[0m
[38;5;178m[1mCV fold 1[0m
	[38;5;178m[1mscore  mixed  pw_ll  pw_prec  pw_rec  e