In [7]:
import numpy as np
from scipy.optimize import minimize

class TemperatureScaling:
    """Calibrate class scores by dividing logits by one learned temperature.

    ``fit`` searches for the temperature (within [0.1, 10]) that minimises the
    mean negative log-likelihood of the true labels; ``transform`` applies that
    temperature and returns softmax probabilities.

    All softmax computations subtract the row-wise maximum before
    exponentiating, so large logits do not overflow to ``inf``/``nan``.
    """

    def __init__(self):
        # Learned temperature; remains None until fit() has been called.
        self.temperature = None

    @staticmethod
    def _log_softmax(scores):
        """Return the row-wise log-softmax of a 2-D array, computed stably."""
        # Shifting by the row maximum leaves the softmax unchanged but keeps
        # every exponent <= 0, so np.exp cannot overflow.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    def fit(self, logits, y_true):
        """
        Fit the temperature scaling parameter using the training data.

        Args:
            logits (np.array): Logits (raw output from the model), one row per
                sample and one column per class.
            y_true (np.array): True labels, used as integer column indices
                into ``logits``.
        """
        logits = np.asarray(logits, dtype=float)
        y_true = np.asarray(y_true)
        rows = np.arange(len(y_true))

        def loss_fn(T):
            # Mean negative log-likelihood of the true class at temperature T.
            # Working in log-space avoids log(0) when a probability underflows.
            log_probs = self._log_softmax(logits / T)
            return -np.mean(log_probs[rows, y_true])

        # Optimize temperature, starting from T=1 (no scaling) within bounds.
        result = minimize(loss_fn, x0=np.ones(1), bounds=[(0.1, 10)])
        self.temperature = result.x[0]

    def transform(self, logits):
        """
        Apply the temperature scaling to logits.

        Args:
            logits (np.array): Logits (raw output from the model), one row per
                sample and one column per class.

        Returns:
            np.array: Scaled probabilities; each row sums to 1.

        Raises:
            RuntimeError: If called before ``fit`` has set the temperature.
        """
        if self.temperature is None:
            raise RuntimeError(
                "TemperatureScaling is not fitted yet; call fit() before transform()."
            )
        scaled_logits = np.asarray(logits, dtype=float) / self.temperature
        return np.exp(self._log_softmax(scaled_logits))



In [1]:
import pandas as pd

In [2]:
# Read the knowledge-base training table from a CSV file located relative to
# the notebook's working directory.
df = pd.read_csv("Knowledge_base_train.csv")
# Bare last expression so the notebook renders a preview of the frame.
df

Unnamed: 0,class,num_clients,Sum of Instances in Clients,Max. Of Instances in Clients,Min. Of Instances in Clients,Stddev of Instances in Clients,Average Dataset Missing Values %,Min Dataset Missing Values %,Max Dataset Missing Values %,Stddev Dataset Missing Values %,...,Min No. Of Insignificant Lags in Target,Stddev No. Of Insignificant Lags in Target,Avg. No. Of Seasonality Components in Target,Max No. Of Seasonality Components in Target,Min No. Of Seasonality Components in Target,Stddev No. Of Seasonality Components in Target,Average Fractal Dimensionality Across Clients of Target,Maximum Period of Seasonality Components in Target Across Clients,Minimum Period of Seasonality Components in Target Across Clients,Entropy of Target Stationarity
0,XGBRegressor,10,13821,1383,1382,0.300000,4.992466,4.121475,5.571635,0.448970,...,0,0,2,2,2,0,0.009829,13,2,0.325083
1,XGBRegressor,5,4031,807,806,0.400000,4.762946,3.970223,6.203474,0.875041,...,0,0,0,0,0,0,0.033774,0,0,0.673012
2,HUBERREGRESSOR,5,17280,3456,3456,0.000000,5.104167,4.745370,5.295139,0.188945,...,2,0,0,0,0,0,0.017802,0,0,0.673012
3,LinearSVR,5,6911,1383,1382,0.400000,4.934113,4.052098,5.571635,0.524086,...,1,0,0,0,0,0,0.390252,0,0,0.000000
4,XGBRegressor,15,24181,1613,1612,0.249444,4.966717,3.535980,6.389578,0.693981,...,0,0,1,1,1,0,0.014879,4,4,0.392674
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,ELASTICNETCV,10,27641,2765,2764,0.300000,4.967232,4.486252,5.714286,0.346594,...,0,0,0,0,0,0,0.012131,0,0,0.325083
396,XGBRegressor,10,27641,2765,2764,0.300000,5.003456,4.413893,5.788712,0.448594,...,0,0,0,0,0,0,0.030401,0,0,0.673012
397,XGBRegressor,5,6336,1268,1267,0.400000,5.145155,4.419890,5.524862,0.409185,...,1,0,0,0,0,0,0.104852,0,0,0.000000
398,LASSO,10,12671,1268,1267,0.300000,4.861573,3.864353,6.156275,0.549873,...,0,0,0,0,0,0,0.012215,0,0,0.500402


In [5]:
# Remove the 'class' column so that only the remaining columns are left in df.
# NOTE(review): 'class' looks like the label column (it holds regressor names
# in the preview above) - confirm.
# errors='ignore' keeps this cell re-runnable: df is rebound in place, so a
# second execution would otherwise raise KeyError because 'class' is gone.
df = df.drop(columns='class', errors='ignore')
df

Unnamed: 0,num_clients,Sum of Instances in Clients,Max. Of Instances in Clients,Min. Of Instances in Clients,Stddev of Instances in Clients,Average Dataset Missing Values %,Min Dataset Missing Values %,Max Dataset Missing Values %,Stddev Dataset Missing Values %,Average Target Missing Values %,...,Min No. Of Insignificant Lags in Target,Stddev No. Of Insignificant Lags in Target,Avg. No. Of Seasonality Components in Target,Max No. Of Seasonality Components in Target,Min No. Of Seasonality Components in Target,Stddev No. Of Seasonality Components in Target,Average Fractal Dimensionality Across Clients of Target,Maximum Period of Seasonality Components in Target Across Clients,Minimum Period of Seasonality Components in Target Across Clients,Entropy of Target Stationarity
0,10,13821,1383,1382,0.300000,4.992466,4.121475,5.571635,0.448970,4.992466,...,0,0,2,2,2,0,0.009829,13,2,0.325083
1,5,4031,807,806,0.400000,4.762946,3.970223,6.203474,0.875041,4.762946,...,0,0,0,0,0,0,0.033774,0,0,0.673012
2,5,17280,3456,3456,0.000000,5.104167,4.745370,5.295139,0.188945,5.104167,...,2,0,0,0,0,0,0.017802,0,0,0.673012
3,5,6911,1383,1382,0.400000,4.934113,4.052098,5.571635,0.524086,4.934113,...,1,0,0,0,0,0,0.390252,0,0,0.000000
4,15,24181,1613,1612,0.249444,4.966717,3.535980,6.389578,0.693981,4.966717,...,0,0,1,1,1,0,0.014879,4,4,0.392674
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,10,27641,2765,2764,0.300000,4.967232,4.486252,5.714286,0.346594,4.967232,...,0,0,0,0,0,0,0.012131,0,0,0.325083
396,10,27641,2765,2764,0.300000,5.003456,4.413893,5.788712,0.448594,5.003456,...,0,0,0,0,0,0,0.030401,0,0,0.673012
397,5,6336,1268,1267,0.400000,5.145155,4.419890,5.524862,0.409185,5.145155,...,1,0,0,0,0,0,0.104852,0,0,0.000000
398,10,12671,1268,1267,0.300000,4.861573,3.864353,6.156275,0.549873,4.861573,...,0,0,0,0,0,0,0.012215,0,0,0.500402


In [21]:
# Columns to remove from df before it is used further.
# NOTE(review): the reason these four 'Stddev ...' columns are excluded is not
# visible in this notebook - presumably they were excluded when the model was
# trained; confirm.
features_to_drop = [
    'Stddev No. Of Symbols per Categorical Features', 
    'Stddev No. Of Significant Lags in Target', 
    'Stddev No. Of Insignificant Lags in Target', 
    'Stddev No. Of Seasonality Components in Target'
]
# errors='ignore' skips any listed column that is not present, so the cell can
# be re-run after the columns have already been dropped.
df = df.drop(columns=features_to_drop, errors='ignore')

In [43]:
# json is not imported by any earlier cell shown in this notebook; import it
# here so the cell does not raise NameError on a fresh kernel.
import json

# Convert the first row of df into a {column name: value} dict and
# pretty-print it as JSON.
first_row_json = df.iloc[0].to_dict()
print(json.dumps(first_row_json, indent=4))

{
    "num_clients": 10.0,
    "Sum of Instances in Clients": 13821.0,
    "Max. Of Instances in Clients": 1383.0,
    "Min. Of Instances in Clients": 1382.0,
    "Stddev of Instances in Clients": 0.3,
    "Average Dataset Missing Values %": 4.992465884583631,
    "Min Dataset Missing Values %": 4.121475054229935,
    "Max Dataset Missing Values %": 5.571635311143271,
    "Stddev Dataset Missing Values %": 0.4489697353421885,
    "Average Target Missing Values %": 4.992465884583631,
    "Min Target Missing Values %": 4.121475054229935,
    "Max Target Missing Values %": 5.571635311143271,
    "Stddev Target Missing Values %": 0.4489697353421885,
    "No. Of Features": 3.0,
    "No. Of Numerical Features": 3.0,
    "No. Of Categorical Features": 0.0,
    "Sampling Rate": 0.1666666666666666,
    "Average Skewness of Numerical Features": 0.072566663,
    "Minimum Skewness of Numerical Features": 1.1289151943589609e-05,
    "Maximum Skewness of Numerical Features": 1.305305017292974,
    "

In [15]:
import numpy as np
from scipy.optimize import minimize
class TemperatureScaling:
    """Calibrate class scores by dividing logits by one learned temperature.

    ``fit`` searches for the temperature (within [0.1, 10]) that minimises the
    mean negative log-likelihood of the true labels; ``transform`` applies that
    temperature and returns softmax probabilities.

    All softmax computations subtract the row-wise maximum before
    exponentiating, so large logits do not overflow to ``inf``/``nan``.
    """

    def __init__(self):
        # Learned temperature; remains None until fit() has been called.
        self.temperature = None

    @staticmethod
    def _log_softmax(scores):
        """Return the row-wise log-softmax of a 2-D array, computed stably."""
        # Shifting by the row maximum leaves the softmax unchanged but keeps
        # every exponent <= 0, so np.exp cannot overflow.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    def fit(self, logits, y_true):
        """
        Fit the temperature scaling parameter using the training data.

        Args:
            logits (np.array): Logits (raw output from the model), one row per
                sample and one column per class.
            y_true (np.array): True labels, used as integer column indices
                into ``logits``.
        """
        logits = np.asarray(logits, dtype=float)
        y_true = np.asarray(y_true)
        rows = np.arange(len(y_true))

        def loss_fn(T):
            # Mean negative log-likelihood of the true class at temperature T.
            # Working in log-space avoids log(0) when a probability underflows.
            log_probs = self._log_softmax(logits / T)
            return -np.mean(log_probs[rows, y_true])

        # Optimize temperature, starting from T=1 (no scaling) within bounds.
        result = minimize(loss_fn, x0=np.ones(1), bounds=[(0.1, 10)])
        self.temperature = result.x[0]

    def transform(self, logits):
        """
        Apply the temperature scaling to logits.

        Args:
            logits (np.array): Logits (raw output from the model), one row per
                sample and one column per class.

        Returns:
            np.array: Scaled probabilities; each row sums to 1.

        Raises:
            RuntimeError: If called before ``fit`` has set the temperature.
        """
        if self.temperature is None:
            raise RuntimeError(
                "TemperatureScaling is not fitted yet; call fit() before transform()."
            )
        scaled_logits = np.asarray(logits, dtype=float) / self.temperature
        return np.exp(self._log_softmax(scaled_logits))

In [23]:
import joblib
import numpy as np

# Load the saved bundle holding the model and its temperature scaler.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load bundles from a trusted source.
# NOTE(review): presumably the TemperatureScaling class must already be defined
# in this session for the pickled scaler to load - confirm.
loaded_model_and_scaler = joblib.load('final_model_with_temp_scaling.pkl')

# Pull the two entries out of the loaded dictionary.
# NOTE(review): the model is described as a RandomForest; nothing in this cell
# verifies that - confirm.
loaded_model = loaded_model_and_scaler['model']
loaded_temperature_scaler = loaded_model_and_scaler['temperature_scaler']

# Predict class probabilities for every row of df, then take their log to use
# as pseudo-logits for the temperature scaler.
# NOTE(review): assumes df's columns match what the model was trained on.
y_proba_loaded = loaded_model.predict_proba(df)
logits_loaded = np.log(y_proba_loaded + 1e-8)  # 1e-8 avoids log(0) for zero probabilities

# Apply temperature scaling to get the calibrated probabilities
y_pred_proba_scaled_loaded = loaded_temperature_scaler.transform(logits_loaded)

# Confidence of each row = its largest calibrated class probability;
# report the average over all rows.
confidence_loaded = np.max(y_pred_proba_scaled_loaded, axis=1)
print(f"Temperature Scaling - Confidence (Avg) (Loaded Model): {np.mean(confidence_loaded):.4f}")

Temperature Scaling - Confidence (Avg) (Loaded Model): 0.7290
