Load the dataset

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

def load_data(file_path):
    """Read a monthly CSV and return band features plus grouped class labels.

    Feature columns are the columns whose name starts with a three-letter
    month abbreviation followed by ``_B`` (for example ``Jan_B...``); they are
    kept in the order they appear in the file. Labels are taken from the
    ``Lc1`` column, whose codes are collapsed into integer classes 1-9 by the
    lookup table below.

    Args:
        file_path: Location of the CSV, passed straight to ``pandas.read_csv``.

    Returns:
        Tuple ``(X, y)``: ``X`` is the 2-D array of the selected band columns
        (one row per CSV row) and ``y`` is the 1-D array of class ids.

    Raises:
        ValueError: If any ``Lc1`` value (including a missing one) has no
            entry in the lookup table. Without this check such rows would
            silently become NaN labels.
    """
    df = pd.read_csv(file_path)

    # str.startswith accepts a tuple, so a single call tests all twelve prefixes.
    month_prefixes = tuple(
        f"{month}_B"
        for month in ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    )
    bands = [col for col in df.columns if col.startswith(month_prefixes)]

    X = df[bands].values

    label_mapping = {
        'A10': 1, 'A11': 1, 'A12': 1, 'A13': 1, 'A20': 1, 'A21': 1, 'A30': 1,
        'A22': 2, 'F10': 2, 'F20': 2, 'F30': 2, 'F40': 2,
        'E10': 3, 'E20': 3, 'E30': 3, 'B50': 3, 'B51': 3, 'B52': 3, 'B53': 3,
        'B54': 3, 'B55': 3,
        'B10': 4, 'B11': 4, 'B12': 4, 'B13': 4, 'B14': 4, 'B15': 4, 'B16': 4,
        'B17': 4, 'B18': 4, 'B19': 4, 'B20': 4, 'B21': 4, 'B22': 4, 'B23': 4,
        'B30': 4, 'B31': 4, 'B32': 4, 'B33': 4, 'B34': 4, 'B35': 4, 'B36': 4,
        'B37': 4, 'B40': 4, 'B41': 4, 'B42': 4, 'B43': 4, 'B44': 4, 'B45': 4,
        'B70': 4, 'B71': 4, 'B72': 4, 'B73': 4, 'B74': 4, 'B75': 4, 'B76': 4,
        'B77': 4, 'B80': 4, 'B81': 4, 'B82': 4, 'B83': 4, 'B84': 4, 'BX1': 4,
        'BX2': 4, 'C10': 5, 'C20': 6, 'C21': 6, 'C22': 6, 'C23': 6, 'C30': 6,
        'C31': 6, 'C32': 6, 'C33': 6, 'CXX1': 6, 'CXX2': 6, 'CXX3': 6,
        'CXX4': 6, 'CXX5': 6, 'CXX6': 6, 'CXX7': 6, 'CXX8': 6, 'CXX9': 6,
        'CXXA': 6, 'CXXB': 6, 'CXXC': 6, 'CXXD': 6, 'CXXE': 6, 'D10': 7,
        'D20': 7, 'G10': 8, 'G11': 8, 'G12': 8, 'G20': 8, 'G21': 8, 'G22': 8,
        'G30': 8, 'G40': 8, 'G50': 8, 'H10': 9, 'H11': 9, 'H12': 9, 'H20': 9,
        'H21': 9, 'H22': 9, 'H23': 9
    }
    labels = df['Lc1'].map(label_mapping)

    # Series.map yields NaN for every code that is absent from the table;
    # fail here with the offending codes instead of handing NaN labels on.
    unmapped = labels.isna()
    if unmapped.any():
        unknown_codes = sorted({str(code) for code in df.loc[unmapped, 'Lc1']})
        raise ValueError(
            f"{int(unmapped.sum())} row(s) in {file_path!r} have 'Lc1' codes "
            f"with no class mapping: {unknown_codes}"
        )

    y = labels.values
    return X, y

# Read the training split and the held-out test split from their CSV files.
train_csv, test_csv = 'uk_monthly_train_10nn_norm.csv', 'uk_monthly_test_10nn_norm.csv'
X_train, y_train = load_data(train_csv)
X_test, y_test = load_data(test_csv)

### Random Forest

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate Random Forest settings; GridSearchCV tries every combination.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# 3-fold cross-validated search scored on accuracy, using all CPU cores.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring="accuracy",
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the estimator that GridSearchCV refit with the winning combination.
best_rf = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

### SVM

In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Standardise the features: the scaler learns its statistics from the training
# split only and the same transform is then applied to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Search space for the SVM.
param_grid = dict(
    C=[0.1, 1, 10, 100],                      # regularization strength
    gamma=['scale', 'auto', 0.01, 0.1, 1],    # kernel coefficient
    kernel=['rbf', 'poly'],                   # kernels to compare
    class_weight=['balanced', None],          # with / without class re-weighting
)

# 5-fold cross-validated search scored on accuracy, using all CPU cores.
svm = SVC()
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train_scaled, y_train)

print(f"Best Parameters: {grid_search.best_params_}")

# Score the refit best model on the (scaled) test split.
best_svm = grid_search.best_estimator_
y_pred_svm = best_svm.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred_svm))

### XGB

In [None]:
import numpy as np  # needed here: this cell uses np before any other cell imports it
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

# XGBClassifier needs class ids that start at 0, while the notebook's labels
# run 1..9, so class 9 is re-coded as 0 for training. The re-coded labels are
# stored in a separate array so y_train / y_test keep their 1..9 coding and the
# Random Forest / SVM cells can still be re-run safely after this one.
y_train_xgb = np.where(y_train == 9, 0, y_train)

# Define XGBoost classifier
xgb_model = xgb.XGBClassifier(objective="multi:softmax", num_class=9, eval_metric="mlogloss", use_label_encoder=False)

# Define the parameter grid
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [3, 5, 7],
    "learning_rate": [0.01, 0.1, 0.2],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.8, 1.0]
}

# Set up GridSearchCV (3-fold, accuracy, all CPU cores)
grid_search_xgb = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                               scoring="accuracy", cv=3, verbose=2, n_jobs=-1)

# Run Grid Search on the 0-based labels
grid_search_xgb.fit(X_train, y_train_xgb)

# Get the best parameters
print("Best Parameters:", grid_search_xgb.best_params_)

# best_estimator_ is the model GridSearchCV refit with the best parameters
best_xgb = grid_search_xgb.best_estimator_

# Make predictions
y_pred_xgb = best_xgb.predict(X_test)

# Convert predicted label 0 back to 9 so predictions are comparable with y_test,
# which was never re-coded and therefore needs no conversion.
y_pred_xgb = np.where(y_pred_xgb == 0, 9, y_pred_xgb)

### Evaluation with Validation Data

In [None]:
# Export the tuned models so they can be reloaded without re-running the searches.
import joblib
# RF
joblib.dump(best_rf, "best_rf_10nn.pkl")
# SVM
joblib.dump(best_svm, "best_svm_10nn.pkl")
# joblib.dump returns the list of file names it wrote, so its result must not be
# assigned back to `scaler`: doing so would replace the fitted StandardScaler
# and break every later scaler.transform(...) call.
joblib.dump(scaler, "svm_scaler.pkl")  # save scaler for future use
# XGB
best_xgb.save_model("best_xgb_10nn.json")

In [None]:
# import saved best models
import joblib
import xgboost as xgb

# NOTE: joblib.load unpickles arbitrary Python objects; only load files that
# were produced by this notebook or another trusted source.

# Random Forest
best_rf_10nn = joblib.load("best_rf_10nn.pkl")

# SVM: restore the fitted scaler together with the model, since the SVM was
# trained on standardised features and needs the same transform at predict time.
scaler = joblib.load("svm_scaler.pkl")
best_svm_10nn = joblib.load("best_svm_10nn.pkl")

# XGBoost: loaded as a low-level Booster.
# NOTE(review): Booster.predict expects an xgb.DMatrix rather than a raw array,
# and the evaluation cells in this notebook call best_rf / best_svm / best_xgb,
# not these *_10nn objects. Confirm which set is meant to be evaluated.
best_xgb_10nn = xgb.Booster()
best_xgb_10nn.load_model("best_xgb_10nn.json")

In [None]:
import pandas as pd

# NOTE(review): load_data is also defined in the first code cell of this
# notebook; once this cell runs, this later definition shadows it.
def load_data(file_path):
    """Read a monthly CSV and return band features plus grouped class labels.

    Feature columns are the columns whose name starts with a three-letter
    month abbreviation followed by ``_B`` (for example ``Jan_B...``); they are
    kept in the order they appear in the file. Labels are taken from the
    ``Lc1`` column, whose codes are collapsed into integer classes 1-9 by the
    lookup table below.

    Args:
        file_path: Location of the CSV, passed straight to ``pandas.read_csv``.

    Returns:
        Tuple ``(X, y)``: ``X`` is the 2-D array of the selected band columns
        (one row per CSV row) and ``y`` is the 1-D array of class ids.

    Raises:
        ValueError: If any ``Lc1`` value (including a missing one) has no
            entry in the lookup table. Without this check such rows would
            silently become NaN labels.
    """
    df = pd.read_csv(file_path)

    # str.startswith accepts a tuple, so a single call tests all twelve prefixes.
    month_prefixes = tuple(
        f"{month}_B"
        for month in ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    )
    bands = [col for col in df.columns if col.startswith(month_prefixes)]

    X = df[bands].values

    label_mapping = {
        'A10': 1, 'A11': 1, 'A12': 1, 'A13': 1, 'A20': 1, 'A21': 1, 'A30': 1,
        'A22': 2, 'F10': 2, 'F20': 2, 'F30': 2, 'F40': 2,
        'E10': 3, 'E20': 3, 'E30': 3, 'B50': 3, 'B51': 3, 'B52': 3, 'B53': 3,
        'B54': 3, 'B55': 3,
        'B10': 4, 'B11': 4, 'B12': 4, 'B13': 4, 'B14': 4, 'B15': 4, 'B16': 4,
        'B17': 4, 'B18': 4, 'B19': 4, 'B20': 4, 'B21': 4, 'B22': 4, 'B23': 4,
        'B30': 4, 'B31': 4, 'B32': 4, 'B33': 4, 'B34': 4, 'B35': 4, 'B36': 4,
        'B37': 4, 'B40': 4, 'B41': 4, 'B42': 4, 'B43': 4, 'B44': 4, 'B45': 4,
        'B70': 4, 'B71': 4, 'B72': 4, 'B73': 4, 'B74': 4, 'B75': 4, 'B76': 4,
        'B77': 4, 'B80': 4, 'B81': 4, 'B82': 4, 'B83': 4, 'B84': 4, 'BX1': 4,
        'BX2': 4, 'C10': 5, 'C20': 6, 'C21': 6, 'C22': 6, 'C23': 6, 'C30': 6,
        'C31': 6, 'C32': 6, 'C33': 6, 'CXX1': 6, 'CXX2': 6, 'CXX3': 6,
        'CXX4': 6, 'CXX5': 6, 'CXX6': 6, 'CXX7': 6, 'CXX8': 6, 'CXX9': 6,
        'CXXA': 6, 'CXXB': 6, 'CXXC': 6, 'CXXD': 6, 'CXXE': 6, 'D10': 7,
        'D20': 7, 'G10': 8, 'G11': 8, 'G12': 8, 'G20': 8, 'G21': 8, 'G22': 8,
        'G30': 8, 'G40': 8, 'G50': 8, 'H10': 9, 'H11': 9, 'H12': 9, 'H20': 9,
        'H21': 9, 'H22': 9, 'H23': 9
    }
    labels = df['Lc1'].map(label_mapping)

    # Series.map yields NaN for every code that is absent from the table;
    # fail here with the offending codes instead of handing NaN labels on.
    unmapped = labels.isna()
    if unmapped.any():
        unknown_codes = sorted({str(code) for code in df.loc[unmapped, 'Lc1']})
        raise ValueError(
            f"{int(unmapped.sum())} row(s) in {file_path!r} have 'Lc1' codes "
            f"with no class mapping: {unknown_codes}"
        )

    y = labels.values
    return X, y

# Read the validation split used for the final model comparison.
val_csv = 'uk_monthly_val_10nn_norm.csv'
X_val, y_val = load_data(val_csv)

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# --- Random Forest: predicts directly on the validation features.
rf_preds = best_rf.predict(X_val)

# --- SVM: the validation features go through the fitted scaler first.
X_val_scaled = scaler.transform(X_val)
svm_preds = best_svm.predict(X_val_scaled)

# --- XGBoost: the model works with class 9 coded as 0.
is_class_nine = (y_val == 9)
y_val_xgb = np.where(is_class_nine, 0, y_val)
raw_xgb_preds = best_xgb.predict(X_val)

# Translate the 0 code back to 9 in both the predictions and the reference
# labels so they use the same coding as the other models' outputs.
xgb_preds = np.where(raw_xgb_preds == 0, 9, raw_xgb_preds)
y_val_xgb = np.where(y_val_xgb == 0, 9, y_val_xgb)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

def evaluate_model(y_true, y_pred, model_name):
    """Print a classification report and a confusion matrix for one model.

    Args:
        y_true: Reference labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Display name used in the printed heading.

    Returns:
        None. All results are written to standard output.
    """
    heading = f"\n🔍 {model_name} Evaluation:\n"
    print(heading)

    # Per-class precision / recall / F1, shown with three decimals.
    report = classification_report(y_true, y_pred, digits=3)
    print(report)

    print("Confusion Matrix:")
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix)

# Report every model against the validation labels, in the same order as before.
for truth, preds, label in (
    (y_val, rf_preds, "Random Forest"),
    (y_val, svm_preds, "SVM"),
    (y_val_xgb, xgb_preds, "XGBoost"),
):
    evaluate_model(truth, preds, label)