In [None]:
!pip install xgboost
!pip install joblib
!pip install scikit-learn

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Datasets
# Read the training and test workbooks.
train_df = pd.read_excel('/content/train_SOC.xlsx')
test_df = pd.read_excel('/content/test_SOC.xlsx')

# Target vector: the 'SOC(%)' column of each table.
y_train = train_df['SOC(%)']
y_test = test_df['SOC(%)']

# Feature matrices: every column except the row identifier and the target.
X_train = train_df.drop(columns=['ID', 'SOC(%)'])
X_test = test_df.drop(columns=['ID', 'SOC(%)'])

In [None]:
# Feature Selection RFE
# Recursive feature elimination: refit the forest and remove one feature per
# iteration (step=1) until 10 features remain.
# random_state is fixed so that the forest, and therefore the set of selected
# columns, is identical on every Restart & Run All.
estimator = RandomForestRegressor(n_estimators=100, random_state=42)
selector = RFE(estimator, n_features_to_select=10, step=1)
selector = selector.fit(X_train, y_train)

# RFE rank of every original column; rank 1 marks a selected feature.
selected_features = pd.Series(selector.ranking_, index=X_train.columns)

# Reduce both matrices to the selected columns (returned as NumPy arrays).
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

In [None]:
# Rebuild labelled DataFrames from the arrays produced by the selector.
# The column labels are the features whose RFE rank equals 1.
kept_columns = selected_features.loc[selected_features.eq(1)].index
X_train_selected_df = pd.DataFrame(X_train_selected, columns=kept_columns)
X_test_selected_df = pd.DataFrame(X_test_selected, columns=kept_columns)

# Write the reduced feature tables to Excel without the row index.
X_train_selected_df.to_excel('/content/train_selected_SOC.xlsx', index=False)
X_test_selected_df.to_excel('/content/test_selected_SOC.xlsx', index=False)


In [None]:
# Random Forest
# Exhaustive grid search: 3 * 4 * 3 * 3 * 3 = 324 parameter combinations,
# each evaluated with 5-fold cross-validation (1620 fits).
rf_parameters = {
    'n_estimators': [100, 500, 1000],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    # 1.0 (use all features) replaces the former 'auto' alias. For regressors
    # 'auto' meant exactly that, but it was deprecated in scikit-learn 1.1 and
    # removed in 1.3, where every candidate using it fails to fit.
    'max_features': [1.0, 'sqrt', 'log2']
}

# A fixed random_state makes the cross-validation scores, and hence the chosen
# best parameters, reproducible across runs.
rf_grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=rf_parameters,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
rf_grid_search.fit(X_train_selected, y_train)

print(rf_grid_search.best_params_)

In [None]:
# Support Vector Regression (SVR)
# Grid of 4 * 4 * 5 = 80 candidates, scored with 5-fold cross-validation.
# NOTE(review): SVR is sensitive to feature scale and no scaling step is visible
# in this notebook -- confirm the input features are already standardised.
svr_parameters = {
    'C': [0.1, 1, 10, 100],
    'epsilon': [0.01, 0.1, 0.5, 1],
    'gamma': ['scale', 'auto', 0.01, 0.1, 1],
}

svr_grid_search = GridSearchCV(
    estimator=SVR(),
    param_grid=svr_parameters,
    cv=5,
    n_jobs=-1,  # use every available core
    verbose=2,
)
svr_grid_search.fit(X_train_selected, y_train)

# Report the winning parameter combination.
print(svr_grid_search.best_params_)


In [None]:
# XGBoost
# Grid of 4 * 4 * 3 * 3 * 3 * 3 * 3 = 3888 candidates; with 5-fold
# cross-validation that is 19440 model fits, so expect a long run.
xgb_parameters = {
    'learning_rate': [0.01, 0.05, 0.1, 0.3],
    'max_depth': [4, 7, 10, 13],
    'gamma': [0, 0.1, 0.2],
    'colsample_bytree': [0.5, 0.7, 1],
    'min_child_weight': [1, 5, 10],
    'subsample': [0.5, 0.75, 1],
    'n_estimators': [50, 100, 150],
}

xgb_grid_search = GridSearchCV(
    estimator=XGBRegressor(),
    param_grid=xgb_parameters,
    cv=5,
    n_jobs=-1,  # use every available core
    verbose=2,
)
xgb_grid_search.fit(X_train_selected, y_train)

# Report the winning parameter combination.
print(xgb_grid_search.best_params_)


In [None]:
def calculate_metrics(y_true, y_pred):
    """Compute R2, RMSE and RPIQ for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.

    Returns
    -------
    tuple
        ``(r2, rmse, rpiq)`` where ``rpiq`` is the inter-quartile range of
        ``y_true`` divided by ``rmse``.
    """
    r2 = r2_score(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    # RPIQ is defined on the inter-quartile range (Q3 - Q1) of the observed
    # values. Dividing the standard deviation by RMSE yields RPD, a different
    # statistic, so the quartiles are used here.
    q1, q3 = np.percentile(y_true, [25, 75])
    rpiq = (q3 - q1) / rmse  # Ratio of Performance to Inter-Quartile
    return r2, rmse, rpiq


In [None]:
# Function to evaluate and export predictions
def evaluate_and_export(model, X_train, y_train, X_test, y_test, model_name):
    """Print train/test metrics for a fitted model and export its test predictions.

    Parameters
    ----------
    model : estimator
        Fitted object exposing ``predict``.
    X_train, y_train : array-like
        Training features and observed targets.
    X_test, y_test : array-like
        Test features and observed targets.
    model_name : str
        Label used in the printed report and in the output file name
        ``/content/{model_name}_predictions.xlsx``.

    Returns
    -------
    None
    """
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # calculate_metrics returns a 3-tuple (r2, rmse, rpiq). Unpacking by name
    # avoids the out-of-range index 3 and the RMSE/RPIQ mix-up that positional
    # indexing produced.
    train_r2, train_rmse, train_rpiq = calculate_metrics(y_train, y_train_pred)
    test_r2, test_rmse, test_rpiq = calculate_metrics(y_test, y_test_pred)

    print(f"{model_name} Training Metrics: R2: {train_r2}, RMSE: {train_rmse}, RPIQ: {train_rpiq}")
    print(f"{model_name} Testing Metrics: R2: {test_r2}, RMSE: {test_rmse}, RPIQ: {test_rpiq}")

    # Save observed vs. predicted test values side by side, without the row index.
    predictions_df = pd.DataFrame({'Actual SOC': y_test, 'Predicted SOC': y_test_pred})
    predictions_df.to_excel(f'/content/{model_name}_predictions.xlsx', index=False)


# Evaluate the best estimator of each grid search on the same train/test split.
for label, search in (
    ("RF", rf_grid_search),
    ("SVR", svr_grid_search),
    ("XGBoost", xgb_grid_search),
):
    evaluate_and_export(
        search.best_estimator_,
        X_train_selected,
        y_train,
        X_test_selected,
        y_test,
        label,
    )


In [None]:

# Persist the best estimator found by each grid search to disk.
joblib.dump(
    value=rf_grid_search.best_estimator_,
    filename='/content/best_rf_model.joblib',
)
joblib.dump(
    value=xgb_grid_search.best_estimator_,
    filename='/content/best_xgb_model.joblib',
)
joblib.dump(
    value=svr_grid_search.best_estimator_,
    filename='/content/best_svr_model.joblib',
)