In [None]:
import os
import numpy as np
import pandas as pd
import scipy
from scipy.io import loadmat
import matplotlib.pyplot as plt
import seaborn as sns
import shutil

# Root of the cleaned prediction dump. Later cells list one sub-directory per
# experiment under this path and read every file inside each one as a CSV.
path = "../input/rul-prediction-for-liion-batteries-prediction/Cleaned"

In [None]:
# Map every experiment directory under `path` to the file names it contains.
# The directory is listed exactly once and `exps` drives the comprehension, so
# the experiment order used by later cells always matches the keys of
# `exps_dict` (previously the two came from independent os.listdir calls).
exps = os.listdir(path)
exps_dict = {exp: os.listdir(f"{path}/{exp}") for exp in exps}

# Shown as a table with one column per experiment. pandas can only build this
# frame when every experiment holds the same number of files.
pd.DataFrame(exps_dict)

In [None]:
from sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error
from sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score

def get_scores(y_true,y_pred) :
    """Score predictions with a fixed battery of sklearn regression metrics.

    Parameters
    ----------
    y_true : ground-truth target values.
    y_pred : predicted values, aligned with ``y_true``.

    Returns
    -------
    dict
        Metric name -> ``metric(y_true, y_pred)``, in the order the metrics
        are listed below. Any exception raised by an individual metric
        propagates to the caller.
    """
    metric_table = (
        ('max_error', max_error),
        ('mean_absolute_error', mean_absolute_error),
        ('mean_squared_error', mean_squared_error),
        ('mean_squared_log_error', mean_squared_log_error),
        ('median_absolute_error', median_absolute_error),
        ('mean_absolute_percentage_error', mean_absolute_percentage_error),
        ('r2_score', r2_score),
    )
    return {label: metric(y_true,y_pred) for label, metric in metric_table}

def get_preds(model,data_x) :
    """Predict with ``model`` on ``data_x`` and floor the result at zero.

    The object returned by ``model.predict`` must expose ``clip(min=...)``
    (as a numpy array does); every value below 0 is replaced by 0.
    """
    raw_preds = model.predict(data_x)
    return raw_preds.clip(min=0)

In [None]:
# Build one wide frame per experiment by placing every model's CSV side by side.
ensemble_dict = {}
for exp in exps:
    model_frames = []
    for m in exps_dict[exp] :
        # The text before the first underscore of the file name tags the columns.
        model_name = m.split('_')[0]
        frame = pd.read_csv(f"{path}/{exp}/{m}").rename(
            columns={"Capacity":f"{model_name}_Cap","model_predict":f"{model_name}_pred"})
        # Lower-case every column name so later cells can address e.g. 'lstm_cap'.
        model_frames.append(frame.rename(columns=str.lower))
    # Concatenate once per experiment instead of re-copying a growing frame on
    # every file. pd.concat rejects an empty list, so an experiment without
    # files keeps the empty frame it produced before.
    ensemble_dict[exp] = pd.concat(model_frames,axis=1) if model_frames else pd.DataFrame()

In [None]:
# Split each experiment's wide frame into features and target for the
# second-stage regressors. 'lstm_cap' is kept (renamed to 'cap') as the target
# and the other models' capacity columns are dropped.
# NOTE(review): presumably all four *_cap columns carry the same ground truth;
# confirm against the CSVs.
redundant_cap_cols = ['gru_cap','bigru_cap','bilstm_cap']
ensemble_df = {}
for exp in exps :
    print(exp)
    wide_frame = ensemble_dict[exp]
    print(wide_frame.columns.tolist())
    labelled = wide_frame.drop(columns=redundant_cap_cols)
    labelled = labelled.rename(columns={'lstm_cap':'cap'})
    # Stored as [features, target]; later cells unpack it in that order.
    ensemble_df[exp] = [labelled.drop(columns=['cap']), labelled['cap']]

In [None]:
from sklearn.linear_model import LinearRegression,HuberRegressor
from sklearn.naive_bayes import BernoulliNB,CategoricalNB,ComplementNB,GaussianNB,MultinomialNB
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import LinearSVR,NuSVR,SVR
from sklearn.tree import DecisionTreeRegressor,ExtraTreeRegressor
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,VotingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

In [None]:
from sklearn.model_selection import train_test_split

# Candidate second-stage regressors. Neither the tuple nor the CatBoost options
# depend on the experiment, so they are built once rather than per iteration.
algos = (LinearRegression,HuberRegressor,KNeighborsRegressor,LinearSVR,NuSVR,
         SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,
         XGBRegressor,LGBMRegressor,CatBoostRegressor)

params = {
    'silent':True
}

for exp in exps :
    print(exp)
    df_x,df_y = ensemble_df[exp]
    # 80 % train; the held-out 20 % is then halved into test and validation.
    train_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)
    test_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)

    print(train_x.shape,test_x.shape,train_y.shape,test_y.shape)

    for algo in algos :
        # Only CatBoost receives extra options. Each estimator is constructed
        # exactly once (no throw-away default instance) and the class itself
        # is compared instead of its name string.
        model = algo(**params) if algo is CatBoostRegressor else algo()
        print(type(model).__name__)
        model.fit(train_x,train_y)

        # One column per split, one row per metric returned by get_scores.
        model_results = pd.DataFrame({
            "Train": pd.Series(get_scores(train_y,get_preds(model,train_x))),
            "Val": pd.Series(get_scores(val_y,get_preds(model,val_x))),
            "Test": pd.Series(get_scores(test_y,get_preds(model,test_x))),
        })
        print(model_results)
        print("~"*100)
        print("\n")

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error
from sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score

def get_scores(y_true,y_pred) :
    """Return the root-mean-squared error between ``y_true`` and ``y_pred``.

    This redefinition replaces the dict-returning ``get_scores`` declared
    earlier in the notebook: once this cell has run, the name yields a single
    number instead of a mapping of metrics.
    """
    mse = mean_squared_error(y_true,y_pred)
    return np.sqrt(mse)

# RMSE of every second-stage regressor on every experiment, one row per
# (experiment, model) pair with Train / Val / Test columns.
algos = (LinearRegression,HuberRegressor,KNeighborsRegressor,LinearSVR,NuSVR,
         SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,
         XGBRegressor,LGBMRegressor,CatBoostRegressor)

params = {
    'silent':True
}

score_rows = []   # one {"Train", "Val", "Test"} dict per fitted model
row_labels = []   # matching "<experiment>_<ModelClass>" index labels
for exp in exps :
    print(exp)
    df_x,df_y = ensemble_df[exp]
    # 80 % train; the held-out 20 % is then halved into test and validation.
    train_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)
    test_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)

    print(train_x.shape,test_x.shape,train_y.shape,test_y.shape)

    for algo in algos :
        # Only CatBoost receives extra options; build each estimator once.
        model = algo(**params) if algo is CatBoostRegressor else algo()
        model.fit(train_x,train_y)

        score_rows.append({
            "Train": get_scores(train_y,get_preds(model,train_x)),
            "Val": get_scores(val_y,get_preds(model,val_x)),
            "Test": get_scores(test_y,get_preds(model,test_x)),
        })
        row_labels.append(f'{exp}_{type(model).__name__}')

# DataFrame.append was removed in pandas 2.0 and re-copied the whole table on
# every call; the rows are collected above and the frame is built once here.
model_results = pd.DataFrame(score_rows,index=row_labels)
model_results

In [None]:
# Persist the results table, index labels included, to the working directory.
model_results.to_csv(path_or_buf='ensemble_results.csv')

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.metrics import max_error,mean_absolute_error,mean_squared_error,mean_squared_log_error
from sklearn.metrics import median_absolute_error,mean_absolute_percentage_error,r2_score

def get_scores(y_true,y_pred) :
    """Compute the RMSE (square root of sklearn's mean squared error).

    Parameters
    ----------
    y_true : ground-truth target values.
    y_pred : predicted values, aligned with ``y_true``.

    Returns
    -------
    The scalar produced by ``np.sqrt`` applied to the mean squared error.
    """
    squared_error_mean = mean_squared_error(y_true,y_pred)
    return np.sqrt(squared_error_mean)

# Fit every candidate regressor on every experiment and tabulate its RMSE on
# the train / validation / test splits.
algos = (LinearRegression,HuberRegressor,KNeighborsRegressor,LinearSVR,NuSVR,
         SVR, DecisionTreeRegressor,ExtraTreeRegressor,RandomForestRegressor,ExtraTreesRegressor,
         XGBRegressor,LGBMRegressor,CatBoostRegressor)

params = {
    'silent':True
}

records = []   # per-model dicts holding the three RMSE values
labels = []    # "<experiment>_<ModelClass>" row label for each record
for exp in exps :
    print(exp)
    df_x,df_y = ensemble_df[exp]
    # First split keeps 80 % for training; the second halves the remainder
    # into the test and validation sets.
    train_x,test_x,train_y,test_y = train_test_split(df_x,df_y,test_size=0.2,random_state=0)
    test_x,val_x,test_y,val_y = train_test_split(test_x,test_y,test_size=0.5,random_state=0)

    print(train_x.shape,test_x.shape,train_y.shape,test_y.shape)

    for algo in algos :
        # CatBoost is the only estimator given constructor options; every
        # estimator is instantiated a single time.
        model = algo(**params) if algo is CatBoostRegressor else algo()
        model.fit(train_x,train_y)

        records.append({
            "Train": get_scores(train_y,get_preds(model,train_x)),
            "Val": get_scores(val_y,get_preds(model,val_x)),
            "Test": get_scores(test_y,get_preds(model,test_x)),
        })
        labels.append(f'{exp}_{type(model).__name__}')

# Built in one step: DataFrame.append no longer exists in pandas >= 2.0 and
# appending row by row copied the accumulated table on each iteration.
model_results = pd.DataFrame(records,index=labels)
model_results