In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [6]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import max_error
from sklearn.metrics import r2_score

In [7]:
address=r"C:\Users\Yashraj\OneDrive - IIT Hyderabad\IC_Analysis\0. Main\CC_gaussian _Cleaned_IC Curve (final)\features"

database=pd.read_pickle(address)

In [8]:
database.drop([17],axis=0,inplace=True)
database.reset_index(drop=True)

Unnamed: 0,max_ic,max_v,max2_ic,max2_v,slope_left,slope_right,Q_AtoB,Q_LtoR,energy,ic_capacity,capacity
0,10795.352357,3.916486,8821.589012,3.823246,305291.79285,-20301.33623,526.692962,363.14746,4777.697243,2122.309889,2288.498556
1,11958.094144,3.924406,8210.517968,3.822886,358293.506847,-160390.874307,576.332215,372.703834,4787.173414,2132.568833,2305.934889
2,11975.390891,3.940994,6866.361764,3.837949,295079.978676,-163744.098923,614.372039,410.806559,4570.4982,2098.734194,2308.251583
3,10381.444322,3.942594,6916.239016,3.8349,171724.534168,-106316.476652,614.037974,366.854791,4453.603029,2108.379611,2320.22425
4,7250.43876,3.951697,5979.646149,3.854949,94167.063378,-32233.75934,474.734869,269.636026,4148.660286,1983.6,2216.7
5,7625.19051,3.944069,5321.583003,3.857123,82910.226544,-33607.929088,452.690655,285.445281,4298.050171,2000.4,2248.3
6,8808.256554,3.969066,6079.539676,3.873729,187610.456769,-74882.971823,482.232815,298.839302,4523.9887,2005.2,2260.0
7,7739.316959,3.940969,5650.255646,3.848851,149160.477388,-26728.38922,449.01557,279.03889,4413.133329,2039.3,2262.0
8,8173.591623,3.950466,5699.904426,3.855931,139964.726757,-49862.100174,473.575006,294.340298,4502.713457,2047.0,2277.0
9,8206.92415,3.993,7054.1472,3.887823,161432.194508,-49945.650244,523.139844,288.358914,4551.355971,1975.2,2246.4


In [58]:
# copy the data
data = database.__deepcopy__()
#data.drop(['ic_capacity', 'capacity'],axis=1,inplace=True)

X=data.drop(['ic_capacity', 'capacity'],axis=1).values
y= data['capacity'].values.reshape(-1,1)

sc=StandardScaler()
Scaled_data=sc.fit_transform(X)

In [89]:
# Set the n_components=3
principal=PCA(n_components=2)
principal.fit(Scaled_data)
pca=principal.transform(Scaled_data)

In [90]:
# check how much variance is explained by each principal component
print(principal.explained_variance_ratio_)

[0.50407028 0.30080016]


In [91]:
X= pca
y= database['capacity'].values.reshape(-1,1)
print(X.shape)
print(y.shape)

(47, 2)
(47, 1)


In [92]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=33)

X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)

In [93]:
class TabularModel(nn.Module):

    def __init__(self, n_cont, out_sz, layers):
        super().__init__()
        
        self.bn_cont = nn.BatchNorm1d(n_cont)
        
        layerlist = []
        
        for i in layers:
            layerlist.append(nn.Linear(n_cont,i)) 
            layerlist.append(nn.ReLU(inplace=True))
            layerlist.append(nn.BatchNorm1d(i))
            n_cont = i
        layerlist.append(nn.Linear(layers[-1],out_sz))
            
        self.layers = nn.Sequential(*layerlist)
    
    def forward(self, x):
        
        x = self.bn_cont(x)
        x = self.layers(x)
        return x

In [94]:
torch.manual_seed(33)
model = TabularModel(X.shape[1], 1, [16,32,64,32,16])

In [95]:
criterion = nn.MSELoss()  # we'll convert this to RMSE later
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [96]:
epochs = 5001
losses = []

for i in range(epochs):
    
    y_pred = model.forward(X_train)
    loss = torch.sqrt(criterion(y_pred, y_train))
    losses.append(loss.item())

# A neat trick to save screen space:
    if i%500 == 0:
        print(f'epoch: {i:2}  loss: {loss.item():10.8f}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

epoch:  0  loss: 2267.46215820
epoch: 500  loss: 1245.51086426
epoch: 1000  loss: 0.99180502
epoch: 1500  loss: 0.36315149
epoch: 2000  loss: 0.36077693
epoch: 2500  loss: 0.42690378
epoch: 3000  loss: 0.47575286
epoch: 3500  loss: 0.50015920
epoch: 4000  loss: 0.41772476
epoch: 4500  loss: 0.41265622
epoch: 5000  loss: 0.30858955


In [97]:
# To Evaluate the Entire Test set
# no_grad ensure that gradient is not calculated and weights and bias are not updated

with torch.no_grad():
    y_val = model.forward(X_test)
    loss = torch.sqrt(criterion(y_val , y_test))
print(f'{loss:10.8f}')

45.39012527


In [98]:
print(f'{"PREDICTED":>12} {"ACTUAL":>8} {"DIFF":>8}')
for i in range(y_test.shape[0]):
    diff = np.abs(y_val[i].item()-y_test[i].item())
    print(f'{i+1:2}. {y_val[i].item():8.4f} {y_test[i].item():8.4f} {diff:8.4f}')

   PREDICTED   ACTUAL     DIFF
 1. 2279.1018 2248.3000  30.8018
 2. 2323.0999 2310.4836  12.6162
 3. 2199.0891 2259.1001  60.0110
 4. 2249.3494 2238.3000  11.0493
 5. 2172.4478 2243.7000  71.2522
 6. 2269.6565 2261.1001   8.5564
 7. 2279.2893 2216.7000  62.5894
 8. 2323.2358 2281.2085  42.0273
 9. 2321.0852 2288.4985  32.5867
10. 2254.0544 2191.8000  62.2544


In [99]:
# MAE
mean_absolute_error(y_test, y_val)

39.374462

In [100]:
#MSE
mean_squared_error(y_test, y_val)

2060.2634

In [101]:
#MAPE
mean_absolute_percentage_error(y_test, y_val)

0.017550306

In [102]:
# Max Error
max_error(y_test, y_val)

71.2522

In [103]:
r2_score(y_test, y_val)

-0.9001903358249592

# RANDOM FOREST MODEL

In [25]:
#Random forest model 
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

model = RandomForestRegressor()
model.fit(X_train,np.ravel(y_train))
# Get the mean absolute error on the validation data

y_val = model.predict(X_test)

In [26]:
# MAE
mean_absolute_error(y_test, y_val)

21.56110205078121

In [27]:
#MSE
mean_squared_error(y_test, y_val)

733.3677593785792

In [28]:
#MAPE
mean_absolute_percentage_error(y_test, y_val)

0.00954432274116313

In [29]:
# Max Error
max_error(y_test, y_val)

55.87208007812478

In [30]:
r2_score(y_test, y_val)

0.3236116365201539

# XG Boost

In [31]:
import xgboost as xgb
XGBModel = xgb.XGBRegressor()
XGBModel.fit(X_train,np.ravel(y_train) , verbose=False)

# Get the mean absolute error on the validation data :
y_val = XGBModel.predict(X_test)


  from pandas import MultiIndex, Int64Index


In [32]:
# MAE
mean_absolute_error(y_test, y_val)

19.709034

In [33]:
#MSE
mean_squared_error(y_test, y_val)

731.0483

In [34]:
#MAPE
mean_absolute_percentage_error(y_test, y_val)

0.008700091

In [35]:
# Max Error
max_error(y_test, y_val)

54.915527

In [36]:
r2_score(y_test, y_val)

0.32575094075740374

# GradientBoostingRegressor

In [37]:
from sklearn.ensemble import GradientBoostingRegressor
reg = GradientBoostingRegressor(random_state=0)
reg.fit(X_train, np.ravel(y_train))
y_val=reg.predict(X_test)

In [40]:
# MAE
mean_absolute_error(y_test, y_val)

23.020956183548606

In [41]:
#MSE
mean_squared_error(y_test, y_val)

840.9480281165976

In [42]:
#MAPE
mean_absolute_percentage_error(y_test, y_val)

0.010218639722137622

In [43]:
# Max Error
max_error(y_test, y_val)

52.299508897147916

In [44]:
r2_score(y_test, y_val)

0.22438987365442775

# ElasticNet

In [45]:
from sklearn.linear_model import ElasticNet
reg= ElasticNet(random_state=0)
reg.fit(X_train,y_train)
y_val= reg.predict(X_test)

In [46]:
# MAE
mean_absolute_error(y_test, y_val)

19.080518161877173

In [47]:
#MSE
mean_squared_error(y_test, y_val)

617.1462571627691

In [48]:
#MAPE
mean_absolute_percentage_error(y_test, y_val)

0.008483924486462891

In [49]:
# Max Error
max_error(y_test, y_val)

48.11759323241313

In [50]:
r2_score(y_test, y_val)

0.4308032476587895

# SVM

In [51]:
from sklearn.svm import SVR
reg = SVR(kernel='rbf',degree=15)
reg.fit(X_train,np.ravel(y_train))
y_val= reg.predict(X_test)

In [52]:
print(f'{"PREDICTED":>12} {"ACTUAL":>8} {"DIFF":>8}')
for i in range(10):
    diff = np.abs(y_val[i].item()-y_test[i].item())
    print(f'{i+1:2}. {y_val[i].item():8.4f} {y_test[i].item():8.4f} {diff:8.4f}')

   PREDICTED   ACTUAL     DIFF
 1. 2261.3195 2248.3000  13.0195
 2. 2275.2259 2310.4836  35.2577
 3. 2261.6008 2259.1001   2.5007
 4. 2262.7717 2238.3000  24.4716
 5. 2261.3544 2243.7000  17.6544
 6. 2265.9761 2261.1001   4.8760
 7. 2261.4967 2216.7000  44.7967
 8. 2268.7954 2281.2085  12.4131
 9. 2274.3724 2288.4985  14.1261
10. 2262.2937 2191.8000  70.4937


In [53]:
# MAE
mean_absolute_error(y_test, y_val)

23.960945694569183

In [54]:
#MSE
mean_squared_error(y_test, y_val)

968.2912465122488

In [55]:
#MAPE
mean_absolute_percentage_error(y_test, y_val)

0.010710095721078224

In [56]:
# Max Error
max_error(y_test, y_val)

70.49365884837425

In [57]:
r2_score(y_test, y_val)

0.10694065395614627