In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load the Date Fruit CSV.
# The location can be overridden with the DATEFRUIT_CSV environment variable
# so the notebook is not tied to a single machine; the original absolute path
# is kept as the fallback so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    "DATEFRUIT_CSV", "C:/Users/gokup/Downloads/DateFruit_Dataset.csv"
)

df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,AREA,PERIMETER,MAJOR_AXIS,MINOR_AXIS,ECCENTRICITY,EQDIASQ,SOLIDITY,CONVEX_AREA,EXTENT,ASPECT_RATIO,...,KurtosisRR,KurtosisRG,KurtosisRB,EntropyRR,EntropyRG,EntropyRB,ALLdaub4RR,ALLdaub4RG,ALLdaub4RB,Class
0,422163,2378.908,837.8484,645.6693,0.6373,733.1539,0.9947,424428,0.7831,1.2976,...,3.237,2.9574,4.2287,-59191263232,-50714214400,-39922372608,58.7255,54.9554,47.84,BERHI
1,338136,2085.144,723.8198,595.2073,0.569,656.1464,0.9974,339014,0.7795,1.2161,...,2.6228,2.635,3.1704,-34233065472,-37462601728,-31477794816,50.0259,52.8168,47.8315,BERHI
2,526843,2647.394,940.7379,715.3638,0.6494,819.0222,0.9962,528876,0.7657,1.315,...,3.7516,3.8611,4.7192,-93948354560,-74738221056,-60311207936,65.4772,59.286,51.9378,BERHI
3,416063,2351.21,827.9804,645.2988,0.6266,727.8378,0.9948,418255,0.7759,1.2831,...,5.0401,8.6136,8.2618,-32074307584,-32060925952,-29575010304,43.39,44.1259,41.1882,BERHI
4,347562,2160.354,763.9877,582.8359,0.6465,665.2291,0.9908,350797,0.7569,1.3108,...,2.7016,2.9761,4.4146,-39980974080,-35980042240,-25593278464,52.7743,50.908,42.6666,BERHI


In [3]:
# Sanity check: (rows, columns) of the loaded frame.
df.shape

(898, 35)

In [4]:
# Separate the label from the features: "Class" is the target column,
# every remaining column is used as a feature.

y = df["Class"]
X = df.drop(columns="Class")

In [5]:
# List the distinct class labels present in the target column.

df["Class"].unique()

array(['BERHI', 'DEGLET', 'DOKOL', 'IRAQI', 'ROTANA', 'SAFAVI', 'SOGAY'],
      dtype=object)

In [6]:
# Encode the string class labels as integer ids before splitting.
# The encoder is fitted on the raw df["Class"] column rather than on `y`,
# so re-running this cell does not re-fit it on already-encoded integers
# (which would replace the class names kept in le.classes_ with integers).

le = LabelEncoder()
y = le.fit_transform(df["Class"])

In [7]:
# Hold out 20% of the samples for testing; the fixed random_state keeps
# the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Standardise the features.
# The scaler is fitted on the training split only and then applied,
# unchanged, to both the training and the test split.

scaler = StandardScaler().fit(X_train)
x_train_scaled = scaler.transform(X_train)
x_test_scaled = scaler.transform(X_test)

In [9]:
# Wrap the arrays as tensors: float32 for the scaled features,
# long (int64) for the encoded class ids.

x_train_tensor, x_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (x_train_scaled, x_test_scaled)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

In [10]:
# Pair each feature tensor with its label tensor so that indexing a dataset
# yields one (features, label) sample.

train_tensor, test_tensor = (
    TensorDataset(features, labels)
    for features, labels in (
        (x_train_tensor, y_train_tensor),
        (x_test_tensor, y_test_tensor),
    )
)

In [11]:
# Mini-batch loaders (32 samples per batch).
# Only the training loader shuffles; the test loader keeps the stored order.

train_loader = DataLoader(dataset=train_tensor, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_tensor, batch_size=32, shuffle=False)

## ANN Model

In [12]:
# Feed-forward classifier trained on the scaled (non-PCA) features.

class ANN(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    Args:
        in_features: Number of input features. When omitted, it falls back to
            ``X.shape[1]`` (the notebook-level feature frame), which is the
            value the constructor previously hard-wired.
        hidden: Width of both hidden layers (default 64).
        num_classes: Number of output logits (default 7).

    The output layer returns raw logits (no softmax is applied), which is the
    input form ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, in_features=None, hidden=64, num_classes=7):
        super().__init__()

        if in_features is None:
            # Backward-compatible default: size the input layer from the
            # notebook-level feature frame, as before.
            in_features = X.shape[1]

        self.model = nn.Sequential(
            # 1st hidden layer
            nn.Linear(in_features, hidden),
            nn.ReLU(),

            # 2nd hidden layer
            nn.Linear(hidden, hidden),
            nn.ReLU(),

            # Output layer (one logit per class)
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch ``x`` of shape (batch, in_features)."""
        return self.model(x)

In [13]:
# Seed torch's global RNG before building the network so that the initial
# weights (and the DataLoader shuffling, which draws from the same global
# generator) are repeatable from one run to the next.
torch.manual_seed(42)

# Network instance
model = ANN()

# Loss function and optimizer (Adam with its default hyper-parameters)
criteria = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [14]:
# Train the ANN for a fixed number of epochs.
# train_loss collects one value per epoch: the mean of the batch losses.

epochs = 100
train_loss = []

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for inputs, targets in train_loader:
        optimizer.zero_grad()  # drop gradients left over from the previous step

        logits = model(inputs)
        loss = criteria(logits, targets)
        loss.backward()
        optimizer.step()  # parameter update

        running_loss += loss.item()

    # Average over the number of batches seen in this epoch.
    epoch_train_loss = running_loss / len(train_loader)
    train_loss.append(epoch_train_loss)

    print(f"epoch {epoch+1}/{epochs} >== train loss = {epoch_train_loss}")

epoch 1/100 >== train loss = 1.68777030447255
epoch 2/100 >== train loss = 1.0864973586538564
epoch 3/100 >== train loss = 0.7374417315358701
epoch 4/100 >== train loss = 0.5554129779338837
epoch 5/100 >== train loss = 0.4645652589590653
epoch 6/100 >== train loss = 0.4022444583799528
epoch 7/100 >== train loss = 0.3589683032554129
epoch 8/100 >== train loss = 0.32869929852692975
epoch 9/100 >== train loss = 0.31211804501388385
epoch 10/100 >== train loss = 0.28534385367580084
epoch 11/100 >== train loss = 0.2619681215804556
epoch 12/100 >== train loss = 0.24783031610043152
epoch 13/100 >== train loss = 0.23226185142993927
epoch 14/100 >== train loss = 0.22645543353713077
epoch 15/100 >== train loss = 0.21247939603484195
epoch 16/100 >== train loss = 0.20375274866819382
epoch 17/100 >== train loss = 0.2027296851510587
epoch 18/100 >== train loss = 0.1912381551835848
epoch 19/100 >== train loss = 0.17922430420699326
epoch 20/100 >== train loss = 0.17466710378294406
epoch 21/100 >== trai

In [15]:
# Evaluate the trained ANN on the held-out test batches.

model.eval()
correct = 0
total = 0
with torch.no_grad():  # inference only: no gradient tracking
    for xb, yb in test_loader:
        logits = model(xb)  # raw per-class scores; a row does not sum to 1
        predicted = logits.argmax(dim=1)  # index of the largest logit per sample

        total += len(yb)  # number of samples in this batch
        correct += int((predicted == yb).sum())  # predictions matching yb


print("Total value ", total)
print("Correct", correct)
print("accuracy",correct/total)
    

Total value  180
Correct 170
accuracy 0.9444444444444444


In [27]:
from sklearn.decomposition import PCA

# Reduce the scaled features with PCA, keeping enough components to retain
# 95% of the variance. The projection is fitted on the training split and
# then applied to the test split.
pca = PCA(n_components=0.95)
x_train_pca = pca.fit_transform(x_train_scaled)
x_test_pca = pca.transform(x_test_scaled)


# PCA features -> float32 tensors
x_train_pca_tensor, x_test_pca_tensor = (
    torch.tensor(projected, dtype=torch.float32)
    for projected in (x_train_pca, x_test_pca)
)

# Labels are not affected by PCA; wrap them again as long (int64) tensors
y_train_pca_tensor, y_test_pca_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)


# Tensor datasets: (projected features, label) pairs
pca_train_tensor = TensorDataset(x_train_pca_tensor, y_train_pca_tensor)
pca_test_tensor = TensorDataset(x_test_pca_tensor, y_test_pca_tensor)

# Batch loaders: only the training loader shuffles
pca_train_loader = DataLoader(dataset=pca_train_tensor, batch_size=32, shuffle=True)
pca_test_loader = DataLoader(dataset=pca_test_tensor, batch_size=32, shuffle=False)

In [28]:
# Shape of the PCA-projected training features: (samples, retained components).
x_train_pca.shape

(718, 9)

In [56]:
# Feed-forward classifier trained on the PCA-reduced features.
class PANN(nn.Module):
    """Fully connected network for PCA features: two ReLU hidden layers, linear output.

    Args:
        in_features: Number of input features (principal components). When
            omitted, it is taken from ``x_train_pca.shape[1]`` so the input
            layer always matches however many components PCA retained,
            instead of a fixed 9.
        hidden: Width of both hidden layers (default 64).
        num_classes: Number of output logits (default 7).

    The output layer returns raw logits (no softmax is applied).
    """

    def __init__(self, in_features=None, hidden=64, num_classes=7):
        super().__init__()

        if in_features is None:
            # PCA(n_components=0.95) chooses the component count from the
            # data, so read it from the projected training matrix.
            in_features = x_train_pca.shape[1]

        self.pca_model = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(),

            nn.Linear(hidden, hidden),
            nn.ReLU(),

            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch ``x`` of shape (batch, in_features)."""
        return self.pca_model(x)

In [57]:
# Seed torch's global RNG so the PCA model's initial weights and the batch
# order drawn by the shuffling loader are repeatable from run to run.
torch.manual_seed(42)

p_model = PANN()

# NOTE: `criteria` and `optimizer` are rebound here. From this point on
# `optimizer` holds p_model's parameters, so any code that uses `optimizer`
# afterwards steps p_model and not the earlier `model`.
criteria = nn.CrossEntropyLoss()
optimizer = optim.Adam(p_model.parameters())

In [58]:
# Train the PCA-feature network.
# p_train_loss collects one value per epoch: the mean of the batch losses.

epochs = 100
p_train_loss = []

for epoch in range(epochs):
    p_model.train()
    p_running_loss = 0

    for inputs, targets in pca_train_loader:
        optimizer.zero_grad()  # drop gradients left over from the previous step

        logits = p_model(inputs)
        loss = criteria(logits, targets)
        loss.backward()
        optimizer.step()  # parameter update

        p_running_loss += loss.item()

    # Average over the number of batches seen in this epoch.
    pca_epoch_loss = p_running_loss / len(pca_train_loader)
    p_train_loss.append(pca_epoch_loss)

    print(f"epochs {epoch+1}/{epochs} >== p_train_loss : {pca_epoch_loss}")

epochs 1/100 >== p_train_loss : 1.5749850480452827
epochs 2/100 >== p_train_loss : 1.0028354007264841
epochs 3/100 >== p_train_loss : 0.7060836644276328
epochs 4/100 >== p_train_loss : 0.5376273043777632
epochs 5/100 >== p_train_loss : 0.44394659736882086
epochs 6/100 >== p_train_loss : 0.38328519463539124
epochs 7/100 >== p_train_loss : 0.3414960788643878
epochs 8/100 >== p_train_loss : 0.3154804667700892
epochs 9/100 >== p_train_loss : 0.2875427674988042
epochs 10/100 >== p_train_loss : 0.2712429269500401
epochs 11/100 >== p_train_loss : 0.2572819266630256
epochs 12/100 >== p_train_loss : 0.24778413157100262
epochs 13/100 >== p_train_loss : 0.23987027225287064
epochs 14/100 >== p_train_loss : 0.2355820646752482
epochs 15/100 >== p_train_loss : 0.22246847917204318
epochs 16/100 >== p_train_loss : 0.21353784705633702
epochs 17/100 >== p_train_loss : 0.20889265770497528
epochs 18/100 >== p_train_loss : 0.2031456617557484
epochs 19/100 >== p_train_loss : 0.19340098616869553
epochs 20/100

In [59]:
# Evaluate the PCA-feature network on the held-out test batches.

p_model.eval()
correct = 0
total = 0

with torch.no_grad():  # inference only: no gradient tracking
    for xb, yb in pca_test_loader:
        logits = p_model(xb)
        predicted = logits.argmax(dim=1)  # index of the largest logit per sample

        total += len(yb)  # number of samples in this batch
        correct += int((predicted == yb).sum())  # predictions matching yb

print("accuracy" , (correct/total))
print(total , "total ")
print(correct, "correct")

accuracy 0.9055555555555556
180 total 
163 correct
