### Wine Quality Classification Model 

In [31]:
import pandas as pd 
from sklearn.model_selection import train_test_split

In [32]:
df = pd.read_csv('../Data/proccessed_winequality.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0,1,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,1,1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,2,1,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,3,1,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,4,1,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [4]:
df.drop(labels=['Unnamed: 0'],axis=1,inplace=True)

In [5]:
df.head()

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,1,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,1,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,1,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,1,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


### Splitting the input and output features

In [6]:
X_cls = df.drop(labels=['type'],axis=1)
y_cls = df['type']

In [7]:
X_cls.shape

(6497, 12)

In [8]:
y_cls.shape

(6497,)

In [9]:
X_cls

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.270,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.300,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.280,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
6492,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
6493,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.51,11.2,6
6494,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
6495,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [10]:
y_cls

0       1
1       1
2       1
3       1
4       1
       ..
6492    0
6493    0
6494    0
6495    0
6496    0
Name: type, Length: 6497, dtype: int64

#### Applying the train test split 

In [11]:
X_train_cls , X_test_cls , y_train_cls , y_test_cls = train_test_split(X_cls,y_cls,test_size=0.2,random_state=42)

In [12]:
X_train_cls.shape , y_train_cls.shape

((5197, 12), (5197,))

In [13]:
X_test_cls.shape , y_test_cls.shape

((1300, 12), (1300,))

In [14]:
type(X_train_cls)

pandas.core.frame.DataFrame

In [15]:
type(X_test_cls)

pandas.core.frame.DataFrame

In [16]:
type(y_train_cls)

pandas.core.series.Series

In [17]:
type(y_test_cls)

pandas.core.series.Series

### Creating the Custom Dataset 

In [18]:
import torch.nn as nn
import torch 
from torch.utils.data import Dataset,DataLoader

In [19]:
class CustomDataset(Dataset):
    def __init__(self,features , labels ):
        self.features = torch.tensor(features,dtype=torch.float32)
        self.labels = torch.tensor(labels,dtype=torch.long)

    def __len__(self):
        return len(self.features)
    

    def __getitem__(self, index):
        return self.features[index] , self.labels[index]
        

In [20]:
train_dataset = CustomDataset(X_train_cls.values,y_train_cls.values)
test_dataset = CustomDataset(X_test_cls.values,y_test_cls.values)

In [21]:
train_loader = DataLoader(train_dataset,shuffle=True,batch_size=16)
test_loader = DataLoader(test_dataset,shuffle=False,batch_size=16)

### Neural Network Architecture

In [22]:
class WineTypePredictionNN(nn.Module):
    def __init__(self, num_features):
        print('Num ' , num_features)
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(num_features,32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(32,16),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(16,8),
            nn.BatchNorm1d(8),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(8,2)
        )

    def forward(self , features ):
        return self.network(features)

### Defining Some Important Parameters

In [23]:
X_train_cls.shape[1]

12

### Training the Model on GPU

In [24]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)



Using device: cuda


In [25]:
model = WineTypePredictionNN(num_features=X_train_cls.shape[-1]).to(device)
learning_rate = 0.001
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(),learning_rate)
epochs = 100

Num  12


## Creating a training loop 

In [26]:
for epoch in range(epochs):
    total_epoch_loss = 0
    for batch_features , batch_labels in train_loader:
        batch_features , batch_labels = batch_features.to(device) , batch_labels.to(device)

        # Forward Pass 
        y_pred = model(batch_features).to(device)

        #Loss Calculation
        loss = loss_function(y_pred,batch_labels).to(device)

        #Clearning the Gradients 
        optimizer.zero_grad()

        #Backward Propogation
        loss.backward()

        #Updating the weights
        optimizer.step()

        total_epoch_loss += loss.item()

    
    print(f'Epoch : {epoch+1} , Average Loss : {total_epoch_loss/len(train_loader)}')


Epoch : 1 , Average Loss : 0.39101290225982666
Epoch : 2 , Average Loss : 0.24404279386767974
Epoch : 3 , Average Loss : 0.19957613651569073
Epoch : 4 , Average Loss : 0.1719548436254263
Epoch : 5 , Average Loss : 0.13337114191972294
Epoch : 6 , Average Loss : 0.1286560665558164
Epoch : 7 , Average Loss : 0.133122107518407
Epoch : 8 , Average Loss : 0.14355135822525392
Epoch : 9 , Average Loss : 0.13493275985121728
Epoch : 10 , Average Loss : 0.11027529670355411
Epoch : 11 , Average Loss : 0.12460590219841554
Epoch : 12 , Average Loss : 0.12430809028733235
Epoch : 13 , Average Loss : 0.12401959804674753
Epoch : 14 , Average Loss : 0.11265186147621045
Epoch : 15 , Average Loss : 0.10101359205750318
Epoch : 16 , Average Loss : 0.12160953098908067
Epoch : 17 , Average Loss : 0.10899780358689336
Epoch : 18 , Average Loss : 0.10751585730709709
Epoch : 19 , Average Loss : 0.10049875543954281
Epoch : 20 , Average Loss : 0.11182775664788026
Epoch : 21 , Average Loss : 0.10839458546099755
Epoch

In [27]:
model.eval()
train_total_loss = 0.0
train_correct = 0
train_total = 0

with torch.no_grad():
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)

        outputs = model(features)
        loss = loss_function(outputs, labels)
        train_total_loss += loss.item()

        # Predicted class
        _, predicted = torch.max(outputs, dim=1)

        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

train_avg_loss = train_total_loss / len(train_loader)
train_accuracy = 100 * train_correct / train_total

print(f"Train -> Loss: {train_avg_loss:.4f}, Accuracy: {train_accuracy:.2f}%")


Train -> Loss: 0.0567, Accuracy: 98.56%


In [28]:
model.eval()
test_total_loss = 0.0
test_correct = 0
test_total = 0

with torch.no_grad():
    for features, labels in test_loader:
        features, labels = features.to(device), labels.to(device)

        outputs = model(features)
        loss = loss_function(outputs, labels)
        test_total_loss += loss.item()

        # Predicted class
        _, predicted = torch.max(outputs, dim=1)

        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_avg_loss = test_total_loss / len(test_loader)
test_accuracy = 100 * test_correct / test_total

print(f"Test -> Loss: {test_avg_loss:.4f}, Accuracy: {test_accuracy:.2f}%")


Test -> Loss: 0.0794, Accuracy: 97.85%


In [29]:
torch.save(model.state_dict(), "wine_quality_model.pth")
