In [58]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from ndlinear import NdLinear
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
data = pd.read_csv("WineQT.csv")
data = data.dropna()
print("Data preview:")
print(data.head())

Data preview:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  Id  
0      9.4        5   0  
1      9.8        5 

In [59]:
print("Columns in dataset:", data.columns.tolist())
print("Dataset shape:", data.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

print("X_train_tensor shape:", X_train_tensor.shape)
print("y_train_tensor shape:", y_train_tensor.shape)

# NdLinear Model (Ensemble AI)
class NdLinearModel(nn.Module):
    def __init__(self, input_dim):
        super(NdLinearModel, self).__init__()
        self.ndlinear = NdLinear(input_dims=(input_dim,), hidden_size=(64,))
        self.output = nn.Linear(64, 1)
    def forward(self, x):
        x = self.ndlinear(x) 
        x = torch.relu(x)
        x = self.output(x)  
        return x
# Train NdLinear model
input_dim = X_train_scaled.shape[1]
ndlinear_model = NdLinearModel(input_dim=input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(ndlinear_model.parameters(), lr=0.001)
for epoch in range(200):
    ndlinear_model.train()
    optimizer.zero_grad()
    outputs = ndlinear_model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
ndlinear_model.eval()
with torch.no_grad():
    y_pred_ndlinear = ndlinear_model(X_test_tensor).numpy().flatten()
    y_pred_ndlinear = np.clip(y_pred_ndlinear, 3, 8)  # Clip to valid quality range
mse_ndlinear = mean_squared_error(y_test, y_pred_ndlinear)
#  nn.Linear Model
class LinearModel(nn.Module):
    def __init__(self, input_dim):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(input_dim, 64)
        self.output = nn.Linear(64, 1)
    def forward(self, x):
        x = self.linear(x)
        x = torch.relu(x)
        x = self.output(x)
        return x
# Train nn.Linear model
linear_model = LinearModel(input_dim=input_dim)
optimizer = optim.Adam(linear_model.parameters(), lr=0.001)
for epoch in range(200):
    linear_model.train()
    optimizer.zero_grad()
    outputs = linear_model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
linear_model.eval()
with torch.no_grad():
    y_pred_linear = linear_model(X_test_tensor).numpy().flatten()
    y_pred_linear = np.clip(y_pred_linear, 3, 8)  # Clip to valid quality range
mse_linear = mean_squared_error(y_test, y_pred_linear)


print(f"NdLinear MSE: {mse_ndlinear:.4f}")
print(f"nn.Linear MSE: {mse_linear:.4f}")



Columns in dataset: ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality', 'Id']
Dataset shape: (1143, 13)
X_train_tensor shape: torch.Size([914, 11])
y_train_tensor shape: torch.Size([914, 1])
NdLinear MSE: 2.8268
nn.Linear MSE: 2.3884
NdLinear MSE: 2.8268
nn.Linear MSE: 2.3884


In [None]:
## KFold Cross-Validation

kf = KFold(n_splits=3, shuffle=True, random_state=10)
ndlinear_mse_scores = []
linear_mse_scores = []
for train_index, val_index in kf.split(X_train_scaled):

    X_train_fold = X_train_scaled[train_index]
    y_train_fold = y_train[train_index]
    X_val_fold = X_train_scaled[val_index]
    y_val_fold = y_train[val_index]

    X_train_fold_tensor = torch.tensor(X_train_fold, dtype=torch.float32)
    y_train_fold_tensor = torch.tensor(y_train_fold, dtype=torch.float32).reshape(-1, 1)
    X_val_fold_tensor = torch.tensor(X_val_fold, dtype=torch.float32)
    y_val_fold_tensor = torch.tensor(y_val_fold, dtype=torch.float32).reshape(-1, 1)
    # Train NdLinear model
    ndlinear_model_cv = NdLinearModel(input_dim=input_dim)
    optimizer_nd = optim.Adam(ndlinear_model_cv.parameters(), lr=0.001)
    for epoch in range(200):
        ndlinear_model_cv.train()
        optimizer_nd.zero_grad()
        outputs = ndlinear_model_cv(X_train_fold_tensor)
        loss = criterion(outputs, y_train_fold_tensor)
        loss.backward()
        optimizer_nd.step()
    ndlinear_model_cv.eval()
    with torch.no_grad():
        y_val_pred = ndlinear_model_cv(X_val_fold_tensor).numpy().flatten()
        y_val_pred = np.clip(y_val_pred, 3, 8)
        mse_cv = mean_squared_error(y_val_fold, y_val_pred)
        ndlinear_mse_scores.append(mse_cv)
    #Train nn.Linear model 
    linear_model_cv = LinearModel(input_dim=input_dim)
    optimizer_lin = optim.Adam(linear_model_cv.parameters(), lr=0.001)
    for epoch in range(200):
        linear_model_cv.train()
        optimizer_lin.zero_grad()
        outputs = linear_model_cv(X_train_fold_tensor)
        loss = criterion(outputs, y_train_fold_tensor)
        loss.backward()
        optimizer_lin.step()
    linear_model_cv.eval()
    with torch.no_grad():
        y_val_pred_lin = linear_model_cv(X_val_fold_tensor).numpy().flatten()
        y_val_pred_lin = np.clip(y_val_pred_lin, 3, 8)  # Clip to valid quality range
        mse_cv_lin = mean_squared_error(y_val_fold, y_val_pred_lin)
        linear_mse_scores.append(mse_cv_lin)

print(f"KFold CV results with K=3:")
print("NdLinear average MSE:", np.mean(ndlinear_mse_scores))
print("nn.Linear average MSE:", np.mean(linear_mse_scores))

KFold CV results with K=3:
NdLinear average MSE: 2.3665987354604794
nn.Linear average MSE: 2.4035062378541854
