In [7]:
import logging

from pathlib import Path

from product_bundle.dataset import read_dataset
from product_bundle.utils import load_product_df

data_dir = Path("../data")

logging.basicConfig(
    level=logging.INFO,  # Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

product_df = load_product_df(data_dir)


In [14]:
import torch

X = torch.tensor(product_df.iloc[:, 8:-4].values, dtype=torch.float32)
y = torch.tensor(product_df['UnitPrice'].values, dtype=torch.float32)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64)

In [18]:
class RegressionModel(nn.Module):
    def __init__(self, input_size):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        return self.fc4(x)

In [19]:
from tqdm import tqdm


model = RegressionModel(input_size=X.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

num_epochs = 50
for epoch in tqdm(range(num_epochs)):
    model.train()
    for batch in train_dataloader:
        X_batch, y_batch = batch
        y_pred = model(X_batch)
        loss = criterion(y_pred.squeeze(), y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:15<00:00,  3.31it/s]


In [21]:
model.eval()
with torch.no_grad():
    test_loss = 0.0
    for batch in test_dataloader:
        X_batch, y_batch = batch
        y_pred = model(X_batch)
        test_loss += criterion(y_pred.squeeze(), y_batch)

print(f"Test MSE Loss: {test_loss / len(test_dataloader):.4f}")

Test MSE Loss: 51.6335
