# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import logging
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from scipy.sparse import coo_matrix

# ---- 1. DataSplitter Class ----
class DataSplitter:
    """Partition a dataset into train, validation and test subsets.

    The three subsets are kept on the instance (``train_data``, ``val_data``,
    ``test_data``) after :meth:`split` has been called; until then they are
    ``None``.
    """

    def __init__(self, data):
        # Source table plus placeholders that split() fills in.
        self.data = data
        self.train_data = self.val_data = self.test_data = None

    def split(self, test_size=0.2, val_size=0.5, random_state=42):
        """Split ``self.data`` in two stages and return the three subsets.

        Args:
            test_size: Passed to ``train_test_split`` as the test share of
                the full dataset.
            val_size: Passed to ``train_test_split`` as the validation share
                of what remains AFTER the test subset has been removed (so
                the defaults yield roughly 40% / 40% / 20%).
            random_state: Seed forwarded to both ``train_test_split`` calls.

        Returns:
            ``(train_data, val_data, test_data)``, each re-indexed from 0
            via ``reset_index(drop=True)``.
        """
        remainder, held_out = train_test_split(
            self.data, test_size=test_size, random_state=random_state
        )
        fit_part, val_part = train_test_split(
            remainder, test_size=val_size, random_state=random_state
        )
        # Drop the shuffled original index so each subset is 0..n-1.
        self.train_data, self.val_data, self.test_data = (
            part.reset_index(drop=True) for part in (fit_part, val_part, held_out)
        )
        return self.train_data, self.val_data, self.test_data

# ---- 2. Utility to Create History Matrix ----
def generate_history_matrix(data, row='user', value_field='rate', max_history_len=None, user_num=None, item_num=None):
    """Build zero-padded per-row interaction histories from an interaction table.

    The interactions are shuffled, then for every row entity the ids of its
    counterpart entities and the matching values are written left-aligned
    into fixed-width matrices. Unused slots stay 0.

    Args:
        data: DataFrame with integer ``uid`` and ``vid`` columns and, when
            ``value_field`` is set, a column of that name.
        row: ``'user'`` indexes rows by ``uid`` (slots hold ``vid``); any
            other value indexes rows by ``vid`` (slots hold ``uid``).
        value_field: Column supplying the stored values. A falsy value
            stores ``1.0`` for every interaction instead.
        max_history_len: Optional cap on the history width. The width is
            never larger than the longest observed history.
        user_num: Number of rows when ``row == 'user'``. Inferred as
            ``max(uid) + 1`` when ``None``.
        item_num: Number of rows when ``row != 'user'``. Inferred as
            ``max(vid) + 1`` when ``None``.

    Returns:
        A tuple ``(history_matrix, history_value, history_len)``:
        an int64 tensor ``[row_num, width]`` of counterpart ids, a float32
        tensor ``[row_num, width]`` of values, and an int64 tensor
        ``[row_num]`` with the number of filled slots per row.

    Raises:
        IndexError: If a row id is negative or not smaller than the row count.
    """
    # Unseeded shuffle: when histories are truncated, which interactions
    # survive (and the slot order in general) differs from call to call.
    inter_feat = data.sample(frac=1).reset_index(drop=True)
    users, items = inter_feat['uid'].to_numpy(), inter_feat['vid'].to_numpy()
    values = inter_feat[value_field].to_numpy() if value_field else np.ones(len(inter_feat))

    if row == 'user':
        row_ids, col_ids, row_num = users, items, user_num
    else:
        row_ids, col_ids, row_num = items, users, item_num

    has_rows = len(row_ids) > 0

    # The size arguments default to None; derive the row count from the data
    # in that case instead of failing inside np.zeros / indexing.
    if row_num is None:
        row_num = int(row_ids.max()) + 1 if has_rows else 0
    row_num = int(row_num)

    # Negative ids would silently wrap around to the last rows; reject them
    # together with ids beyond the declared row count.
    if has_rows and (row_ids.min() < 0 or row_ids.max() >= row_num):
        raise IndexError(
            f"row ids must lie in [0, {row_num}); "
            f"got range [{row_ids.min()}, {row_ids.max()}]"
        )

    # Longest history over all rows (0 when there are no interactions).
    longest = int(np.bincount(row_ids, minlength=row_num).max()) if has_rows else 0
    max_len = longest if max_history_len is None else min(max_history_len, longest)

    history_matrix = np.zeros((row_num, max_len), dtype=np.int64)
    history_value = np.zeros((row_num, max_len))
    history_len = np.zeros(row_num, dtype=np.int64)

    # Fill each row left to right in shuffled order; interactions beyond
    # max_len for a row are dropped.
    for r, c, v in zip(row_ids, col_ids, values):
        slot = history_len[r]
        if slot < max_len:
            history_matrix[r, slot] = c
            history_value[r, slot] = v
            history_len[r] = slot + 1

    return (
        torch.as_tensor(history_matrix, dtype=torch.long),
        torch.as_tensor(history_value, dtype=torch.float32),
        torch.as_tensor(history_len, dtype=torch.long),
    )

# ---- 3. Sparse Matrix Generator ----
def create_sparse_interaction_matrix(data, user_num, item_num, value_field='rate'):
    """Return the interactions as a ``user_num`` x ``item_num`` COO matrix.

    Args:
        data: Table with ``uid`` (row index) and ``vid`` (column index)
            columns and, when ``value_field`` is set, a column of that name.
        user_num: Number of matrix rows.
        item_num: Number of matrix columns.
        value_field: Column holding the stored entries; a falsy value stores
            ``1.0`` for every interaction.

    Returns:
        A ``scipy.sparse.coo_matrix`` with one stored entry per table row.
    """
    coords = (data['uid'].to_numpy(), data['vid'].to_numpy())
    if value_field:
        entries = data[value_field].to_numpy()
    else:
        # No value column requested: mark every interaction with 1.0.
        entries = np.ones(len(data))
    return coo_matrix((entries, coords), shape=(user_num, item_num))

# ---- 4. Simple Feedforward Model ----
class SimpleModel(nn.Module):
    """Single fully connected layer mapping ``input_size`` to ``output_size``."""

    def __init__(self, input_size, output_size):
        """Create the one linear layer, registered under the name ``fc``."""
        super().__init__()
        self.fc = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        projected = self.fc(x)
        return projected

# ---- 5. Training Function ----
def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to optimise; switched to training mode.
        train_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable computing the loss from ``(outputs, targets)``.
        optimizer: Optimizer stepping the model's parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The sum of per-batch losses divided by ``len(train_loader)``.
    """
    model.train()
    epoch_total = 0.0
    for batch in train_loader:
        features, labels = (tensor.to(device) for tensor in batch)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        step_loss = criterion(model(features), labels)
        step_loss.backward()
        optimizer.step()
        epoch_total += step_loss.item()
    batch_count = len(train_loader)
    return epoch_total / batch_count

# ---- 6. Evaluation Function ----
def evaluate(model, val_loader, criterion, device):
    """Compute the mean per-batch loss over ``val_loader`` without training.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        val_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable computing the loss from ``(outputs, targets)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The sum of per-batch losses divided by ``len(val_loader)``.
    """
    model.eval()
    batch_losses = []
    # Gradient tracking is disabled for the whole pass.
    with torch.no_grad():
        for features, labels in val_loader:
            features = features.to(device)
            labels = labels.to(device)
            predictions = model(features)
            batch_losses.append(criterion(predictions, labels).item())
    return sum(batch_losses, 0.0) / len(val_loader)

# ---- 7. Main Code ----
def _pad_to_width(tensor, width):
    """Return a 2-D ``tensor`` right-padded with zeros to ``width`` columns.

    A tensor that is already at least ``width`` columns wide is returned
    unchanged.
    """
    missing = width - tensor.shape[1]
    if missing <= 0:
        return tensor
    filler = tensor.new_zeros((tensor.shape[0], missing))
    return torch.cat([tensor, filler], dim=1)


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.WARNING)

    # Load the tab-separated interaction file and shift ids to start at 0.
    data = pd.read_csv("/u.data", sep='\t', header=None)
    data.columns = ['uid', 'vid', 'rate', 'time']
    data['uid'] -= 1
    data['vid'] -= 1
    user_num = int(data['uid'].max()) + 1
    item_num = int(data['vid'].max()) + 1

    # Split the dataset
    splitter = DataSplitter(data)
    train_data, val_data, test_data = splitter.split()

    # Generate one (item-id history, rating history) pair per split.
    # NOTE(review): after the shift above item id 0 is a real item, but 0 is
    # also the padding value in the history matrices — confirm this is
    # acceptable for the model.
    histories = {}
    for split_name, frame in (('train', train_data), ('val', val_data), ('test', test_data)):
        ids, ratings, _ = generate_history_matrix(
            frame, row='user', value_field='rate', user_num=user_num, item_num=item_num
        )
        histories[split_name] = (ids, ratings)

    # Each split derives its own history width from its longest history, so
    # the matrices generally differ in width. Pad all of them to a common
    # width so the same linear layer can consume every split.
    input_size = max(ids.shape[1] for ids, _ in histories.values())

    # Create DataLoaders. Features are cast to float because nn.Linear
    # rejects the int64 id tensors.
    loaders = {}
    for split_name, (ids, ratings) in histories.items():
        features = _pad_to_width(ids, input_size).float()
        targets = _pad_to_width(ratings, input_size)
        loaders[split_name] = DataLoader(
            list(zip(features, targets)),
            batch_size=32,
            shuffle=(split_name == 'train'),
        )
    train_loader, val_loader, test_loader = loaders['train'], loaders['val'], loaders['test']

    # Model, Loss, Optimizer Setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(review): the model emits one value per user while the targets hold
    # one rating per history slot, so MSELoss broadcasts (batch, 1) against
    # (batch, input_size) and warns about it. Confirm the intended target
    # before relying on the reported losses.
    model = SimpleModel(input_size=input_size, output_size=1).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    num_epochs = 10
    for epoch in range(num_epochs):
        train_loss = train(model, train_loader, criterion, optimizer, device)
        val_loss = evaluate(model, val_loader, criterion, device)
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    # Final Test Evaluation
    test_loss = evaluate(model, test_loader, criterion, device)
    print(f'Test Loss: {test_loss:.4f}')
