In [None]:
import os
# Directory that holds the datasets. RECSYS_DATA_DIR overrides the original
# Colab / Google Drive location so the notebook can run on another machine.
DATA_DIR = os.environ.get(
    'RECSYS_DATA_DIR',
    '/content/drive/MyDrive/yeonjun/공부/RecSys/intro_to_recsys/data',
)
# Later cells open their files with paths relative to this directory.
os.chdir(DATA_DIR)

In [None]:
import numpy as np
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
from datetime import datetime

from tqdm.notebook import tqdm

import warnings, random
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("GPU is available" if device.type == "cuda" else "GPU not available, CPU used")

GPU is available


In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), then configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for interpreters started after this call; the hash seed of the
    # already-running kernel cannot be changed from here.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers all visible GPUs, not only the current one; ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False

In [None]:
def _read_match_table(csv_path):
    """Read a CSV and use its ``match_id_hash`` column as the row index."""
    return pd.read_csv(csv_path, index_col='match_id_hash')


# Feature tables for the train / test matches and the table of training targets.
train_df = _read_match_table('./dota-heroes-binary/dota_train_binary_heroes.csv')
test_df = _read_match_table('./dota-heroes-binary/dota_test_binary_heroes.csv')
target = _read_match_table('./dota-heroes-binary/train_targets.csv')

In [None]:
# List the column names of the targets table loaded from train_targets.csv.
target.columns

Index(['game_time', 'radiant_win', 'duration', 'time_remaining',
       'next_roshan_team'],
      dtype='object')

In [None]:
# Labels are selected by the feature table's match_id_hash index instead of by
# row position, so every feature row is paired with its own label even if the
# two CSV files list the matches in a different order. A match id that is
# missing from the targets raises KeyError instead of misaligning silently.
# 20% of the matches are held out; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    train_df.values,
    target.loc[train_df.index, 'radiant_win'].values,
    test_size=0.2,
    random_state=1995,
)

In [None]:
# train = np.load('./ml-100k/ml_100k_train.npy')
# test = np.load('./ml-100k/ml_100k_test.npy')

In [None]:
class Config:
    """Hyper-parameters for the experiment, stored as plain class attributes."""

    # Reproducibility
    seed = 1995

    # Model
    dim_f = 20  # handed to FM as embed_dim
    alpha = 100  # not read by the cells visible in this part of the notebook

    # Optimisation
    epochs = 30
    batch_size = 64
    learning_rate = 0.01 / 2
    weight_decay = 0.1 / 2
    early_stopping_round = 0  # not read by the cells visible in this part of the notebook


config = Config()

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over a 2-D feature array with optional targets.

    The base class is spelled with its fully qualified name because this class
    reuses the name ``Dataset``: written as ``class Dataset(Dataset)``, running
    the cell a second time would subclass the previous definition of this
    class instead of the torch base class.

    Args:
        data: Array indexable as ``data[idx, :]``, one sample per row.
        target: Per-sample targets; only stored when ``train`` is true.
        train: When true, every item also carries its target under ``'y'``.
    """

    def __init__(self, data, target, train):
        self.data = data
        self.train = train

        if train:
            self.target = target

    def __len__(self):
        """Return the number of samples (``len(data)``)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of float tensors on ``device``.

        The dict always holds ``'x'``; it also holds ``'y'`` when the dataset
        was created with ``train=True``.
        """
        # Tensors are moved to the module-level `device` here because the
        # training loop feeds batches to the model without moving them itself.
        sample = {'x': torch.tensor(self.data[idx, :], dtype=torch.float).to(device)}
        if self.train:
            sample['y'] = torch.tensor(self.target[idx], dtype=torch.float).to(device)
        return sample

In [None]:
class FM(nn.Module):
    """Second-order factorization machine producing one raw score per sample.

    For a feature vector ``x`` the score is::

        bias + sum_i W[i] * x[i] + sum_{i<j} <V[i], V[j]> * x[i] * x[j]

    Args:
        input_dim: Number of input features (length of each feature vector).
        embed_dim: Size of the latent vector learned for every feature.
    """

    def __init__(self, input_dim, embed_dim):
        super().__init__()
        # Linear weight per feature.
        self.W = nn.Parameter(torch.randn(input_dim, dtype=torch.float))
        # One embed_dim-sized latent row per feature.
        self.V = nn.Parameter(torch.randn((input_dim, embed_dim), dtype=torch.float))
        # Global bias, initialised to 1.
        self.bias = nn.Parameter(torch.tensor([1], dtype=torch.float))

    def forward(self, batch_data):
        """Score one or more feature vectors.

        Args:
            batch_data: Float tensor whose last dimension has size
                ``input_dim``. Any leading dimensions (including none) are
                treated as batch dimensions.

        Returns:
            Raw scores (no sigmoid applied) shaped like the leading dimensions
            of ``batch_data``; a single un-batched vector yields shape ``(1,)``
            because the bias is a one-element tensor.
        """
        feature_effect = torch.matmul(batch_data, self.W)
        # Pairwise term in linear time, per latent factor f:
        #   sum_{i<j} v_if v_jf x_i x_j
        #     = 0.5 * [(sum_i v_if x_i)^2 - sum_i v_if^2 x_i^2]
        square_of_sum = torch.matmul(batch_data, self.V) ** 2
        sum_of_square = torch.matmul(batch_data ** 2, self.V ** 2)
        # The latent dimension is always last, so reducing over dim=-1 works
        # for batched and un-batched input alike.
        interaction_effect = torch.sum(square_of_sum - sum_of_square, dim=-1) / 2

        return self.bias + feature_effect + interaction_effect

In [None]:
seed_everything(config.seed)

train_dataset = Dataset(x_train, y_train, train=True)
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, drop_last=False)

# The held-out split is built with train=True because its labels are needed
# to compute the validation loss.
test_dataset = Dataset(x_test, y_test, train=True)
test_loader = DataLoader(test_dataset, batch_size=config.batch_size*100, shuffle=False, drop_last=False)

model = FM(input_dim=train_df.shape[1],
           embed_dim=config.dim_f)
model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
# The model returns raw scores, so the sigmoid is applied inside the loss.
loss_fn = nn.BCEWithLogitsLoss()

start = datetime.now()
history = defaultdict(list)
history['best_loss'] = np.inf
best_state = None  # in-memory copy of the weights with the lowest validation loss
for epoch in range(config.epochs):

    # ---- training pass: one optimizer step per mini-batch ----
    model.train()
    losses = 0
    for batch_data in train_loader:
        x = batch_data['x']
        y = batch_data['y']

        optimizer.zero_grad()

        pred = model(x)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()

        losses += loss.item()
    losses /= len(train_loader)
    history['train_losses'].append(losses)

    # ---- validation pass ----
    # Each held-out batch is bound to its own name and read from that name, so
    # the loss is computed on validation data and never on the `batch_data`
    # left over from the training loop above.
    model.eval()
    losses_val = 0
    with torch.no_grad():
        for val_batch in test_loader:
            pred = model(val_batch['x'])
            losses_val += loss_fn(pred, val_batch['y']).item()

    losses_val /= len(test_loader)
    history['val_losses'].append(losses_val)
    print(f'EPOCH {epoch+1} TRAIN LogLoss : {losses:.6f}, TEST LogLoss : {losses_val:.6f}')

    if history['best_loss'] > losses_val:
        history['best_loss'] = losses_val
        # Snapshot the best weights in memory (cloned, because state_dict()
        # returns references to the live parameters). Uncomment the line below
        # to persist them to disk as well.
        best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
        # torch.save(model.state_dict(), f'../paper_review/4. Deep learning based/FM/FM.pth')
        print('The Model Saving...')


end = datetime.now()
print(f'Training takes time {end-start}')

EPOCH 1 TRAIN LogLoss : 10.167667, TEST LogLoss : 8.689508
The Model Saving...
EPOCH 2 TRAIN LogLoss : 7.552812, TEST LogLoss : 6.222736
The Model Saving...
EPOCH 3 TRAIN LogLoss : 5.645339, TEST LogLoss : 5.550409
The Model Saving...
EPOCH 4 TRAIN LogLoss : 4.243169, TEST LogLoss : 3.947579
The Model Saving...
EPOCH 5 TRAIN LogLoss : 3.223651, TEST LogLoss : 3.005708
The Model Saving...
EPOCH 6 TRAIN LogLoss : 2.472993, TEST LogLoss : 2.130957
The Model Saving...
EPOCH 7 TRAIN LogLoss : 1.926187, TEST LogLoss : 2.284875
EPOCH 8 TRAIN LogLoss : 1.538880, TEST LogLoss : 1.407978
The Model Saving...
EPOCH 9 TRAIN LogLoss : 1.264784, TEST LogLoss : 0.948185
The Model Saving...
EPOCH 10 TRAIN LogLoss : 1.072153, TEST LogLoss : 1.006382
EPOCH 11 TRAIN LogLoss : 0.939853, TEST LogLoss : 0.805522
The Model Saving...
EPOCH 12 TRAIN LogLoss : 0.850821, TEST LogLoss : 0.718277
The Model Saving...
EPOCH 13 TRAIN LogLoss : 0.792013, TEST LogLoss : 0.669478
The Model Saving...
EPOCH 14 TRAIN LogLos