In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

In [2]:
def load_data():
    input_folder = 'combined_stats'
    input_format = 'out'
    feature = 'feature'
    label = 'label'
    X_train = np.loadtxt("{}/{}_{}.{}".format(input_folder, feature, 'training', input_format))
    y_train = np.loadtxt("{}/{}_{}.{}".format(input_folder, label, 'training', input_format))
    X_val = np.loadtxt("{}/{}_{}.{}".format(input_folder, feature, 'validation', input_format))
    y_val = np.loadtxt("{}/{}_{}.{}".format(input_folder, label, 'validation', input_format))
    X_test = np.loadtxt("{}/{}_{}.{}".format(input_folder, feature, 'test', input_format))
    y_test = np.loadtxt("{}/{}_{}.{}".format(input_folder, label, 'test', input_format))
    return X_train, y_train, X_val, y_val, X_test, y_test

In [3]:
X_train, y_train, X_val, y_val, X_test, y_test = load_data()
print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape)

(1558, 76) (1558,) (106, 76) (106,) (11, 76) (11,)


In [4]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(76, 38)
        self.fc2 = nn.Linear(38, 19)
        self.fc3 = nn.Linear(19, 1)
        self.out_act = nn.Sigmoid()

    def forward(self, x):
        x = x.view(-1, 76)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        y = self.out_act(x)
        return y

net = Net()

In [5]:
import torch.optim as optim

In [7]:
n_training = X_train.shape[0]
X_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.from_numpy(y_train).float()
n_validation = X_val.shape[0]
X_validation_tensor = torch.from_numpy(X_val).float()
y_validation_tensor = torch.from_numpy(y_val).float()
n_test = X_test.shape[0]
X_test_tensor = torch.from_numpy(X_test).float()
y_test_tensor = torch.from_numpy(y_test).float()

epoches = []
training_acc = []
val_acc = []

epoch_size = 50
learning_rate = 0.0001

# set up the NN
net = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr = learning_rate)


for epoch in range(epoch_size):  # loop over the dataset multiple times
    running_loss = 0.0
    for i in range(n_training):
        # get the inputs
        inputs = X_train_tensor[i][:]
        labels = y_train_tensor[i]
        labels = labels.view(1, -1)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        # print(outputs, labels)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % n_training == (n_training - 1):    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
    print('Epoch %d finished Training.' %(epoch + 1))
    
    correct_training = 0
    with torch.no_grad():
        for i in range(n_training):
            inputs = X_train_tensor[i][:]
            labels = y_train_tensor[i]
            labels = labels.view(1, -1)

            outputs = net(inputs)
            if outputs.data >= 0.5:
                predicted = 1
            else:
                predicted = 0
            correct_training += (predicted == labels).item()
    print('Accuracy of the network on the training: %d %%' % (
        100 * correct_training / n_training))
    training_acc.append(correct_training / n_training)

    correct_val = 0
    with torch.no_grad():
        for i in range(n_validation):
            inputs = X_validation_tensor[i][:]
            labels = y_validation_tensor[i]
            labels = labels.view(1, -1)

            outputs = net(inputs)
            if outputs.data >= 0.5:
                predicted = 1
            else:
                predicted = 0
            correct_val += (predicted == labels).item()
    print('Accuracy of the network on the validation: %d %%' % (
        100 * correct_val / n_validation))
    val_acc.append(correct_val / n_validation)
    epoches.append(epoch + 1)
print('Traininig all done')

[1,  1558] loss: 5.506
Epoch 1 finished Training.
Accuracy of the network on the training: 58 %
Accuracy of the network on the validation: 60 %
[2,  1558] loss: 5.292
Epoch 2 finished Training.
Accuracy of the network on the training: 59 %
Accuracy of the network on the validation: 61 %
[3,  1558] loss: 5.221
Epoch 3 finished Training.
Accuracy of the network on the training: 60 %
Accuracy of the network on the validation: 64 %
[4,  1558] loss: 5.176
Epoch 4 finished Training.
Accuracy of the network on the training: 62 %
Accuracy of the network on the validation: 68 %
[5,  1558] loss: 5.129
Epoch 5 finished Training.
Accuracy of the network on the training: 63 %
Accuracy of the network on the validation: 70 %
[6,  1558] loss: 5.111
Epoch 6 finished Training.
Accuracy of the network on the training: 63 %
Accuracy of the network on the validation: 70 %
[7,  1558] loss: 5.094
Epoch 7 finished Training.
Accuracy of the network on the training: 63 %
Accuracy of the network on the validatio

In [15]:
import pandas as pd
import csv

correct_test = 0
y_pred = []
y_pred1 = []
y_true1 = []
print('Predicting on new schedule..')
folder = 'test_games'
test_file = 'test_1819'
test_format = 'csv'
schedule1819 = pd.read_csv("{}/{}.{}".format(folder, test_file, test_format))
result = []

with torch.no_grad():
    for index, row in schedule1819.iterrows():
        team1 = row['HTeam']
        team2 = row['VTeam']

        inputs = X_test_tensor[index][:]
        labels = y_test_tensor[index]
        labels = labels.view(1, -1)

        pred = net(inputs)
        prob = pred.data
        
        if prob > 0.5:
            winner = team1
            loser = team2
            result.append([winner, loser, prob.item()])
        else:
            winner = team2
            loser = team1
            result.append([winner, loser, 1 - prob.item()])
print('Finish predicting')

result_name = '1819_test_Result.csv'
with open("{}/{}".format(folder, result_name), 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['win', 'lose', 'probability'])
    writer.writerows(result)

'''
with torch.no_grad():
    for i in range(n_test):
        inputs = X_test_tensor[i][:]
        labels = y_test_tensor[i]
        labels = labels.view(1, -1)

        outputs = net(inputs)
        y_pred.append(outputs.data)
        if outputs.data >= 0.5:
            predicted = 1
        else:
            predicted = 0
        correct_test += (predicted == labels).item()
print('Accuracy of the network on the test: %d %%' % (
    100 * correct_test / n_test))
print(correct_test, n_test)
'''

Predicting on new schedule..
Finish predicting


"\nwith torch.no_grad():\n    for i in range(n_test):\n        inputs = X_test_tensor[i][:]\n        labels = y_test_tensor[i]\n        labels = labels.view(1, -1)\n\n        outputs = net(inputs)\n        y_pred.append(outputs.data)\n        if outputs.data >= 0.5:\n            predicted = 1\n        else:\n            predicted = 0\n        correct_test += (predicted == labels).item()\nprint('Accuracy of the network on the test: %d %%' % (\n    100 * correct_test / n_test))\nprint(correct_test, n_test)\n"