In [109]:
import random
import math
import torch
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F # All functions that don't have any parameters
from torch.utils.data import TensorDataset, DataLoader # Gives easier dataset managment and creates mini batches
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import KFold, train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt
import time

In [110]:
# helper functions
def check_accuracy(loader, model, showresult):
    """Compute the classification accuracy of ``model`` over ``loader``.

    Args:
        loader: Iterable yielding ``(x, y)`` tensor batches. ``y`` is either
            one-hot encoded with shape ``(batch, classes)`` or a 1-D tensor
            of class indices.
        model: ``nn.Module`` that maps the flattened ``(batch, features)``
            input to ``(batch, classes)`` scores.
        showresult: When truthy, print the correct/total counts and the
            accuracy as a percentage.

    Returns:
        A 0-dim float tensor with the fraction of correct predictions
        (0 to 1, not a percentage). It is NaN when ``loader`` yields no
        samples.
    """
    # Evaluate on the device the model lives on instead of depending on a
    # notebook-level ``device`` global that is defined in a later cell.
    first_param = next(model.parameters(), None)
    target_device = (
        first_param.device if first_param is not None else torch.device("cpu")
    )

    was_training = model.training
    # Keep the running count as a tensor so no per-batch host sync is needed.
    num_correct = torch.zeros((), dtype=torch.long, device=target_device)
    num_samples = 0

    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)
                x = x.reshape(x.shape[0], -1)
                scores = model(x)
                _, predictions = scores.max(1)
                if y.dim() > 1:
                    # One-hot labels: the class is the position of the max.
                    _, actual = y.max(1)
                else:
                    # Labels already given as class indices.
                    actual = y
                num_correct += (predictions == actual).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore whichever mode the caller had, rather than forcing
        # training mode on a model that was deliberately put in eval mode.
        model.train(was_training)

    if showresult:
        correct = int(num_correct)
        percent = correct / num_samples * 100 if num_samples else float("nan")
        print(f"Got {correct} / {num_samples} with accuracy {percent:.2f}")

    # Integer tensor / int is a true division, so this is a float tensor
    # (NaN for 0 / 0 instead of raising ZeroDivisionError).
    return num_correct / num_samples

In [111]:
# Load the training data from a CSV file.
# NOTE(review): absolute, machine-specific path -- point TRAIN_CSV_PATH at
# your local copy of the file before running this on another machine.
TRAIN_CSV_PATH = 'C:\\Users\\Kkda\\Desktop\\Stat 441 Final Project\\Pokemon_train.csv'
df = pd.read_csv(TRAIN_CSV_PATH)
# Remove features according to feature selection
df = df.drop(columns=["has_gender","sprite_perimeter","sprite_size","sprite_overflow_vertical","sprite_overflow_horizontal","height_m"])

# One-hot encode the categorical predictors; the target `type_1` is one-hot
# encoded separately so each class gets its own column.
X = df.drop(['type_1'],axis=1)
X = pd.get_dummies(X,columns=['type_2','status'])
y_encoded = pd.get_dummies(df['type_1'])

# Convert X and y_encoded to float32 PyTorch tensors.
# Go through an explicit float32 array: with pandas >= 2.0 get_dummies
# produces bool columns, so `.values` on the mixed bool/numeric frame is an
# object array, which torch.tensor cannot convert.
X_tensor = torch.tensor(X.to_numpy(dtype="float32"))
y_tensor = torch.tensor(y_encoded.to_numpy(dtype="float32"))

In [112]:
# Declare model
class NN(nn.Module):
    """Fully connected feed-forward network with ReLU activations.

    Args:
        nodes: Sequence of layer widths from input to output; consecutive
            pairs become ``nn.Linear`` layers (``[a, b, c]`` builds
            ``Linear(a, b)`` and ``Linear(b, c)``). With fewer than two
            entries no layers are created and ``forward`` returns its input
            unchanged.
        activate_output: If True, ReLU is also applied to the output of the
            last layer (the previous behaviour). Defaults to False so the
            network returns raw logits: ``nn.CrossEntropyLoss`` applies
            log-softmax itself, and clamping logits to >= 0 beforehand
            zeroes the gradient of every output unit that goes negative.
    """

    def __init__(self, nodes, activate_output=False):
        super().__init__()
        self.layers = nn.ModuleList(
            nn.Linear(n_in, n_out) for n_in, n_out in zip(nodes[:-1], nodes[1:])
        )
        self.activation = nn.ReLU()
        self.activate_output = activate_output

    def forward(self, x):
        """Run ``x`` through every layer, activating the hidden layers only
        (and the output layer too when ``activate_output`` is set)."""
        out = x
        last = len(self.layers) - 1
        for idx, layer in enumerate(self.layers):
            out = layer(out)
            if idx < last or self.activate_output:
                out = self.activation(out)
        return out

In [161]:
# Leaderboard of the best models found so far: slot i of every top_* list
# describes the same model. Running this cell resets the leaderboard.
num_best_model = 100
top_learning_rate = [0] * num_best_model
top_num_epochs = [0] * num_best_model
top_momentum = [0] * num_best_model
top_num_hidden_layer = [0] * num_best_model
top_batch_size = [0] * num_best_model
# One independent list per slot; `[[]] * n` would repeat a single shared
# list object n times, so mutating one slot would change all of them.
top_node_list = [[] for _ in range(num_best_model)]
top_accuracy = [0] * num_best_model
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
# Random hyper-parameter search. Every run of this cell samples `num_model`
# random configurations, trains each one on a fresh 80/20 split, and records
# a configuration in the top_* lists whenever it beats the current worst slot.
# NOTE(review): models are ranked on the same split that is reported as their
# accuracy, and each model gets a different split, so the best score printed
# below is an optimistic estimate -- confirm it on held-out data.

# Initialize variables
# Layer sizes are read from the encoded data so the network always matches
# the number of feature columns and classes.
input_size = X_tensor.shape[1]
num_classes = y_tensor.shape[1]
num_model = 10000  # Number of models to train
for ii in tqdm(range(num_model)):
    # Randomly sample the hyper-parameters of this trial
    learning_rate = round(random.uniform(0.0005, 0.002), 5)
    num_epochs = random.randint(10, 40)
    momentum = round(random.uniform(0.9, 0.99), 3)
    batch_size = random.randint(50, 200)
    num_hidden_layer = random.randint(0, 3)
    hidden_layer = 0  # not read in this cell; kept in case a later cell uses it
    # Width of hidden layer jj is drawn from
    # [15 * (L - jj), 30 * (L + 1 - jj)] with L = num_hidden_layer, so the
    # sampled range shrinks with depth.
    node_list = [
        random.randint(15 * (num_hidden_layer - jj), 30 * (num_hidden_layer + 1 - jj))
        for jj in range(num_hidden_layer)
    ]

    # Create the model
    model = NN([input_size] + node_list + [num_classes]).to(device)

    # Set up loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    # Fresh train/test split for this trial. The seed is drawn from `random`
    # so that seeding `random` beforehand makes the whole search repeatable.
    # (`random.seed(...)` returns None, so passing its result as random_state
    # only reseeded the global RNG and handed None to train_test_split.)
    split_seed = random.randrange(2**32)
    X_train, X_test, y_train, y_test = train_test_split(
        X_tensor, y_tensor, test_size=0.2, random_state=split_seed
    )

    # Create TensorDatasets for train and test sets
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    # Create DataLoaders for train and test sets
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Loss of the last mini-batch of every epoch
    loss_values = []
    # Train the model
    for epoch in range(num_epochs):
        for batch_idx, (data, targets) in enumerate(train_loader):
            # Get data to cuda if possible
            data = data.to(device=device)
            targets = targets.to(device=device)

            # Flatten everything except the batch dimension
            data = data.reshape(data.shape[0], -1)

            # Forward propagation
            scores = model(data)
            loss = criterion(scores, targets)

            # Zero previous gradients, back-propagate, take an SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        loss_values.append(loss.item())

    # Accuracy on this trial's test split, as a plain Python float in percent
    # (avoids keeping tensors, possibly on the GPU, in the leaderboard lists).
    accuracy = float(check_accuracy(test_loader, model, False)) * 100

    # Replace the current worst leaderboard entry if this model beats it
    min_accuracy = min(top_accuracy)
    if accuracy > min_accuracy:
        index = top_accuracy.index(min_accuracy)
        top_learning_rate[index] = learning_rate
        top_num_epochs[index] = num_epochs
        top_momentum[index] = momentum
        top_num_hidden_layer[index] = num_hidden_layer
        top_batch_size[index] = batch_size
        top_node_list[index] = node_list
        top_accuracy[index] = accuracy

# Examine the hyper parameters of the best model
best_index = top_accuracy.index(max(top_accuracy))
print("indedx: {}".format(best_index))
# float() accepts both the int placeholder and a stored tensor/float.
best_accuracy = float(top_accuracy[best_index])
print("accuracy: {}".format(best_accuracy))
print("learning rate:{}".format(top_learning_rate[best_index]))
print("num_epochs :{}".format(top_num_epochs[best_index]))
print("momenum:{}".format(top_momentum[best_index]))
print("hidden_layer:{}".format(top_num_hidden_layer[best_index]))
print("batch_size:{}".format(top_batch_size[best_index]))
print("node_list:{}".format(top_node_list[best_index]))

 13%|█▎        | 1320/10000 [03:36<26:12,  5.52it/s]