In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torch.autograd import Variable
from sklearn.preprocessing import StandardScaler

In [2]:
trainset = pd.read_csv('./train.csv')
testset = pd.read_csv('./test.csv')

print(trainset)
print(testset)

X_train = trainset.iloc[:, 0:20].values
Y_train = trainset.iloc[:, 20].values
Y_train = Y_train-1

X_test = testset.iloc[:, 0:20].values
Y_test = testset.iloc[:, 20].values
Y_test = Y_test-1

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

X_train = torch.from_numpy(X_train).type(torch.FloatTensor)
Y_train = torch.from_numpy(Y_train).type(torch.LongTensor)
X_test = torch.from_numpy(X_test).type(torch.FloatTensor)
Y_test = torch.from_numpy(Y_test).type(torch.LongTensor)


      cache-misses-1  node-loads-1  ...  branch-load-misses-5  label
0           36218220       2530196  ...              17816426      7
1           34025893       2310966  ...              38927513     23
2           36778380       2597194  ...              18699857     17
3           35736215       2499164  ...              19137869     12
4           33763999       2342137  ...              19270653     24
...              ...           ...  ...                   ...    ...
1495        32765499       2249790  ...              16673701     26
1496        36032964       2675011  ...              16989614      1
1497        34176184       2447644  ...              17000473      1
1498        32203221       2189984  ...              17285080     26
1499        34253061       2361245  ...              17671581     13

[1500 rows x 21 columns]
     cache-misses-1  node-loads-1  ...  branch-load-misses-5  label
0          33534639       2368872  ...              22351242      3
1         

In [3]:
batch_size = 256
train_set = Data.TensorDataset(X_train, Y_train)
train_loader = Data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True
)

In [64]:
class MLP(nn.Module):
    
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(20, 48)
        self.fc2 = nn.Linear(48, 96)
        self.fc3 = nn.Linear(96, 128)
        self.fc4 = nn.Linear(128, 30)
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.dropout(self.fc4(x))
        
        return x
    
model = MLP()

In [5]:
num_epochs = 5000
learning_rate = 1e-3
batch_no = len(X_train) // batch_size

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

In [6]:
model.train()
for epoch in range(1, num_epochs+1):
    loss_sum = 0
    for step, (x, y) in enumerate(train_loader):
        y_pred = model(x)
        y = y.squeeze()
        loss = loss_function(y_pred, y)
        loss_sum += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 50 == 0:
        print("epoch: %d, loss: %f" % (epoch, loss_sum/batch_size))
        acc_sum = 0
        acc_sum += (model(X_train).argmax(dim=1) == Y_train.squeeze()).sum()
        print("train accuracy: %f" % (acc_sum/len(Y_train)))

epoch: 50, loss: 0.018197
train accuracy: 0.769333
epoch: 100, loss: 0.012554
train accuracy: 0.836667
epoch: 150, loss: 0.009365
train accuracy: 0.875333
epoch: 200, loss: 0.009047
train accuracy: 0.896667
epoch: 250, loss: 0.006377
train accuracy: 0.900000
epoch: 300, loss: 0.005747
train accuracy: 0.906000
epoch: 350, loss: 0.005206
train accuracy: 0.920667
epoch: 400, loss: 0.005443
train accuracy: 0.924000
epoch: 450, loss: 0.004882
train accuracy: 0.920667
epoch: 500, loss: 0.004187
train accuracy: 0.922000
epoch: 550, loss: 0.003562
train accuracy: 0.926667
epoch: 600, loss: 0.003681
train accuracy: 0.915333
epoch: 650, loss: 0.003873
train accuracy: 0.926667
epoch: 700, loss: 0.003518
train accuracy: 0.924667
epoch: 750, loss: 0.003603
train accuracy: 0.924000
epoch: 800, loss: 0.003110
train accuracy: 0.926000
epoch: 850, loss: 0.003282
train accuracy: 0.918000
epoch: 900, loss: 0.002976
train accuracy: 0.923333
epoch: 950, loss: 0.003073
train accuracy: 0.933333
epoch: 1000, 

In [8]:
model.eval()
acc_sum = 0
print(model(X_test).argmax(dim=1))
print(Y_test.squeeze())
acc_sum += (model(X_test).argmax(dim=1) == Y_test.squeeze()).sum()
print("test accuracy: %f" % (acc_sum/len(Y_test)))

tensor([ 2,  8, 19, 29,  7, 22, 12, 28, 29, 11, 24, 27, 12, 20, 29, 12, 17, 18,
         8, 22, 19, 22,  9, 12,  2,  1,  0,  1,  0,  4, 18, 16, 22,  3,  2, 11,
        24,  9, 14, 29,  5,  6,  8,  9, 21,  5, 18, 26, 14, 27,  7, 15, 25, 23,
         7, 27, 16,  5,  6,  0,  2,  8, 26, 18,  3,  5, 17, 18,  2, 22,  9,  4,
        26, 17, 28, 11, 19, 14, 27, 16, 17, 14, 18,  3, 14, 15, 23, 22,  8,  3,
        12,  3, 13,  1, 20,  0, 29, 15,  8,  2, 14, 13, 12,  6, 20, 13, 11, 15,
         5, 21,  8, 28, 24, 10, 10,  1, 14, 19, 11, 29,  6, 28,  5, 26, 14, 20,
        11, 17, 28, 22, 16, 23,  5,  3,  4,  0,  1,  3, 16, 11,  5, 27, 12, 26,
        28, 12, 11, 18, 20, 10, 11,  7,  3, 23,  9,  1, 20, 27, 13, 14, 23, 19,
        23,  4, 14, 18, 11,  3, 16, 24, 12, 24, 10, 14, 25, 18,  2, 24,  7, 14,
        18,  7, 23, 13,  1, 13, 22, 22, 19,  4,  0,  7,  6, 22, 15, 29, 13, 20,
        10, 27, 29, 28, 19, 16,  5, 17,  0, 10, 29,  9,  0, 28,  9,  0, 13, 15,
        23, 20, 23, 26, 10, 12, 11,  9, 

In [55]:
batch_size = 256
train_set = Data.TensorDataset(X_train, Y_train)
train_loader = Data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True
)

class LSTM(nn.Module):
    
    def __init__(self):
        super(LSTM, self).__init__()
        self.fc1 = nn.LSTM(20, 48)
        self.fc2 = nn.Linear(48, 96)
        self.fc3 = nn.Linear(96, 30)
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x):
        x, _ = self.fc1(x.view(len(x), 1, -1))
        x = self.fc2(x.view(len(x), 48))
        x = F.relu(x)
        x = self.dropout(self.fc3(x))
        
        return x
    
model = LSTM()

In [56]:
num_epochs = 5000
learning_rate = 1e-3
batch_no = len(X_train) // batch_size

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

In [57]:
model.train()
for epoch in range(1, num_epochs+1):
    loss_sum = 0
    for step, (x, y) in enumerate(train_loader):
        y_pred = model(x)
        y = y.squeeze()
        loss = loss_function(y_pred, y)
        loss_sum += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 50 == 0:
        print("epoch: %d, loss: %f" % (epoch, loss_sum/batch_size))
        acc_sum = 0
        acc_sum += (model(X_train).argmax(dim=1) == Y_train.squeeze()).sum()
        print("train accuracy: %f" % (acc_sum/len(Y_train)))

epoch: 50, loss: 0.025782
train accuracy: 0.724000
epoch: 100, loss: 0.017752
train accuracy: 0.788667
epoch: 150, loss: 0.013932
train accuracy: 0.811333
epoch: 200, loss: 0.012252
train accuracy: 0.837333
epoch: 250, loss: 0.010259
train accuracy: 0.853333
epoch: 300, loss: 0.009455
train accuracy: 0.866667
epoch: 350, loss: 0.009074
train accuracy: 0.890667
epoch: 400, loss: 0.007375
train accuracy: 0.884000
epoch: 450, loss: 0.008211
train accuracy: 0.905333
epoch: 500, loss: 0.007175
train accuracy: 0.912667
epoch: 550, loss: 0.006428
train accuracy: 0.896000
epoch: 600, loss: 0.006208
train accuracy: 0.922000
epoch: 650, loss: 0.005605
train accuracy: 0.932000
epoch: 750, loss: 0.004668
train accuracy: 0.920000
epoch: 800, loss: 0.004297
train accuracy: 0.941333
epoch: 850, loss: 0.004262
train accuracy: 0.923333
epoch: 900, loss: 0.004109
train accuracy: 0.923333
epoch: 950, loss: 0.003890
train accuracy: 0.932000
epoch: 1000, loss: 0.004266
train accuracy: 0.922000
epoch: 1050,

In [58]:
model.eval()
acc_sum = 0
print(model(X_test).argmax(dim=1))
print(Y_test.squeeze())
acc_sum += (model(X_test.view(len(X_test), 1, -1)).argmax(dim=1) == Y_test.squeeze()).sum()
print("test accuracy: %f" % (acc_sum/len(Y_test)))

tensor([ 2,  8, 19, 29,  7, 22, 12, 28, 29, 10, 24, 27, 23,  0, 25, 12, 27, 18,
        23, 22, 19, 22,  9, 12,  2,  1,  0,  1,  0,  4, 18, 21, 22, 14,  2, 11,
        24,  9, 14, 29,  5,  6,  8,  9, 21,  5, 18, 26, 14, 27,  7, 22, 25, 23,
         7, 27, 16,  5,  6,  0,  2,  8, 26, 27, 14,  5, 17, 18,  2, 22,  9,  4,
        12, 17, 28, 11, 19, 28, 27, 16, 17, 14, 18,  3, 14, 15, 23, 22,  8, 14,
        20,  3, 13, 14, 20,  0, 29, 15,  8,  2, 14, 13, 12, 18, 20, 13, 11, 15,
         5, 21,  8, 28, 24, 10, 10,  1, 14, 19, 11,  6,  6, 28,  5, 26, 14, 20,
        11, 17, 28, 22, 16, 23,  5,  3,  4,  0,  1,  3, 16, 11,  5,  6, 12, 26,
        28, 16, 11, 18, 26, 10, 11,  7,  3, 23,  9,  1, 10, 27, 13, 14, 23, 19,
        21,  4, 14,  3, 11,  3, 16, 24, 12, 24, 10, 14, 25, 18,  2, 24, 13, 14,
        18, 15, 23, 13,  4, 13, 22, 22, 19,  4,  0,  7,  6, 22, 15, 29, 13, 20,
        10, 27, 29, 28, 19, 16,  5, 27,  0, 10, 29,  9,  0, 28,  9,  0, 13, 15,
        23, 20, 23, 26, 10, 12, 11,  9, 

In [59]:
trainset = pd.read_csv('./train.csv')
testset = pd.read_csv('./test.csv')

print(trainset)
print(testset)

X_train = trainset.iloc[:, 0:20].values
Y_train = trainset.iloc[:, 20].values
Y_train = Y_train-1

X_test = testset.iloc[:, 0:20].values
Y_test = testset.iloc[:, 20].values
Y_test = Y_test-1

X_train = torch.from_numpy(X_train).type(torch.FloatTensor)
Y_train = torch.from_numpy(Y_train).type(torch.LongTensor)
X_test = torch.from_numpy(X_test).type(torch.FloatTensor)
Y_test = torch.from_numpy(Y_test).type(torch.LongTensor)


      cache-misses-1  node-loads-1  ...  branch-load-misses-5  label
0           36218220       2530196  ...              17816426      7
1           34025893       2310966  ...              38927513     23
2           36778380       2597194  ...              18699857     17
3           35736215       2499164  ...              19137869     12
4           33763999       2342137  ...              19270653     24
...              ...           ...  ...                   ...    ...
1495        32765499       2249790  ...              16673701     26
1496        36032964       2675011  ...              16989614      1
1497        34176184       2447644  ...              17000473      1
1498        32203221       2189984  ...              17285080     26
1499        34253061       2361245  ...              17671581     13

[1500 rows x 21 columns]
     cache-misses-1  node-loads-1  ...  branch-load-misses-5  label
0          33534639       2368872  ...              22351242      3
1         

In [68]:
batch_size = 512
train_set = Data.TensorDataset(X_train, Y_train)
train_loader = Data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True
)
model = MLP()

num_epochs = 5000
learning_rate = 1e-3
batch_no = len(X_train) // batch_size

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

In [69]:
model.train()
for epoch in range(1, num_epochs+1):
    loss_sum = 0
    for step, (x, y) in enumerate(train_loader):
        y_pred = model(x)
        y = y.squeeze()
        loss = loss_function(y_pred, y)
        loss_sum += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 50 == 0:
        print("epoch: %d, loss: %f" % (epoch, loss_sum/batch_size))
        acc_sum = 0
        acc_sum += (model(X_train).argmax(dim=1) == Y_train.squeeze()).sum()
        print("train accuracy: %f" % (acc_sum/len(Y_train)))

epoch: 50, loss: 45.865765
train accuracy: 0.540667
epoch: 100, loss: 28.005024
train accuracy: 0.656000
epoch: 150, loss: 22.214424
train accuracy: 0.654000
epoch: 200, loss: 14.174532
train accuracy: 0.717333
epoch: 250, loss: 13.027711
train accuracy: 0.690667
epoch: 300, loss: 8.891039
train accuracy: 0.756000
epoch: 350, loss: 13.349325
train accuracy: 0.721333
epoch: 400, loss: 5.510678
train accuracy: 0.794667
epoch: 450, loss: 7.459725
train accuracy: 0.784000
epoch: 500, loss: 5.469628
train accuracy: 0.823333
epoch: 550, loss: 5.552497
train accuracy: 0.792000
epoch: 600, loss: 4.666986
train accuracy: 0.812667
epoch: 650, loss: 4.334311
train accuracy: 0.827333
epoch: 700, loss: 3.565736
train accuracy: 0.844667
epoch: 750, loss: 2.944563
train accuracy: 0.829333
epoch: 800, loss: 2.598421
train accuracy: 0.815333
epoch: 850, loss: 3.681266
train accuracy: 0.838667
epoch: 900, loss: 2.253383
train accuracy: 0.834000
epoch: 950, loss: 2.158818
train accuracy: 0.833333
epoch: 

In [71]:
model.eval()
acc_sum = 0
print(model(X_test).argmax(dim=1))
print(Y_test.squeeze())
acc_sum += (model(X_test).argmax(dim=1) == Y_test.squeeze()).sum()
print("test accuracy: %f" % (acc_sum/len(Y_test)))

tensor([ 2,  8, 19, 29,  7, 22, 26, 24, 25, 11, 24, 17, 20, 20, 29, 16, 17, 18,
        23, 22, 19, 22,  9, 26,  2,  1,  0,  1,  0,  4, 18, 18, 22,  3,  2,  6,
        24,  9, 14, 29,  5, 11,  8,  9, 21,  5, 18, 26, 14, 27,  7,  7, 25, 23,
         7, 27, 16,  5,  6,  0,  2,  8, 26, 18,  3,  5, 27, 18,  2, 22,  9,  4,
        26, 17, 28, 11, 19, 14, 27, 16, 17, 14, 18,  3, 14, 15, 28, 22,  8,  3,
        20,  3, 13,  1, 20,  0, 29, 15,  8,  2, 14, 11, 17, 17, 20, 25, 15, 15,
         5, 21,  8, 28, 24, 10,  8,  1, 14, 17, 11,  6,  6, 28,  5, 26, 14, 20,
        11,  6, 28,  8, 12, 23,  5,  3,  4,  0,  1,  3, 16, 11,  5,  6, 26, 26,
        28, 21, 25, 18, 12, 10, 11,  7,  3, 23,  9,  1, 27, 27, 13, 14, 23, 19,
        20,  4, 14, 18, 11,  3, 16, 24, 12, 24, 10, 14, 11, 18,  2, 22, 11, 14,
        18, 15, 23, 13,  5, 13, 22, 22, 19,  5,  0,  7,  6, 22, 15, 29, 13, 20,
        10, 27, 24,  8, 19, 12,  5, 17,  0, 10, 29,  9,  0, 28,  9,  0, 15, 15,
        23, 20, 23, 26, 10, 12, 11,  9, 