In [None]:
!pip install bloscpack

Collecting bloscpack
[?25l  Downloading https://files.pythonhosted.org/packages/39/c6/ca9b5567caad38b118bf0cbd92d122b067ebc7961742793c6b4c02895bef/bloscpack-0.16.0.tar.gz (99kB)
[K     |████████████████████████████████| 102kB 4.9MB/s 
[?25hCollecting blosc
[?25l  Downloading https://files.pythonhosted.org/packages/1c/89/004eeb307ba3f56fca9727f47fbde09f8ae3936a1cb0dd6b7708f846d3fd/blosc-1.9.2.tar.gz (816kB)
[K     |████████████████████████████████| 819kB 8.7MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting deprecated
  Downloading https://files.pythonhosted.org/packages/76/a1/05d7f62f956d77b23a640efc650f80ce24483aa2f85a09c03fb64f49e879/Deprecated-1.2.10-py2.py3-none-any.whl
Building wheels for collected packages: blosc
  Building wheel for blosc (PEP 517) ... [?25l[?25hdone
  Created wheel for blosc: filename=blosc-1.9.2-cp36-cp36m-linux_x86_64.wh

In [None]:
import warnings
warnings.filterwarnings('ignore')
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F


import random
from tqdm import tqdm
import numpy as np
import bloscpack as bp
from sklearn import metrics

import time
from datetime import timedelta

In [None]:
# Run on the first CUDA GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")
print(device)

cuda:0


In [None]:
# Mount Google Drive at /content/drive; the data files read below and the
# training log written later both live under that directory.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load data

In [None]:

# master_X = np.load(data_path+'/small_input_X.npy')
# master_Y = np.load(data_path+'/small_input_Y.npy')
# Unpack the bloscpack-compressed input and label arrays stored on Drive.
master_X = bp.unpack_ndarray_from_file(
    '/content/drive/MyDrive/majhong/small_input/input_X_0_100136.nosync.blp'
)
master_Y = bp.unpack_ndarray_from_file(
    '/content/drive/MyDrive/majhong/small_input/input_Y_0_100136.nosync.blp'
)
# Replace the label matrix by the column index of every entry equal to 1
# (presumably one-hot rows, giving one class index per sample - TODO confirm).
master_Y = np.nonzero(master_Y == 1)[1]
print(master_X.shape, master_Y.shape)

(100136, 50, 4, 34) (100136,)


In [None]:
# Display the label vector produced by the loading cell.
master_Y

array([29,  6, 31, ..., 33, 26,  4])

In [None]:
# List the distinct label values that occur in master_Y.
np.unique(master_Y)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33])

In [None]:
# Split into training and validation sets: a random 80% of the row indices is
# used for training, the remaining rows for validation.
SPLIT_SEED = 42  # fixed seed so the same split is produced on every run
n = len(master_X)
train_size = int(0.8*n)
# A dedicated Random instance keeps the split reproducible without reseeding
# the global `random` state.
split_rng = random.Random(SPLIT_SEED)
train_idx = split_rng.sample(range(n),train_size)
# Membership is tested against a set; scanning the train_idx list for every
# candidate index made this step quadratic in the dataset size.
train_idx_set = set(train_idx)
val_idx = [i for i in range(n) if i not in train_idx_set]

# Inputs become float32 tensors, labels int64 tensors (what CrossEntropyLoss expects).
train_X = torch.from_numpy(master_X[train_idx,:,:,:]).float()
train_Y = torch.from_numpy(master_Y[train_idx]).long()
val_X = torch.from_numpy(master_X[val_idx,:,:,:]).float()
val_Y = torch.from_numpy(master_Y[val_idx]).long()
print(train_X.shape,train_Y.shape,val_X.shape,val_Y.shape)

# Transform into dataloader
train_dataset = TensorDataset(train_X, train_Y)
val_dataset = TensorDataset(val_X, val_Y)

# Only the training batches are shuffled; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset,batch_size=128,shuffle=True,num_workers=2)
val_dataloader = DataLoader(val_dataset,batch_size=128,shuffle=False,num_workers=2)

torch.Size([80108, 50, 4, 34]) torch.Size([80108]) torch.Size([20028, 50, 4, 34]) torch.Size([20028])


### Model

In [None]:
class MahjongNet(nn.Module):
  """Convolutional classifier that outputs 34 logits per sample.

  Expects input of shape (batch, 50, 4, 34). Three unpadded (2, 5)
  convolutions shrink the (4, 34) plane to (1, 22); the resulting
  100 * 1 * 22 = 2200 features feed two fully connected layers.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=50,out_channels=100,kernel_size=(2,5),stride=1)
    self.conv2 = nn.Conv2d(in_channels=100,out_channels=100,kernel_size=(2,5),stride=1)
    self.conv3 = nn.Conv2d(in_channels=100,out_channels=100,kernel_size=(2,5),stride=1)
    self.fc1 = nn.Linear(2200,300,bias=True)
    self.fc2 = nn.Linear(300,34,bias=True)
    # NOTE(review): this single BatchNorm2d is applied after all three conv
    # layers, so they share one set of affine parameters and one set of running
    # statistics. Separate layers are probably intended, but splitting them
    # changes the state_dict keys, so it is left as is here - confirm.
    self.batch_norm2d = nn.BatchNorm2d(100)
    self.batch_norm1d = nn.BatchNorm1d(300)
    self.dropout = nn.Dropout2d(0.5)

  def forward(self,x):
    """Map a (batch, 50, 4, 34) tensor to (batch, 34) unnormalised class scores."""
    conv1_output = F.relu(self.conv1(x))
    conv1_output = self.batch_norm2d(conv1_output)
    conv1_output = self.dropout(conv1_output)

    conv2_output = F.relu(self.conv2(conv1_output))
    conv2_output = self.batch_norm2d(conv2_output)
    conv2_output = self.dropout(conv2_output)

    conv3_output = F.relu(self.conv3(conv2_output))
    conv3_output = self.batch_norm2d(conv3_output)
    conv3_output = self.dropout(conv3_output)

    # Flatten everything except the batch dimension for the linear layers.
    output = conv3_output.reshape(conv3_output.size(0),-1)
    output = F.relu(self.fc1(output))
    output = self.batch_norm1d(output)
    # F.dropout defaults to training=True; tie it to the module's mode so that
    # model.eval() really disables it and evaluation is deterministic.
    output = F.dropout(output, training=self.training)
    output = self.fc2(output)

    return output

In [None]:
from torchsummary import summary
# Build the network on the selected device and print its per-layer summary
# for a single (50, 4, 34) input.
net = MahjongNet().to(device)
summary(net,(50,4,34))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 100, 3, 30]          50,100
       BatchNorm2d-2           [-1, 100, 3, 30]             200
         Dropout2d-3           [-1, 100, 3, 30]               0
            Conv2d-4           [-1, 100, 2, 26]         100,100
       BatchNorm2d-5           [-1, 100, 2, 26]             200
         Dropout2d-6           [-1, 100, 2, 26]               0
            Conv2d-7           [-1, 100, 1, 22]         100,100
       BatchNorm2d-8           [-1, 100, 1, 22]             200
         Dropout2d-9           [-1, 100, 1, 22]               0
           Linear-10                  [-1, 300]         660,300
      BatchNorm1d-11                  [-1, 300]             600
           Linear-12                   [-1, 34]          10,234
Total params: 922,034
Trainable params: 922,034
Non-trainable params: 0
-------------------------------

### Training

In [None]:
import torch.optim as optim
def get_time_dif(start_time):
  """Return the wall-clock time elapsed since start_time.

  start_time is a time.time() timestamp; the result is a timedelta rounded
  to the nearest whole second.
  """
  elapsed_seconds = time.time() - start_time
  return timedelta(seconds=int(round(elapsed_seconds)))

In [None]:
def evaluate(model, data_loader):
  """Score model on every batch of data_loader without computing gradients.

  The model is switched to eval mode and left there; a caller that continues
  training must call model.train() afterwards.

  Args:
    model: network mapping an input batch to per-class logits.
    data_loader: iterable yielding (inputs, labels) batches.

  Returns:
    Tuple (accuracy, macro recall, macro F1, loss), where loss is the mean of
    the per-batch cross-entropy losses as a plain Python float.

  Raises:
    ValueError: if data_loader yields no batches.
  """
  model.eval()
  loss_total = 0.0
  # Per-batch arrays are collected in lists and joined once at the end;
  # np.append would copy the whole accumulated array on every batch.
  predict_batches = []
  label_batches = []
  with torch.no_grad():
    for inputs, labels in data_loader:
      inputs, labels = inputs.to(device), labels.to(device)
      outputs = model(inputs)
      # .item() keeps the running total a Python float instead of a device tensor.
      loss_total += F.cross_entropy(outputs, labels).item()
      label_batches.append(labels.cpu().numpy())
      predict_batches.append(outputs.argmax(dim=1).cpu().numpy())
  if not label_batches:
    raise ValueError('data_loader produced no batches to evaluate')
  labels_all = np.concatenate(label_batches)
  predict_all = np.concatenate(predict_batches)
  acc = metrics.accuracy_score(labels_all, predict_all)
  recall = metrics.recall_score(labels_all, predict_all,average='macro')
  f1 = metrics.f1_score(labels_all, predict_all,average='macro')
  return acc, recall, f1, loss_total/len(label_batches)

In [None]:
def train(model,train_loader,val_loader,learning_rate=0.0025,
    num_epochs=100,save_path='/content/drive/MyDrive/majhong/acc_loss.txt',
    checkpoint_path=None,log_interval=50,patience=None):
  """Train model with Adam and cross-entropy, validating every log_interval batches.

  At each validation point one line is appended to the log file at save_path:
  "train_acc mean_train_loss val_acc val_loss val_f1 val_recall", where
  train_acc is measured on the current batch only and mean_train_loss is the
  average over the last log_interval batches. Whenever the validation loss
  reaches a new minimum the model's state_dict is saved to checkpoint_path.

  Args:
    model: network to optimise (already moved to `device`).
    train_loader: iterable of (inputs, labels) training batches.
    val_loader: iterable of (inputs, labels) batches passed to evaluate().
    learning_rate: Adam learning rate.
    num_epochs: number of passes over train_loader.
    save_path: path of the text log; it is truncated at the start.
    checkpoint_path: where the best weights are stored. Defaults to save_path
      with its extension replaced by '_best.pt'.
    log_interval: number of batches between two validation/logging points.
    patience: if given, stop once more than this many batches have passed
      since the last validation-loss improvement. None disables early stopping.

  Raises:
    ValueError: if log_interval is smaller than 1.
  """
  import os  # local import: only needed to derive the default checkpoint path

  if log_interval < 1:
    raise ValueError('log_interval must be a positive integer')
  if checkpoint_path is None:
    # The weights used to be saved to save_path itself, which is the log file
    # that is still open for writing - each write clobbered the other.
    checkpoint_path = os.path.splitext(save_path)[0] + '_best.pt'

  start_time = time.time()
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=learning_rate)
  total_batch = 0
  val_best_loss = float('inf')
  last_improve = 0

  model.train()
  # The context manager closes the log on normal exit, early stop or interrupt.
  with open(save_path, 'w') as f:
    for epoch in range(num_epochs):
      running_loss = 0.0
      print('Epoch[{}/{}]'.format(epoch+1, num_epochs))
      for i,data in enumerate(train_loader,0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs,labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % log_interval == log_interval - 1:
          # Average over the batches actually accumulated (previously divided
          # by 20 although 50 batches were summed).
          train_loss = running_loss / log_interval
          predic = outputs.argmax(dim=1)
          train_acc = (predic == labels).sum().item() / labels.size(0)
          val_acc, val_recall, val_f1, val_loss = evaluate(model,val_loader)
          # evaluate() may hand back a 0-d tensor; log and compare a plain float.
          val_loss = float(val_loss)
          f.write(str(train_acc)+ ' '+ str(train_loss) + ' '+ str(val_acc) + ' ' + str(val_loss) +' '+ str(val_f1) + ' '+str(val_recall) + '\n')
          if val_loss < val_best_loss:
            val_best_loss = val_loss
            torch.save(model.state_dict(),checkpoint_path)
            improve = '*'
            last_improve = total_batch
          else:
            improve = ''
          time_dif = get_time_dif(start_time)
          msg = 'Iter: {0:>6}, Train Loss:{1:>5.4}, Train Acc:{2:6.2%}, Val Loss:{3:5.4}, Val Acc:{4:6.2%}, Val Recall:{5:6.2%} Val F1:{6:6.2%}, Time:{7} {8}'
          print(msg.format(total_batch,train_loss,train_acc,val_loss,val_acc,val_recall,val_f1,time_dif,improve))
          # evaluate() leaves the model in eval mode; switch back before the next step.
          model.train()
          running_loss = 0.0
        total_batch += 1
        if patience is not None and total_batch - last_improve > patience:
          print("No optimization for a long time, auto-stopping...")
          return

In [None]:
# Train a freshly initialised network with a learning rate of 0.001;
# the remaining train() arguments keep their defaults.
model = MahjongNet()
model = model.to(device)
train(model, train_loader=train_dataloader, val_loader=val_dataloader, learning_rate=0.001)

Epoch[1/100]
Iter:     49, Train Loss:9.415, Train Acc: 2.34%, Val Loss: 3.76, Val Acc: 4.40%, Val Recall: 3.08% Val F1: 1.28%, Time:0:00:02 *
Iter:     99, Train Loss:9.147, Train Acc: 5.47%, Val Loss:3.844, Val Acc: 4.22%, Val Recall: 3.02% Val F1: 0.81%, Time:0:00:04 
Iter:    149, Train Loss:9.044, Train Acc: 4.69%, Val Loss:4.018, Val Acc: 4.24%, Val Recall: 3.00% Val F1: 1.12%, Time:0:00:06 
Iter:    199, Train Loss:8.931, Train Acc: 3.12%, Val Loss:4.059, Val Acc: 4.10%, Val Recall: 2.86% Val F1: 1.17%, Time:0:00:08 
Iter:    249, Train Loss:8.894, Train Acc: 7.03%, Val Loss:4.015, Val Acc: 4.22%, Val Recall: 2.86% Val F1: 1.06%, Time:0:00:09 
Iter:    299, Train Loss:8.807, Train Acc: 6.25%, Val Loss:4.018, Val Acc: 4.21%, Val Recall: 2.88% Val F1: 0.96%, Time:0:00:11 
Iter:    349, Train Loss:8.827, Train Acc: 4.69%, Val Loss:4.045, Val Acc: 3.94%, Val Recall: 2.75% Val F1: 0.98%, Time:0:00:13 
Iter:    399, Train Loss:8.833, Train Acc: 5.47%, Val Loss: 3.87, Val Acc: 4.14%, V

In [None]:
# Training model-V2
save_path='/content/drive/MyDrive/majhong/acc_loss.txt'
import torch.optim as optim
net = MahjongNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
log_every = 20  # number of mini-batches between two report/log points

# The context manager closes the log file even when training is interrupted by hand.
with open(save_path, 'w+') as f:
    for epoch in range(500):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(train_dataloader, 0):
            # get the inputs
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % log_every == log_every - 1:    # report every `log_every` mini-batches
                avg_loss = running_loss / log_every
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, avg_loss))
                # The logged metrics were never computed in this cell (the write
                # raised NameError); derive them here. Note that this runs a full
                # validation pass at every report point.
                train_acc = (outputs.argmax(dim=1) == labels).sum().item() / labels.size(0)
                val_acc, val_recall, val_f1, val_loss = evaluate(net, val_dataloader)
                net.train()  # evaluate() leaves the network in eval mode
                # Values are converted with str(); concatenating floats with ' ' fails.
                log_values = (train_acc, avg_loss, val_acc, float(val_loss), val_f1, val_recall)
                f.write(' '.join(str(value) for value in log_values) + '\n')
                running_loss = 0.0

print('Finished Training')

[1,    20] loss: 3.796
[1,    40] loss: 3.782
[1,    60] loss: 3.723
[1,    80] loss: 3.666
[1,   100] loss: 3.658
[1,   120] loss: 3.600
[1,   140] loss: 3.584
[1,   160] loss: 3.589
[1,   180] loss: 3.556
[1,   200] loss: 3.564
[1,   220] loss: 3.581
[1,   240] loss: 3.529
[1,   260] loss: 3.545
[1,   280] loss: 3.561
[1,   300] loss: 3.554
[1,   320] loss: 3.510
[1,   340] loss: 3.514
[1,   360] loss: 3.526
[1,   380] loss: 3.519
[1,   400] loss: 3.510
[1,   420] loss: 3.507
[1,   440] loss: 3.495
[1,   460] loss: 3.524
[1,   480] loss: 3.525


KeyboardInterrupt: ignored

In [None]:
# # Training model-V3
# import torch.optim.lr_scheduler as lr_scheduler
# criterion = nn.CrossEntropyLoss()
# # optimizer = torch.optim.SGD(net.parameters(),lr = 0.2,momentum =0.9)
# optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
# scheduler = lr_scheduler.MultiStepLR(optimizer, [95], gamma=0.5)
# def train_model(model,optimizer,n_epochs,criterion):
#     start_time = time.time()
#     for epoch in range(1,n_epochs-1):
#         epoch_time = time.time()
#         epoch_loss = 0
#         correct = 0
#         total = 0
#         print( "Epoch {}/{}".format(epoch,n_epochs))
    
#     #########################train the model
#         model.train()
#         num_batches = len(train_dataset) // train_dataloader.batch_size
#         for data in train_dataloader:
#             inputs, labels = data
#             #get the inputs and labels in training data
#             inputs = inputs.to(device)
#             #labels = labels.to(device)
#             labels = labels.to(device)

#             #zero the parameter gradients
#             optimizer.zero_grad()

#             #forward + backward + optimize
#             output = model(inputs)
#             loss = criterion(output,labels)
#             loss.backward()
#             optimizer.step()

#             #compute training loss
#             epoch_loss +=loss.item()

#             #compute training accuracy
#             _,pred =torch.max(output,1)
#             correct += (pred.cpu()==labels.cpu()).sum().item()
#             total +=labels.shape[0]
#         scheduler.step()
#         train_loss = epoch_loss/num_batches
#         acc = correct/total

#     #####################################evaluation     
#         model.eval()
#         a= 0
#         pred_val = 0
#         corr = 0
#         tot = 0
#         num_batches = len(val_dataset) // val_dataloader.batch_size
        
#         with torch.no_grad():
#             for val_inp,val_label in val_dataloader:
#                 val_inp = val_inp.to(device)
#                 val_label = val_label.to(device)

#                 #forward
#                 out_val = model(val_inp)
#                 loss = criterion(out_val,val_label)

#                 #compute evaluation loss
#                 a += loss.item()

#                 #compute evaluation accuracy
#                 _,pred_val = torch.max(out_val,1)
#                 corr += (pred_val.cpu()==val_label.cpu()).sum().item()
#                 tot += val_label.shape[0]
#             acc_val = corr/tot
#             val_loss = a/num_batches

#         #print   
#         epoch_time2 = time.time()    
#         print("Duration : {:.4f},Train Loss :{:.4f},Train Acc :{:.4f}, Valid Loss:{:.4f},Valid acc :{:.4f}".format(
#         epoch_time2-epoch_time, train_loss, acc, val_loss, acc_val))
#     end_time= time.time()
#     print("Total time :{:.0f}s".format(end_time - start_time))

In [None]:
# train_model(model=net, optimizer=optimizer, n_epochs=100, criterion=criterion)