<a href="https://colab.research.google.com/github/yuvalgrossman/EquSolve/blob/master/Classifier/HASY_trainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
%cd '/content/'
if not os.path.isdir('EquSolve'):
  !git clone http://github.com/yuvalgrossman/EquSolve
  %cd EquSolve
else:
  %cd EquSolve
  !git pull

import datetime
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from tqdm import tqdm
import webbrowser
import time
import pdb

# project classes:
from Classifier.HASYDataset import HASYDataset
from Classifier.Net import Net
from Utils.mapper import mapper

/content
/content/EquSolve
remote: Enumerating objects: 13, done.[K
remote: Counting objects: 100% (13/13), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 9 (delta 5), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (9/9), done.
From http://github.com/yuvalgrossman/EquSolve
   9b1024f..b798d58  master     -> origin/master
Updating 9b1024f..b798d58
error: The following untracked working tree files would be overwritten by merge:
	Classifier/weights/HASY_weights.pth
Please move or remove them before you merge.
Aborting


In [2]:
class Trainer():
    def __init__(self, config, transform):
        self.config = config
        self.device = self.get_device()
        self.transform = transform

        #create dir to save train results:
        theTime = "{date:%Y-%m-%d_%H-%M-%S}".format(date=datetime.datetime.now())
        self.Train_Results_Dir = 'Classifier/TrainResults/Train_Results_' + theTime
        os.mkdir(self.Train_Results_Dir)

        #create and open a webpage monitor: (we just replace one line in the html file to update the folder)
        with open("Classifier/TrainResults/monitor_base.html") as fin, open("Classifier/TrainResults/monitor.html", 'w') as fout:
            for line in fin:
                lineout = line
                if 'var results_folder' in line:
                    lineout = 'var results_folder = "Classifier/Train_Results_{}/"'.format(theTime)
                fout.write(lineout)

        webbrowser.open("Classifier/TrainResults/monitor.html")

    def train(self):
        # dataset should come as a tuple of (train_dataset,test_dataset)
        dataset = download_dataset(self.config, self.transform)
        train_data = dataset[0]
        test_data  = dataset[1]

        # move dataset to dataloader
        trainloader = DataLoader(train_data, batch_size=self.config['batch_size'], shuffle=True)
        testloader = DataLoader(test_data, batch_size=self.config['batch_size'], shuffle=True)
        
        # TRAINING CONFIGURATIONS:
        net = Net().to(self.device)
        print(net)

        # loss
        self.criterion = nn.CrossEntropyLoss()

        # optimizer
        self.optimizer = optim.SGD(net.parameters(), lr=self.config['lr'], momentum=self.config['momentum'])

        # define tracking measures:
        self.init_tracking_measures()

        # apply network changes according to training state
        if self.config['state'] == 'MNIST': # if training on MNIST
            weights_save_path = self.config['weights_path'] + 'MNIST_weights.pth'

        if self.config['state'] == 'HASY': # if training on HASY
            weights_load_path = self.config['weights_path'] + 'MNIST_weights.pth'
            weights_save_path = self.config['weights_path'] + 'HASY_weights.pth'

            net.load_state_dict(torch.load(weights_load_path)['state_dict']) # load MNIST weights
            net.fc3 = nn.Linear(84, len(config['sym_list'])).to(self.device)   # change model's last layer


        # TRAINING:
        print('Start Training on {}'.format(self.device))

        for epochNum in range(self.config['train_epochs']):  # no. of epochs

            net = self.train_epoch(net, trainloader, epochNum)

            self.test_epoch(net, testloader, epochNum)

            self.generate_measures_plots() # update figures after each epoch to observe during training

        self.save_network(net, weights_save_path)

        print('Done Training {} epochs'.format(epochNum+1))

        self.generate_measures_plots()

    def init_tracking_measures(self):
        self.tracking_measures = {}
        self.tracking_measures['batch_train_loss'] = []
        self.tracking_measures['batch_train_acc'] = []
        self.tracking_measures['epoch_train_loss'] = []
        self.tracking_measures['epoch_train_acc'] = []
        self.tracking_measures['epoch_test_loss'] = []
        self.tracking_measures['epoch_test_acc'] = []

    def generate_measures_plots(self):
        for key, value in self.tracking_measures.items():
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.plot(value)
            ax.set_title(key)
            plt.grid()
            fn = os.path.join(self.Train_Results_Dir,'{}.png'.format(key))
            plt.savefig(fn)
            plt.close()

    def train_epoch(self, net, trainloader, epoch):

        epoch_loss = 0
        epoch_acc = 0
        net.train()
        
        for data in tqdm(trainloader):
            # data pixels and labels to GPU if available
            inputs, labels = data[0].to(self.device, non_blocking=True), data[1].to(self.device, non_blocking=True)
            # set the parameter gradients to zero
            self.optimizer.zero_grad()
            outputs = net(inputs)
            # print(outputs.shape, labels.shape)
            loss = self.criterion(outputs, labels)
            # propagate the loss backward
            loss.backward()
            # update the gradients
            self.optimizer.step()

            batch_loss = loss.item()
            epoch_loss += batch_loss
            self.tracking_measures['batch_train_loss'].append(batch_loss)

            _, predicted = torch.max(outputs.data, 1)
            batch_acc = (predicted == labels).sum().item()/len(predicted)
            epoch_acc += batch_acc
            self.tracking_measures['batch_train_acc'].append(batch_acc)

        epoch_loss /= len(trainloader)
        epoch_acc /= len(trainloader)
        print('Train Epoch {} loss: {:.3f} acc: {:.3f}'.format(epoch + 1, epoch_loss, epoch_acc))
        self.tracking_measures['epoch_train_loss'].append(epoch_loss)
        self.tracking_measures['epoch_train_acc'].append(epoch_acc)

        return net


    def test_epoch(self, net, testloader, epoch):       
        correct = 0
        total = 0
        epoch_loss = 0
        epoch_acc = 0

        net.eval()
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data[0].to(self.device, non_blocking=True), data[1].to(self.device, non_blocking=True)
                outputs = net(inputs)
                loss = self.criterion(outputs, labels)

                # total += labels.size(0)
                # correct += (predicted == labels).sum().item()

                batch_loss = loss.item()
                epoch_loss += batch_loss

                _, predicted = torch.max(outputs.data, 1)
                batch_acc = (predicted == labels).sum().item()/len(predicted)
                epoch_acc += batch_acc

        # print('Accuracy of the network on test images: %0.3f %%' % (
        #         100 * correct / total))
        epoch_loss /= len(testloader)
        epoch_acc /= len(testloader)
        print('Test Epoch {} loss: {:.3f} acc: {:.3f}'.format(epoch + 1, epoch_loss, epoch_acc))
        self.tracking_measures['epoch_test_loss'].append(epoch_loss)
        self.tracking_measures['epoch_test_acc'].append(epoch_acc)

    def get_device(self):
        if torch.cuda.is_available():
            device = 'cuda:0'
        else:
            device = 'cpu'
        return device

    def save_network(self, net, weights_save_path):
        saved_dict = {'state_dict': net.state_dict()}
        # add custom data to the saved file:
        saved_dict['train_measures'] = self.tracking_measures
        saved_dict['config'] = self.config
        # saved_dict['class2sym_mapper'] = class2sym_mapper

        fn = os.path.join(self.Train_Results_Dir,config['state'] +'.pth')
        torch.save(saved_dict, fn)
        print('save model in ' + fn)

        fn = weights_save_path
        torch.save(saved_dict, fn)
        print('save model in ' + fn)

    def download_dataset(config, transform):
      if config['state'] == 'MNIST':
        import torchvision
        train_dataset = torchvision.datasets.MNIST(config['data_path'], train=True, download=True,
                              transform=transform)
        test_dataset = torchvision.datasets.MNIST(config['data_path'], train=False, download=True,
                              transform=transform)                      


      if config['state'] == 'HASY':
        if not os.path.exists(config['data_path'] + 'hasy-data'): # download data  
          import tarfile
          import requests    
          url = 'https://zenodo.org/record/259444/files/HASYv2.tar.bz2?download=1'
          out = config['data_path'] + 'HASYv2.tar'
          print('Downloading HASY dataset')
          r = requests.get(url)
          with open(out, 'wb') as f:
              f.write(r.content)
          
          my_tar = tarfile.open(out)
          print('Extracting dataset')
          my_tar.extractall(config['data_path'])  # specify which folder to extract to
          my_tar.close()
          print('Done extracting')
          
        meta_data = pd.read_csv(config['data_path'] + 'hasy-data-labels.csv')
        # here we concatenate all_df with equal sign df
        all_df = mapper(meta_data,config['sym_list']) # slice only needed symbols
        print(all_df.latex.value_counts())

        dataset = HASYDataset(config,all_df,transform) # read data into dataset
        train_size = int(config['HASY_train_split'] * len(dataset))
        test_size = len(dataset) - train_size
        train_dataset, test_dataset = torch.utils.data.random_split(dataset,
                                                                    [train_size, test_size]) # split dataset to train and test

      return (train_dataset,test_dataset)

In [3]:
def download_dataset(config, transform):
    if config['state'] == 'MNIST':
      import torchvision
      train_dataset = torchvision.datasets.MNIST(config['data_path'], train=True, download=True,
                            transform=transform)
      test_dataset = torchvision.datasets.MNIST(config['data_path'], train=False, download=True,
                            transform=transform)                      


    if config['state'] == 'HASY':
      if not os.path.exists(config['data_path'] + 'hasy-data'): # download data  
        import tarfile
        import requests    
        url = 'https://zenodo.org/record/259444/files/HASYv2.tar.bz2?download=1'
        out = config['data_path'] + 'HASYv2.tar'
        print('Downloading HASY dataset')
        r = requests.get(url)
        with open(out, 'wb') as f:
            f.write(r.content)
        
        my_tar = tarfile.open(out)
        print('Extracting dataset')
        my_tar.extractall(config['data_path'])  # specify which folder to extract to
        my_tar.close()
        print('Done extracting')
        
      meta_data = pd.read_csv(config['data_path'] + 'hasy-data-labels.csv')
      # here we concatenate all_df with equal sign df
      all_df = mapper(meta_data,config['sym_list']) # slice only needed symbols
      print(all_df.latex.value_counts())

      dataset = HASYDataset(config,all_df,transform) # read data into dataset
      train_size = int(config['HASY_train_split'] * len(dataset))
      test_size = len(dataset) - train_size
      train_dataset, test_dataset = torch.utils.data.random_split(dataset,
                                                                  [train_size, test_size]) # split dataset to train and test

    return (train_dataset,test_dataset)

In [4]:
inner_path = ''
config = {}
config['inner_path'] = inner_path
config['data_path'] = inner_path + 'DataSets/'
config['weights_path'] = inner_path + 'Classifier/weights/'
config['train_data_path'] = 'classification-task/fold-1/train.csv'
config['test_data_path']  = 'classification-task/fold-1/test.csv'
config['batch_size'] = 128
config['train_epochs'] = 5
config['lr'] = 0.01
config['momentum'] = 0.9 
config['state'] = 'MNIST'
config['sym_list'] = ['1','2','3','4','5','6','7','8','9',
                      '\\alpha','=','+','-','\\pi','A','X','\\cdot']
config['HASY_train_split'] = 0.8

transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize(0.5,0.5),
                              ])

theTrainer = Trainer(config, transform)

tic = time.time()
theTrainer.train()
print('Proccess took {:.2f} m.'.format((time.time() - tic)/60))

  1%|▏         | 6/469 [00:00<00:08, 52.60it/s]

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
Start Training on cuda:0


100%|██████████| 469/469 [00:08<00:00, 55.52it/s]


Train Epoch 1 loss: 0.659 acc: 0.778
Test Epoch 1 loss: 0.114 acc: 0.966


100%|██████████| 469/469 [00:08<00:00, 55.28it/s]


Train Epoch 2 loss: 0.091 acc: 0.972
Test Epoch 2 loss: 0.078 acc: 0.975


100%|██████████| 469/469 [00:08<00:00, 55.29it/s]


Train Epoch 3 loss: 0.064 acc: 0.980
Test Epoch 3 loss: 0.042 acc: 0.987


100%|██████████| 469/469 [00:08<00:00, 55.17it/s]


Train Epoch 4 loss: 0.052 acc: 0.984
Test Epoch 4 loss: 0.043 acc: 0.986


100%|██████████| 469/469 [00:08<00:00, 55.59it/s]


Train Epoch 5 loss: 0.043 acc: 0.986
Test Epoch 5 loss: 0.037 acc: 0.988
save model in Classifier/TrainResults/Train_Results_2020-09-01_11-12-39/MNIST.pth
save model in Classifier/weights/MNIST_weights.pth
Done Training 5 epochs
Proccess took 0.93 m.


In [12]:
config['state'] = 'HASY'
config['train_epochs'] = 10

transform = transforms.Compose([transforms.Resize([28,28]),                                
                      transforms.ToTensor(),
                      transforms.Normalize(0.5,0.5),
                      ])

theTrainer = Trainer(config, transform)

tic = time.time()
theTrainer.train()
print('Proccess took {:.2f} m.'.format((time.time() - tic)/60))

  5%|▌         | 2/39 [00:00<00:02, 18.44it/s]

\alpha    2601
\pi       1533
\cdot      755
A          159
2          124
8          121
3          120
-          118
1          118
6          100
+           90
9           90
5           78
7           75
4           61
X           54
Name: latex, dtype: int64
Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
Start Training on cuda:0


100%|██████████| 39/39 [00:02<00:00, 18.48it/s]


Train Epoch 1 loss: 1.472 acc: 0.612
Test Epoch 1 loss: 0.837 acc: 0.764


100%|██████████| 39/39 [00:02<00:00, 18.87it/s]


Train Epoch 2 loss: 0.645 acc: 0.819
Test Epoch 2 loss: 0.510 acc: 0.867


100%|██████████| 39/39 [00:02<00:00, 18.98it/s]


Train Epoch 3 loss: 0.390 acc: 0.900
Test Epoch 3 loss: 0.337 acc: 0.907


100%|██████████| 39/39 [00:02<00:00, 18.88it/s]


Train Epoch 4 loss: 0.265 acc: 0.928
Test Epoch 4 loss: 0.273 acc: 0.925


100%|██████████| 39/39 [00:02<00:00, 18.82it/s]


Train Epoch 5 loss: 0.206 acc: 0.944
Test Epoch 5 loss: 0.221 acc: 0.935


100%|██████████| 39/39 [00:02<00:00, 18.96it/s]


Train Epoch 6 loss: 0.167 acc: 0.956
Test Epoch 6 loss: 0.206 acc: 0.941


100%|██████████| 39/39 [00:02<00:00, 19.05it/s]


Train Epoch 7 loss: 0.147 acc: 0.963
Test Epoch 7 loss: 0.180 acc: 0.946


100%|██████████| 39/39 [00:02<00:00, 18.85it/s]


Train Epoch 8 loss: 0.128 acc: 0.966
Test Epoch 8 loss: 0.168 acc: 0.954


100%|██████████| 39/39 [00:02<00:00, 18.82it/s]


Train Epoch 9 loss: 0.116 acc: 0.969
Test Epoch 9 loss: 0.163 acc: 0.951


100%|██████████| 39/39 [00:02<00:00, 18.36it/s]


Train Epoch 10 loss: 0.100 acc: 0.973
Test Epoch 10 loss: 0.154 acc: 0.957
save model in Classifier/TrainResults/Train_Results_2020-09-01_11-17-03/HASY.pth
save model in Classifier/weights/HASY_weights.pth
Done Training 10 epochs
Proccess took 0.55 m.


In [6]:
# !git add '/content/EquSolve/Classifier/TrainResults/Train_Results_2020-08-27_11-28-41
# !git commit --message="Add train results"'
# !git push --set-upstream origin master

In [7]:
# !git remote rm origin
# !git remote add origin 'git@github.com:yuvalgrossman/EquSolve.git'
# # !RUN git clone https://github.com/edenhill/librdkafka.git