<a href="https://colab.research.google.com/github/yuvalgrossman/EquSolve/blob/master/Classifier/HASY_trainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
%cd '/content/'
if not os.path.isdir('EquSolve'):
  !git clone http://github.com/yuvalgrossman/EquSolve
  %cd EquSolve
else:
  %cd EquSolve
  !git pull

import datetime
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from tqdm import tqdm
import webbrowser
import time
import pdb

# project classes:
from Classifier.HASYDataset import HASYDataset
from Classifier.Net import Net
from Utils.mapper import mapper

/content
Cloning into 'EquSolve'...
remote: Enumerating objects: 225, done.[K
remote: Counting objects: 100% (225/225), done.[K
remote: Compressing objects: 100% (179/179), done.[K
remote: Total 225 (delta 101), reused 104 (delta 37), pack-reused 0[K
Receiving objects: 100% (225/225), 21.06 MiB | 31.57 MiB/s, done.
Resolving deltas: 100% (101/101), done.
/content/EquSolve


In [4]:
class Trainer():
    def __init__(self, config, dataset):
        self.config = config
        self.device = self.get_device()
        self.dataset = dataset

        #create dir to save train results:
        theTime = "{date:%Y-%m-%d_%H-%M-%S}".format(date=datetime.datetime.now())
        self.Train_Results_Dir = 'Classifier/TrainResults/Train_Results_' + theTime
        os.mkdir(self.Train_Results_Dir)

        #create and open a webpage monitor: (we just replace one line in the html file to update the folder)
        with open("Classifier/TrainResults/monitor_base.html") as fin, open("Classifier/TrainResults/monitor.html", 'w') as fout:
            for line in fin:
                lineout = line
                if 'var results_folder' in line:
                    lineout = 'var results_folder = "Classifier/Train_Results_{}/"'.format(theTime)
                fout.write(lineout)

        webbrowser.open("Classifier/TrainResults/monitor.html")

    def train(self):
        # dataset should come as a tuple of (train_dataset,test_dataset)
        train_data = self.dataset[0]
        test_data = self.dataset[1]

        # move dataset to dataloader
        trainloader = DataLoader(train_data, batch_size=self.config['batch_size'], shuffle=True)
        testloader = DataLoader(test_data, batch_size=self.config['batch_size'], shuffle=True)
        
        # TRAINING CONFIGURATIONS:
        net = Net().to(self.device)
        print(net)

        # loss
        self.criterion = nn.CrossEntropyLoss()

        # optimizer
        self.optimizer = optim.SGD(net.parameters(), lr=self.config['lr'], momentum=self.config['momentum'])

        # define tracking measures:
        self.init_tracking_measures()

        # apply network changes according to training state
        if self.config['state'] == 'MNIST': # if training on MNIST
            weights_save_path = self.config['weights_path'] + 'MNIST_weights.pth'

        if self.config['state'] == 'HASY': # if training on HASY
            weights_load_path = self.config['weights_path'] + 'MNIST_weights.pth'
            weights_save_path = self.config['weights_path'] + 'HASY_weights.pth'

            net.load_state_dict(torch.load(weights_load_path)['state_dict']) # load MNIST weights
            net.fc3 = nn.Linear(84, len(config['sym_list']))   # change model's last layer


        # TRAINING:
        print('Start Training on {}'.format(self.device))

        for epochNum in range(self.config['train_epochs']):  # no. of epochs

            net = self.train_epoch(net, trainloader, epochNum)

            self.test_epoch(net, testloader, epochNum)

            self.generate_measures_plots() # update figures after each epoch to observe during training

        self.save_network(net, weights_save_path)

        print('Done Training {} epochs'.format(epochNum+1))

        self.generate_measures_plots()

    def init_tracking_measures(self):
        self.tracking_measures = {}
        self.tracking_measures['batch_train_loss'] = []
        self.tracking_measures['batch_train_acc'] = []
        self.tracking_measures['epoch_train_loss'] = []
        self.tracking_measures['epoch_train_acc'] = []
        self.tracking_measures['epoch_test_loss'] = []
        self.tracking_measures['epoch_test_acc'] = []

    def generate_measures_plots(self):
        for key, value in self.tracking_measures.items():
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.plot(value)
            ax.set_title(key)
            plt.grid()
            fn = os.path.join(self.Train_Results_Dir,'{}.png'.format(key))
            plt.savefig(fn)
            plt.close()

    def train_epoch(self, net, trainloader, epoch):

        epoch_loss = 0
        epoch_acc = 0
        net.train()
        for data in tqdm(trainloader):
            # data pixels and labels to GPU if available
            inputs, labels = data[0].to(self.device, non_blocking=True), data[1].to(self.device, non_blocking=True)

            # set the parameter gradients to zero
            self.optimizer.zero_grad()
            outputs = net(inputs)
            # print(outputs.shape, labels.shape)
            loss = self.criterion(outputs, labels)
            # propagate the loss backward
            loss.backward()
            # update the gradients
            self.optimizer.step()

            batch_loss = loss.item()
            epoch_loss += batch_loss
            self.tracking_measures['batch_train_loss'].append(batch_loss)

            _, predicted = torch.max(outputs.data, 1)
            batch_acc = (predicted == labels).sum().item()/len(predicted)
            epoch_acc += batch_acc
            self.tracking_measures['batch_train_acc'].append(batch_acc)

        epoch_loss /= len(trainloader)
        epoch_acc /= len(trainloader)
        print('Train Epoch {} loss: {:.3f} acc: {:.3f}'.format(epoch + 1, epoch_loss, epoch_acc))
        self.tracking_measures['epoch_train_loss'].append(epoch_loss)
        self.tracking_measures['epoch_train_acc'].append(epoch_acc)

        return net


    def test_epoch(self, net, testloader, epoch):       
        correct = 0
        total = 0
        epoch_loss = 0
        epoch_acc = 0

        net.eval()
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data[0].to(self.device, non_blocking=True), data[1].to(self.device, non_blocking=True)
                print(inputs.shape, labels.shape)
                outputs = net(inputs)
                loss = self.criterion(outputs, labels)

                # total += labels.size(0)
                # correct += (predicted == labels).sum().item()

                batch_loss = loss.item()
                epoch_loss += batch_loss

                _, predicted = torch.max(outputs.data, 1)
                batch_acc = (predicted == labels).sum().item()/len(predicted)
                epoch_acc += batch_acc

        # print('Accuracy of the network on test images: %0.3f %%' % (
        #         100 * correct / total))
        epoch_loss /= len(testloader)
        epoch_acc /= len(testloader)
        print('Test Epoch {} loss: {:.3f} acc: {:.3f}'.format(epoch + 1, epoch_loss, epoch_acc))
        self.tracking_measures['epoch_test_loss'].append(epoch_loss)
        self.tracking_measures['epoch_test_acc'].append(epoch_acc)

    def get_device(self):
        if torch.cuda.is_available():
            device = 'cuda:0'
        else:
            device = 'cpu'
        return device

    def save_network(self, net, weights_save_path):
        saved_dict = {'state_dict': net.state_dict()}
        # add custom data to the saved file:
        saved_dict['train_measures'] = self.tracking_measures
        saved_dict['config'] = self.config
        # saved_dict['class2sym_mapper'] = class2sym_mapper

        fn = os.path.join(self.Train_Results_Dir,config['state'] +'.pth')
        torch.save(saved_dict, fn)
        print('save model in ' + fn)

        fn = weights_save_path
        torch.save(saved_dict, fn)
        print('save model in ' + fn)

In [5]:
def download_dataset(config, transform):
    if config['state'] == 'MNIST':
      import torchvision
      train_dataset = torchvision.datasets.MNIST(config['data_path'], train=True, download=True,
                            transform=transform)
      test_dataset = torchvision.datasets.MNIST(config['data_path'], train=False, download=True,
                            transform=transform)                      


    if config['state'] == 'HASY':
      if not os.path.exists(config['data_path'] + 'hasy-data'): # download data  
        import tarfile
        import requests    
        url = 'https://zenodo.org/record/259444/files/HASYv2.tar.bz2?download=1'
        out = config['data_path'] + 'HASYv2.tar'
        print('Downloading HASY dataset')
        r = requests.get(url)
        with open(out, 'wb') as f:
            f.write(r.content)
        
        my_tar = tarfile.open(out)
        print('Extracting dataset')
        my_tar.extractall(config['data_path'])  # specify which folder to extract to
        my_tar.close()
        print('Done extracting')
        
      meta_data = pd.read_csv(config['data_path'] + 'hasy-data-labels.csv')
      # here we concatenate all_df with equal sign df
      all_df = mapper(meta_data,config['sym_list']) # slice only needed symbols
      print(all_df.latex.value_counts())

      dataset = HASYDataset(config,all_df,transform) # read data into dataset
      train_size = int(config['HASY_train_split'] * len(dataset))
      test_size = len(dataset) - train_size
      train_dataset, test_dataset = torch.utils.data.random_split(dataset,
                                                                  [train_size, test_size]) # split dataset to train and test

    return (train_dataset,test_dataset)

In [6]:
inner_path = ''
config = {}
config['inner_path'] = inner_path
config['data_path'] = inner_path + 'DataSets/'
config['weights_path'] = inner_path + 'Classifier/weights/'
config['train_data_path'] = 'classification-task/fold-1/train.csv'
config['test_data_path']  = 'classification-task/fold-1/test.csv'
config['batch_size'] = 128
config['train_epochs'] = 2
config['lr'] = 0.01
config['momentum'] = 0.9 
config['state'] = 'MNIST'
config['sym_list'] = ['1','2','3','4','5','6','7','8','9',
                      '\\alpha','=','+','-','\\pi','A','X','\\cdot']
config['HASY_train_split'] = 0.8

transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize(0.5,0.5),
                              ])

dataset = download_dataset(config, transform)
theTrainer = Trainer(config,dataset)

tic = time.time()
theTrainer.train()
print('Proccess took {:.2f} m.'.format((time.time() - tic)/60))

  0%|          | 0/469 [00:00<?, ?it/s]

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
Start Training on cpu


100%|██████████| 469/469 [00:21<00:00, 21.84it/s]


Train Epoch 1 loss: 0.651 acc: 0.802
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28,

100%|██████████| 469/469 [00:21<00:00, 21.92it/s]


Train Epoch 2 loss: 0.084 acc: 0.974
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28,

In [7]:
config['state'] = 'HASY'

transform = transforms.Compose([transforms.Resize([28,28]),                                
                      transforms.ToTensor(),
                      transforms.Normalize(0.5,0.5),
                      ])

dataset = download_dataset(config, transform)
theTrainer = Trainer(config,dataset)

tic = time.time()
theTrainer.train()
print('Proccess took {:.2f} m.'.format((time.time() - tic)/60))

Downloading HASY dataset
Extracting dataset
Done extracting
\alpha    2601
\pi       1533
\cdot      755
A          159
2          124
8          121
3          120
1          118
-          118
6          100
+           90
9           90
5           78
7           75
4           61
X           54
Name: latex, dtype: int64


  3%|▎         | 1/39 [00:00<00:04,  8.69it/s]

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
Start Training on cpu


100%|██████████| 39/39 [00:03<00:00, 11.19it/s]


Train Epoch 1 loss: 1.484 acc: 0.616
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([88, 1, 28, 28]) torch.Size([88])
Test Epoch 1 loss: 0.915 acc: 0.752


100%|██████████| 39/39 [00:03<00:00, 11.71it/s]


Train Epoch 2 loss: 0.764 acc: 0.774
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([128, 1, 28, 28]) torch.Size([128])
torch.Size([88, 1, 28, 28]) torch.Size([88])
Test Epoch 2 loss: 0.677 acc: 0.784
save model in Classifier/TrainResults/Train_Results_2020-09-01_08-50-15/HASY.pth
save model in Classifier/weights/HASY_weights.pth
Done Training 2 epochs
Proccess took 0.17 m.
