In [16]:
from __future__ import print_function
import pickle 
import numpy as np
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [17]:
from sub import subMNIST       # testing the subclass of MNIST dataset

In [27]:
# Preprocessing pipeline handed to every dataset below: convert the image to
# a tensor, then normalize it with mean 0.1307 and std 0.3081 (one value per
# channel, so single-element tuples).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [30]:
# Full MNIST training split; every subset built later is indexed out of it.
# NOTE(review): the output recorded for this cell is a URLError (download
# timed out), and the root here is '../data' while the subMNIST calls further
# down use './data' -- confirm which directory is intended.
trainset_original = datasets.MNIST(
    root='../data',
    train=True,
    download=True,
    transform=transform,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


URLError: <urlopen error [Errno 60] Operation timed out>

In [5]:
# Per-class sample budget for the labeled / validation split.
NUM_CLASSES = 10
N_LABELED_PER_CLASS = 300
N_VALID_PER_CLASS = 1000

# Convert the label tensor once; it does not change between iterations.
train_label_list = trainset_original.train_labels.numpy()

train_label_index = []
valid_label_index = []
for i in range(NUM_CLASSES):
    # Positions of every sample whose label is i, in dataset order.
    label_index = np.where(train_label_list == i)[0]
    # The first N_LABELED_PER_CLASS go to the labeled set, the next
    # N_VALID_PER_CLASS to the validation set, so the two never overlap.
    label_subindex = list(label_index[:N_LABELED_PER_CLASS])
    valid_subindex = list(
        label_index[N_LABELED_PER_CLASS:N_LABELED_PER_CLASS + N_VALID_PER_CLASS]
    )
    train_label_index += label_subindex
    valid_label_index += valid_subindex

# Slicing truncates silently when a class has too few samples; fail loudly
# here instead of producing subsets of an unexpected size.
assert len(train_label_index) == NUM_CLASSES * N_LABELED_PER_CLASS
assert len(valid_label_index) == NUM_CLASSES * N_VALID_PER_CLASS

In [6]:
# NumPy versions of the full training images and labels, so that the index
# lists built above can be used for fancy indexing.
trainset_np, trainset_label_np = (
    trainset_original.train_data.numpy(),
    trainset_original.train_labels.numpy(),
)

In [7]:
# Pull out the labeled subset: images and their labels, selected with the
# same index list so the two stay aligned.
train_labels_sub = trainset_label_np[train_label_index]
train_data_sub = trainset_np[train_label_index]

In [1]:
import cv2
import random
import pickle
import constants as c
import numpy as np

# Augmentation settings, stored as attributes on the imported `constants`
# module and read by DataAug at call time.
c.NUM_JITTERS = 5  # jittered copies produced per input image
c.rotate = 20      # rotation angle is drawn from [-rotate, rotate]
c.xtranslate = 2   # horizontal shift is drawn from [-xtranslate, xtranslate]
c.ytranslate = 2   # vertical shift is drawn from [-ytranslate, ytranslate]

class DataAug():
    """Random jitter augmentation for 2-D image arrays.

    One augmented sample is produced by a random horizontal shift, a random
    vertical shift, a random rotation and a final resize. The jitter ranges
    and the number of samples per image are read from the ``constants``
    module (``c.xtranslate``, ``c.ytranslate``, ``c.rotate``,
    ``c.NUM_JITTERS``) each time a method is called.
    """

    def translate_x(self, img):
        """Return ``img`` shifted horizontally by a random offset.

        The offset is drawn uniformly from ``[-c.xtranslate, c.xtranslate]``;
        the output has the same height and width as the input.
        """
        rows, cols = img.shape
        jitter = random.uniform(-c.xtranslate, c.xtranslate)
        # 2x3 affine matrix: identity plus a translation in x.
        M = np.float32([[1, 0, jitter], [0, 1, 0]])
        # warpAffine takes the output size as (width, height).
        dst = cv2.warpAffine(img, M, (cols, rows))
        return dst

    def translate_y(self, img):
        """Return ``img`` shifted vertically by a random offset.

        The offset is drawn uniformly from ``[-c.ytranslate, c.ytranslate]``;
        the output has the same height and width as the input.
        """
        rows, cols = img.shape
        jitter = random.uniform(-c.ytranslate, c.ytranslate)
        # 2x3 affine matrix: identity plus a translation in y.
        M = np.float32([[1, 0, 0], [0, 1, jitter]])
        dst = cv2.warpAffine(img, M, (cols, rows))
        return dst

    def rotateImage(self, image):
        """Return ``image`` rotated about its centre by a random angle.

        The angle is drawn uniformly from ``[-c.rotate, c.rotate]``; the
        output has the same height and width as the input.
        """
        jitter = random.uniform(-c.rotate, c.rotate)
        rows, cols = image.shape
        # OpenCV expects the centre as (x, y) and the output size as
        # (width, height), i.e. column-first. ``image.shape`` is (rows, cols),
        # so passing it directly is only correct for square images.
        image_center = (cols / 2.0, rows / 2.0)
        rot_mat = cv2.getRotationMatrix2D(image_center, jitter, 1.0)
        result = cv2.warpAffine(image, rot_mat, (cols, rows), flags=cv2.INTER_LINEAR)
        return result

    def resize(self, image, size=(32, 32)):
        """Return ``image`` resized to ``size`` (given as ``(width, height)``).

        NOTE(review): the default 32x32 output does not match the 28x28
        arrays shown in this notebook's recorded outputs, and 32x32 samples
        cannot be appended to 28x28 ones along axis 0 -- confirm the
        intended size.
        """
        resized_image = cv2.resize(image, size)
        return resized_image

    def addJitters(self, image):
        """Return one randomly jittered copy of ``image`` with a leading axis of length 1.

        Applies, in order: horizontal shift, vertical shift, rotation, resize.
        """
        timage = self.translate_x(image)
        timage = self.translate_y(timage)
        timage = self.rotateImage(timage)
        timage = self.resize(timage)
        # The leading axis lets the samples be concatenated along axis 0.
        timage = np.expand_dims(timage, axis=0)
        return timage

    def dataaug(self, image, label):
        """Return ``(images, labels)`` for the jittered copies of ``image``.

        ``images`` stacks the results of ``addJitters`` along axis 0 and
        ``labels`` is a 1-D array repeating ``label`` once per copy.
        """
        # At least one sample is always produced, even if NUM_JITTERS < 1.
        count = max(1, c.NUM_JITTERS)
        # Build all samples first and concatenate once, rather than
        # re-copying the growing array on every iteration.
        timage = np.concatenate(
            [self.addJitters(image) for _ in range(count)], axis=0
        )
        tlabel = np.array([label] * count)
        return timage, tlabel

In [9]:
# Augmenter instance used by the cells below.
da = DataAug()

In [10]:
# Augment the first labeled sample; this seeds the arrays that the loop in
# the next cell extends with the remaining samples.
augmented_data, augmented_label = da.dataaug(train_data_sub[0],train_labels_sub[0])

In [11]:
# Augment the remaining labeled samples (sample 0 is already held in
# augmented_data / augmented_label). The per-sample results are collected in
# lists and concatenated once at the end, because np.append inside the loop
# would re-copy the whole accumulated array on every iteration.
aug_data_chunks = [augmented_data]
aug_label_chunks = [augmented_label]
for i in range(1, train_data_sub.shape[0]):
    tdata, tlabel = da.dataaug(train_data_sub[i], train_labels_sub[i])
    aug_data_chunks.append(tdata)
    aug_label_chunks.append(tlabel)

augmented_data = np.concatenate(aug_data_chunks, axis=0)
augmented_label = np.concatenate(aug_label_chunks)

In [12]:
# Final labeled set = original labeled samples followed by their augmented
# copies. NOTE: this rebinds train_data_sub / train_labels_sub, so running
# the cell a second time appends the augmented data again.
train_labels_sub = np.concatenate((train_labels_sub, augmented_label), axis=0)
train_data_sub = np.concatenate((train_data_sub, augmented_data), axis=0)

In [13]:
# Alias the combined arrays and show their shapes (images first, then labels).
augdata, auglabel = train_data_sub, train_labels_sub
print(augdata.shape)
print(auglabel.shape)

(18000, 28, 28)
(18000,)


In [14]:
# Convert the combined NumPy arrays to torch tensors. From here on
# train_data_sub / train_labels_sub are tensors, no longer ndarrays.
train_data_sub, train_labels_sub = (
    torch.from_numpy(augdata),
    torch.from_numpy(auglabel),
)
print(train_labels_sub.size())
print(train_data_sub.size())

torch.Size([18000])
torch.Size([18000, 28, 28])


In [15]:
# Build a subMNIST container, then overwrite its contents with copies of the
# augmented labeled tensors.
# NOTE(review): k is presumably the number of samples subMNIST keeps (18000
# matches the size printed above) -- confirm against sub.py.
trainset_new = subMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
    k=18000,
)
trainset_new.train_data = train_data_sub.clone()
trainset_new.train_labels = train_labels_sub.clone()

Files already downloaded


In [16]:
# Sanity check: size of the image tensor just assigned to trainset_new.
trainset_new.train_data.size()

torch.Size([18000, 28, 28])

In [17]:
# Persist the augmented labeled set. The context manager guarantees the file
# is flushed and closed even if pickling raises part-way through.
with open("data/train_labeled_aug-changed.p", "wb") as fh:
    pickle.dump(trainset_new, fh)

In [18]:
# Same size check as before, repeated after the dataset has been pickled.
trainset_new.train_data.size()

torch.Size([18000, 28, 28])

In [19]:
# The validation set is carved out of the *training* split: take the NumPy
# versions of the training images/labels and select the validation indices.
validset_np, validset_label_np = (
    trainset_original.train_data.numpy(),
    trainset_original.train_labels.numpy(),
)
valid_data_sub, valid_labels_sub = (
    torch.from_numpy(validset_np[valid_label_index]),
    torch.from_numpy(validset_label_np[valid_label_index]),
)

In [20]:
# Container for the validation set. It is created with train=False, so the
# samples are stored in the test_data / test_labels attributes.
# NOTE(review): k is presumably the number of samples subMNIST keeps --
# confirm against sub.py.
validset = subMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
    k=10000,
)
validset.test_data = valid_data_sub.clone()
validset.test_labels = valid_labels_sub.clone()

Files already downloaded


In [21]:
# Sanity check: size of the image tensor just assigned to validset.
validset.test_data.size()

torch.Size([10000, 28, 28])

In [23]:
# Persist the validation set. The context manager guarantees the file is
# flushed and closed even if pickling raises part-way through.
with open("data/validation-changed.p", "wb") as fh:
    pickle.dump(validset, fh)

In [24]:

# Every training index that is in neither the labeled nor the validation
# subset becomes part of the unlabeled set. Membership is tested against a
# set: scanning the two index lists for each of the 60000 candidates would be
# quadratic.
NUM_TRAIN_SAMPLES = 60000
used_index = {int(idx) for idx in train_label_index}
used_index.update(int(idx) for idx in valid_label_index)
train_unlabel_index = [
    idx for idx in range(NUM_TRAIN_SAMPLES) if idx not in used_index
]

In [25]:
# Select the unlabeled samples from the full training split. The labels are
# extracted here as well, although the dataset built from them below stores
# no labels.
trainset_np, trainset_label_np = (
    trainset_original.train_data.numpy(),
    trainset_original.train_labels.numpy(),
)
train_data_sub_unl, train_labels_sub_unl = (
    torch.from_numpy(trainset_np[train_unlabel_index]),
    torch.from_numpy(trainset_label_np[train_unlabel_index]),
)

In [26]:
# Container for the unlabeled set: images only, labels deliberately removed.
# NOTE(review): k is presumably the number of samples subMNIST keeps --
# confirm against sub.py.
trainset_new_unl = subMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
    k=47000,
)
trainset_new_unl.train_data = train_data_sub_unl.clone()
trainset_new_unl.train_labels = None      # Unlabeled!!

Files already downloaded


In [27]:
# Sanity check: size of the image tensor just assigned to trainset_new_unl.
trainset_new_unl.train_data.size()

torch.Size([47000, 28, 28])

In [28]:
# Displays nothing: train_labels was set to None when the dataset was built.
trainset_new_unl.train_labels

In [30]:
# Persist the unlabeled set. The context manager guarantees the file is
# flushed and closed even if pickling raises part-way through.
with open("data/train_unlabeled-changed.p", "wb") as fh:
    pickle.dump(trainset_new_unl, fh)