In [1]:
 ! git clone https://github.com/seshuad/IMagenet
!ls './tiny-imagenet-200/'

!ls './tiny-imagenet-200/val/'

test  train  val  wnids.txt  words.txt
images	val_annotations.txt


In [7]:
import time
import scipy.ndimage as nd
import cv2
import numpy as np

path = './tiny-imagenet-200/'

def get_id_dictionary():
    id_dict = {}
    for i, line in enumerate(open( path + 'wnids.txt', 'r')):
        id_dict[line.replace('\n', '')] = i
    return id_dict
  
def get_class_to_id_dict():
    id_dict = get_id_dictionary()
    all_classes = {}
    result = {}
    for i, line in enumerate(open( path + 'words.txt', 'r')):
        n_id, word = line.split('\t')[:2]
        all_classes[n_id] = word
    for key, value in id_dict.items():
        result[value] = (key, all_classes[key])      
    return result

def get_data(id_dict):
    print('starting loading data')
    train_data, test_data = [], []
    train_labels, test_labels = [], []
    t = time.time()
    for key, value in id_dict.items():
        train_data += [cv2.imread( path + 'train/{}/images/{}_{}.JPEG'.format(key, key, str(i)), cv2.IMREAD_COLOR) for i in range(500)]
        train_labels_ = np.array([[0]*200]*500)
        train_labels_[:, value] = 1
        train_labels += train_labels_.tolist()

    for line in open( path + 'val/val_annotations.txt'):
        img_name, class_id = line.split('\t')[:2]
        test_data.append(cv2.imread( path + 'val/images/{}'.format(img_name) ,cv2.IMREAD_COLOR))
        test_labels_ = np.array([[0]*200])
        test_labels_[0, id_dict[class_id]] = 1
        test_labels += test_labels_.tolist()

    print('finished loading data, in {} seconds'.format(time.time() - t))
    return np.array(train_data), np.array(train_labels), np.array(test_data), np.array(test_labels)
  
train_data, train_labels, test_data, test_labels = get_data(get_id_dictionary())

print( "train data shape: ",  train_data.shape )
print( "train label shape: ", train_labels.shape )
print( "test data shape: ",   test_data.shape )
print( "test_labels.shape: ", test_labels.shape )

starting loading data
finished loading data, in 43.25619649887085 seconds
train data shape:  (100000, 64, 64, 3)
train label shape:  (100000, 200)
test data shape:  (10000, 64, 64, 3)
test_labels.shape:  (10000, 200)


# Concatenate Train and Validation for TinyImage-200 

In [3]:
# Add the two arrays and again split into 70 & 30 data

def addNumpyAsVStack(dataSet,labelSet):
    """
    This method adds two numpy arrays into one.
    Arg1: Tuple, Train and Test data to be added
    Arg2: Tuple, Trains and Test Labels to be added
    Return: two Vstacked arrays for data and label
    """
    train_data, test_data = dataSet
    train_labels, test_labels = labelSet
    
    data = np.vstack((train_data,test_data))
    label = np.vstack((train_labels, test_labels))
    print("VStacking train data of size {} with test data of size {} gives {}".format(train_data.shape,test_data.shape, data.shape))
    print("VStacking train label of size {} with test label of size {} gives {}".format(train_data.shape,test_data.shape, data.shape))
    return (data, label)

data, label = addNumpyAsVStack((train_data,test_data ), (train_labels,test_labels))


VStacking train data of size (100000, 64, 64, 3) with test data of size (10000, 64, 64, 3) gives (110000, 64, 64, 3)
VStacking train label of size (100000, 64, 64, 3) with test label of size (10000, 64, 64, 3) gives (110000, 64, 64, 3)


# Shuffle the DataSet

In [4]:
def shuffle_data(data, label ):
    """
    This method shuffles numpy array.
    Arg1: Numpy Array, Data to be shuffled
    Arg2: Numpy Array, Labels to be shuffled
    Return: Two Shuffled numpy arrays
    """
    size = len(data)
    train_idx = np.arange(size)
    np.random.shuffle(train_idx)

    return data[train_idx], label[train_idx]
  
data, label = shuffle_data(data, label)

# Split the Dataset 

In [5]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(data,label,train_size=0.7,test_size=0.3)

print("Size of Train Data {}, Test Data{}, Train Label{} and Test Label{}".format(x_train.shape,x_test.shape,y_train.shape,y_test.shape))

Size of Train Data (77000, 64, 64, 3), Test Data(33000, 64, 64, 3), Train Label(77000, 200) and Test Label(33000, 200)


## Saving the numpy to speed up process in Colab

In [4]:
 import numpy as np

 np.save('./x_train', x_train)
 np.save('./x_test', x_test)
 np.save('./y_train', y_train)
 np.save('./y_test', y_test)


In [2]:
# import torch

# torch.save(torch.from_numpy(x_train), './processed-data/train/x_train.pt')
# torch.save(torch.from_numpy(x_test), './processed-data/test/x_test.pt')
# torch.save(torch.from_numpy(y_train), './processed-data/train/y_train.pt')
# torch.save(torch.from_numpy(y_test), './processed-data/test/y_test.pt')


# Load the scaled and transposed data

In [7]:
from scaledata import load_data
import numpy as np

x_train,x_test,y_train,y_test = load_data(255)
np.save('./processed-data/train/x_train',x_train)
np.save('./processed-data/test/x_test',x_test)
np.save('./processed-data/train/y_train',y_train)
np.save('./processed-data/test/y_test',y_test)

In [None]:
# import torch.nn as nn
# import torch.optim as optim
# import torchvision.transforms as transforms
# from torch.utils.data import DataLoader
# from torchvision.datasets import CIFAR10
# from lr_finder import LRFinder as lrfinder


# optimizer = optim.SGD(mymodel.parameters(), lr=1e-7)
# criterion = torch.nn.CrossEntropyLoss()
# lr_finder = lrfinder.LRFinder(mymodel, optimizer, criterion, device="cuda")
# lr_finder.range_test((x_test,y_test), end_lr=100, num_iter=150, step_mode="exp")

# lr_finder.plot(skip_end=0, accuracy_flag = True)
# lr_finder.reset()