In [16]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import ast
import cv2 as cv
import pickle

In [7]:
image_size = (64, 64)  # target size handed to cv.resize for every saved image
num_per_cate = 500  # number of taken images in each category
# os.listdir returns entries in arbitrary order; sorting makes the label index
# given to each category identical on every run and every machine.
taken_categories = sorted(os.listdir('data/csv/'))  # list of taken classes (csv file names)
labels = {}  # <file name> : <label>
folder_name = 'processed-2'  # where to save *.npy files

# Dataset

In [8]:
def drawing_to_numpy(drawing):
    """Rasterise a drawing, given as the string form of its stroke list.

    drawing : string that `ast.literal_eval` turns into a sequence of strokes,
              where stroke[0] holds the x coordinates and stroke[1] the y
              coordinates of the stroke's points
    returns : 256x256 array of zeros with the strokes drawn in value 255
              and line thickness 5
    """
    strokes = ast.literal_eval(drawing)
    canvas = np.zeros((256, 256))

    for stroke in strokes:
        points = list(zip(stroke[0], stroke[1]))
        # Join every point to its successor; a lone point yields no segment.
        for (x0, y0), (x1, y1) in zip(points, points[1:]):
            cv.line(canvas, (x0, y0), (x1, y1), 255, 5)

    return canvas

In [9]:
def show_category(name):
    """Plot ten randomly picked drawings from one category csv.

    name : file name of the csv inside 'data/csv/'
    """
    frame = pd.read_csv('data/csv/' + name)
    drawings = frame['drawing']
    # Ten indices drawn below the number of non-null drawings.
    picks = np.random.choice(drawings.count(), 10)

    for pick in picks:
        canvas = drawing_to_numpy(drawings[pick])

        plt.figure()
        # Invert so the strokes show dark on a light background.
        plt.imshow(255 - canvas, cmap='gray')

In [10]:
def process(category, label, count, folder_name):
    """
    Render the first `count` drawings of one category and save each as a .npy file.

    category : name of the csv file
    label : index of category
    count : number of taken images in the category
    folder_name : where to save npy files

    Each image is saved as 'data/<folder_name>/<key_id>.npy', and the file
    name is recorded in the module-level `labels` dict, mapped to `label`.
    """
    # Only the first `count` rows are ever used, so do not parse the rest of
    # the csv. A negative count keeps the previous behaviour of taking every row.
    nrows = count if count >= 0 else None
    data = pd.read_csv('data/csv/' + category, nrows=nrows)
    drawings = data['drawing'].to_list()
    key_ids = data['key_id'].to_list()

    # np.save does not create missing directories.
    out_dir = os.path.join('data', folder_name)
    os.makedirs(out_dir, exist_ok=True)

    for drawing, key_id in zip(drawings, key_ids):
        image = drawing_to_numpy(drawing)
        resized = cv.resize(image, image_size, interpolation=cv.INTER_AREA)
        # Append the channel axis without assuming the resized array has the
        # same axis order as image_size (cv.resize takes (width, height) but
        # returns rows x columns); identical to the old reshape for square sizes.
        image = resized[..., np.newaxis]

        save_name = str(key_id) + '.npy'
        np.save(os.path.join(out_dir, save_name), image)

        labels[save_name] = label

In [13]:
def create_dataset():
    """Process every taken category, then pool the remaining csv files into one extra class.

    Each entry of `taken_categories` is processed with its position in that
    list as label and `num_per_cate` requested images. The csv files in
    'data/csv/' that are not taken all share the label len(taken_categories);
    `num_per_cate` requested images are split evenly between them, with the
    last file (in sorted order) also receiving the remainder.
    """
    # process category in list; enumerate avoids a list scan per category
    for label, category in enumerate(taken_categories):
        process(category, label, num_per_cate, folder_name)

    # process the others as a new category
    taken = set(taken_categories)
    # Sorted because os.listdir order is arbitrary, which would otherwise make
    # the choice of the file receiving the remainder differ between runs.
    others = sorted(name for name in os.listdir('data/csv/') if name not in taken)
    if not others:
        return

    others_label = len(taken_categories)
    share, residual = divmod(num_per_cate, len(others))
    for category in others[:-1]:
        process(category, others_label, share, folder_name)
    # the last category takes the residual amount
    process(others[-1], others_label, share + residual, folder_name)

In [14]:
# Build the dataset: runs process() for every category, which saves the .npy
# images and fills the module-level `labels` dict.
create_dataset()

ZeroDivisionError: integer division or modulo by zero

In [17]:
# Persist the <file name> : <label> mapping. The `with` block closes the file,
# so the pickle is fully written to disk before the cell finishes.
meta_dir = os.path.join('meta', folder_name)
os.makedirs(meta_dir, exist_ok=True)  # open() does not create missing directories
with open(os.path.join(meta_dir, 'labels.bin'), 'wb') as labels_file:
    pickle.dump(labels, labels_file)