In [2]:
from PIL import Image
import ndjson
import json
from types import SimpleNamespace
import os
import os.path
from shutil import copyfile

def create_image(image, filename):
    img = Image.new('RGB', (256,256), "white")
    pixels = img.load()

    x = -1
    y = -1

    for stroke in image:
        for i in range(len(stroke[0])):
            if x != -1: 
                for point in get_line(stroke[0][i], stroke[1][i], x, y):
                    pixels[point[0],point[1]] = (0, 0, 0)
            pixels[stroke[0][i],stroke[1][i]] = (0, 0, 0)
            x = stroke[0][i]
            y = stroke[1][i]
        x = -1
        y = -1
    img.save(filename)

def get_line(x1, y1, x2, y2):
    points = []
    issteep = abs(y2-y1) > abs(x2-x1)
    if issteep:
        x1, y1 = y1, x1
        x2, y2 = y2, x2
    rev = False
    if x1 > x2:
        x1, x2 = x2, x1
        y1, y2 = y2, y1
        rev = True
    deltax = x2 - x1
    deltay = abs(y2-y1)
    error = int(deltax / 2)
    y = y1
    ystep = None
    if y1 < y2:
        ystep = 1
    else:
        ystep = -1
    for x in range(x1, x2 + 1):
        if issteep:
            points.append((y, x))
        else:
            points.append((x, y))
        error -= deltay
        if error < 0:
            y += ystep
            error += deltax
    # Reverse the list if the coordinates were reversed
    if rev:
        points.reverse()
    return points

# it creates a directory to the path
# eg. the path is 'C:\\Users\\arist\\Desktop\\icon'
# and it creates folder 'icon' into the path: 'C:\\Users\\arist\\Desktop\\'
def create_directory(path):
    try:
        os.mkdir(path)
    except OSError:
        print ("Creation of the directory %s failed" % path)
    else:
        print ("Successfully created the directory %s " % path)


def createSketchCategories(sketches_src, sketches_dst, category, sketch, num_photos, icon_name):
    create_directory(sketches_dst + category)
    # load from file-like objects
    # 'C:\\Users\\arist\\Desktop\\Sketches\\aircraft carrier.ndjson'
    with open(sketches_src + sketch + '.ndjson') as f:
        data_dic = ndjson.load(f)

    data = [None]*len(data_dic)
    for i in range(0,len(data_dic)):
        data[i] = json.dumps(data_dic[i])

    # "C:\\programming\\Dataset\\Scripts\\IconToSketch\\sketch\\"
    for i in range(0,num_photos):
        x = json.loads(data[i], object_hook=lambda d: SimpleNamespace(**d))
        create_image(x.drawing, sketches_dst + category + '\\' + icon_name + "_" + str(i) + ".png")

# search through all subfolders of the folder_src_path and it copies
# the icon into the folder_dst_path
def copySVGsToCategoryFolder(folder_src_path, folder_dst_path, icon):
    for src_path, src_names, filenames in os.walk(folder_src_path):
        for filename in [f for f in filenames if f == icon]:
            src = src_path + '\\' + icon
            dst = folder_dst_path + '\\' + icon
            copyfile(src, dst)

In [4]:
def findNdjsonFiles(folder_src_path):
    files_list = []
    for src_path, src_names, filenames in os.walk(folder_src_path):
        for filename in [f for f in filenames if f.endswith('.ndjson')]:
            files_list.append(filename)
    return files_list

def createSketchKeyWords(sketch_names):
    
    for i in range(0, len(sketch_names)):
        sketch_names[i] = sketch_names[i].replace('.ndjson','')

    sketch_key_words = [[]]*len(sketch_names)
    for i in range(0, len(sketch_names)):
        sketch_key_words[i] = sketch_names[i].split()
    
    return sketch_key_words

# It searches into the path of icons and store all the file names
# along with its category name as a tuple (category, file name)
def findAllIconFiles(folder_icons_src_path):
    files_list = []
    for src_path, src_names, filenames in os.walk(folder_icons_src_path):
        for filename in [f for f in filenames if f.endswith('.svg')]:
            files_list.append((src_path.replace(folder_icons_src_path + '\\', ""), filename.replace('.svg', "")))
    return files_list

# the path where i store all the Sketches
folder_sketches_src_path = 'C:\\Users\\arist\\Desktop\\Sketches'
# fetch the all the file names
sketch_names = findNdjsonFiles(folder_sketches_src_path)

# remove the '.ndjson' and create keywords of the name
# if it is separated with space, else only one keyword (the name of file)
sketch_key_words = createSketchKeyWords(sketch_names)


folder_icons_src_path = 'C:\\Users\\arist\\Desktop\\icon'
icons_list = findAllIconFiles(folder_icons_src_path)
# replace the "-" with "" to match the names of icons
for i in range(0, len(icons_list)):
    icons_list[i] = (icons_list[i][0], icons_list[i][1].replace("-", " "))

# create the icons keywords
icons_key_words = [[]]*len(icons_list)
for i in range(0, len(icons_list)):
    icons_key_words[i] = icons_list[i][1].split()
    # remove numbers from keywords
    for j in range(0, len(icons_key_words[i])):
        icons_key_words[i][j] = ''.join([i for i in icons_key_words[i][j] if not i.isdigit()])

# store the indices of lists icons_key_words  and
# sketch_key_words if at least one word is matched
match_indices = []
for i in range(0, len(icons_key_words)):
    match = False
    sketch_index_list = []
    for j in range(0, len(sketch_key_words)):
        if len(set(icons_key_words[i]) & set(sketch_key_words[j])) > 0:
            match = True
            sketch_index_list.append(j)

    max = 0
    # match the two folders with keywords that have the most common words
    for index in sketch_index_list:
        if len(set(icons_key_words[i]) & set(sketch_key_words[index])) > max:
            sketch_index = index
            max = len(set(icons_key_words[i]) & set(sketch_key_words[index]))
    if match:
        match_indices.append((i, sketch_index))

print('Matches: ' + str(len(match_indices)))

# remove numbers from the names to compare it with the sketches
perfect_matches = []
icons_list_without_num = []
for (_,icon) in icons_list:
    icons_list_without_num.append(''.join([i for i in icon if not i.isdigit()]))

# find the perfect matches of strings
for (i,j) in match_indices:
    if icons_list_without_num[i] == sketch_names[j]:
        perfect_matches.append((i, j))

print("Perfect matches: " + str(len(perfect_matches)))
print()

# create category folders in 'C:\\Users\\arist\\Desktop\\Dataset\\sketch\\'
# for the images that are perfectly matched, not all categories
# are created for sketches since the perfect matches are only 102
create_directory('C:\\Users\\arist\\Desktop\\Dataset')
sketches_src = 'C:\\Users\\arist\\Desktop\\Sketches\\'
sketches_dst = 'C:\\Users\\arist\\Desktop\\Dataset\\sketch\\'
icon_dst = 'C:\\Users\\arist\\Desktop\\Dataset\\icon\\'
create_directory(sketches_dst)
create_directory(icon_dst)

# replace the " " with "-" to recreate the initial names of icons
for i in range(0, len(icons_list)):
    icons_list[i] = (icons_list[i][0], icons_list[i][1].replace(" ", "-"))

# Creates the categories in icon and sketch folder, storing the svg files in icons
# and generating 20 sketches for each icon
for (i,j) in perfect_matches:
    createSketchCategories(sketches_src, sketches_dst, icons_list[i][0], sketch_names[j], 20, icons_list[i][1])
    icon_dst_category = icon_dst + icons_list[i][0]
    create_directory(icon_dst_category)
    copySVGsToCategoryFolder(folder_icons_src_path, icon_dst_category, icons_list[i][1] + ".svg")
    

Matches: 406
Perfect matches: 102

Successfully created the directory C:\Users\arist\Desktop\Dataset 
Successfully created the directory C:\Users\arist\Desktop\Dataset\sketch\ 
Successfully created the directory C:\Users\arist\Desktop\Dataset\icon\ 
Successfully created the directory C:\Users\arist\Desktop\Dataset\sketch\animals 
Successfully created the directory C:\Users\arist\Desktop\Dataset\icon\animals 
Creation of the directory C:\Users\arist\Desktop\Dataset\sketch\animals failed
Creation of the directory C:\Users\arist\Desktop\Dataset\icon\animals failed
Creation of the directory C:\Users\arist\Desktop\Dataset\sketch\animals failed
Creation of the directory C:\Users\arist\Desktop\Dataset\icon\animals failed
Creation of the directory C:\Users\arist\Desktop\Dataset\sketch\animals failed
Creation of the directory C:\Users\arist\Desktop\Dataset\icon\animals failed
Creation of the directory C:\Users\arist\Desktop\Dataset\sketch\animals failed
Creation of the directory C:\Users\arist\

## **Renaming the files**
This is a code for renaming the files into the folders of sketch, to keep the order -> "name_1.png",  "name_2.png"...

In [4]:
# It searches into the path of sketches and store all the file names
# along with its category name as a tuple (category, file name)
def findAllSketchFileNames(folder_sketches_path):
    file_names_set = set()
    for src_path, src_names, filenames in os.walk(folder_sketches_path):
        for filename in [f for f in filenames if f.endswith('.png')]:
            file_names_set.add((src_path.replace(folder_sketches_path + '\\', ""), filename.split("_")[0]))
    return file_names_set

# this is the path of the parent folder we want to rename the files
folder_sketches_path_src = "C:\\Users\\arist\\Desktop\\Dataset\\sketch"
file_names_set = findAllSketchFileNames(folder_sketches_path_src)

# this is the destination path that we will create the new files
folder_sketches_path_dst = "C:\\Users\\arist\\Desktop\\dest\\"
create_directory(folder_sketches_path_dst)

# it copies the files into the new path with their new name
for (category, name) in file_names_set:
    category_path_src = folder_sketches_path_src + "\\" + category
    category_path_dst = folder_sketches_path_dst + "\\" + category
    create_directory(category_path_dst)
    count = 1
    for src_path, src_names, filenames in os.walk(category_path_src):
        for filename in [f for f in filenames if f.split("_")[0] == name]:
            new_filename = name + "_" + str(count) + ".png"
            copyfile(category_path_src + "\\" + filename, category_path_dst + "\\" + new_filename)
            count = count + 1

{('objects', 'camera'), ('communication', 'envelope'), ('buildings', 'hospital'), ('animals', 'cat'), ('travel', 'map'), ('summer', 'sun'), ('beverage', 'wine-glass'), ('camping', 'binoculars'), ('objects', 'leaf'), ('communication', 'envelope1'), ('date-time', 'calendar1'), ('shapes', 'square'), ('animals', 'frog'), ('design', 'marker'), ('travel', 'suitcase'), ('medical', 'tooth'), ('music', 'guitar'), ('sports', 'soccer-ball1'), ('vehicles', 'helicopter'), ('vehicles', 'bicycle'), ('design', 'eye'), ('weather', 'moon1'), ('household', 'bed'), ('computers', 'laptop'), ('sports', 'baseball-ball'), ('animals', 'dragon'), ('hotel', 'dumbbell'), ('construction', 'hammer'), ('objects', 'umbrella'), ('writing', 'book'), ('date-time', 'hourglass'), ('date-time', 'calendar'), ('food', 'apple-alt'), ('shapes', 'star'), ('design', 'eye1'), ('food', 'birthday-cake'), ('construction', 'screwdriver'), ('sports', 'soccer-ball'), ('food', 'hamburger'), ('medical', 'stethoscope'), ('computers', 'key