In [1]:
from os import listdir
from os.path import isfile, join
import pickle
from IPython.display import clear_output
import PIL.Image
import multiprocessing
from lxml import etree

# Execute the companion notebooks so that their definitions become available here.
# NOTE(review): names used in the cells below but not defined in this notebook
# (e.g. DATASET_PATH, SAVE_PATH, N_IMAGES, DATASET_NAME, main_generator,
# savePickle, saveImg, check_dirs) are presumably supplied by these three
# notebooks -- confirm which one provides which.
%run ./generator.ipynb
%run ./variables.ipynb
%run ./utils.ipynb

# NOTE(review): TEST is not read by any cell visible in this notebook; it may be
# consulted by code loaded through the %run lines above -- confirm before removing.
TEST = None

In [2]:
def save_maps():
    """Build and pickle the multiclass and binary label maps.

    Every regular file found in the directories listed in ``DATASET_PATH``
    contributes the second ``_``-separated field of its file name as a taxon.
    Taxa are numbered from 1 in order of first appearance and the resulting
    ``{taxon: id}`` dict is written with ``savePickle`` to
    ``SAVE_PATH + "/maps/multiclass_label_map.pickle"``. The binary map is the
    single entry ``{"diatom": 1}`` and is written to
    ``SAVE_PATH + "/maps/binary_label_map.pickle"``.

    Each directory listing is sorted before use so that the ids do not depend
    on the arbitrary order returned by ``listdir``. File names that contain no
    ``_`` carry no taxon field; they are skipped and reported instead of
    aborting the whole run with an ``IndexError``.
    """
    images = []
    for path in DATASET_PATH:
        # listdir() gives entries in arbitrary order; sorting makes ids reproducible.
        images.extend(sorted(f for f in listdir(path) if isfile(join(path, f))))

    tmp_code = {}
    skipped = []
    for file in images:
        fields = file.split('_')
        if len(fields) < 2:
            # No "<prefix>_<taxon>..." structure (e.g. a stray hidden file).
            skipped.append(file)
            continue
        taxon = fields[1]
        if taxon not in tmp_code:
            # Ids start at 1 and grow with each newly seen taxon.
            tmp_code[taxon] = len(tmp_code) + 1

    if skipped:
        print("Skipped", len(skipped), "file(s) without a taxon field:", skipped)

    savePickle(tmp_code, SAVE_PATH+"/maps/multiclass_label_map.pickle")
    binary_tmp_code = {"diatom": 1}
    savePickle(binary_tmp_code, SAVE_PATH+"/maps/binary_label_map.pickle")
    print("Binary and multiclass label maps saved successfully !")

In [3]:
def _claim_next_id(lock, n_id, wnumber, verbose):
    """Reserve the next image id from the shared counter and return it."""
    # The context manager releases the lock even if the progress display raises,
    # so one failing worker cannot leave the others blocked on the lock.
    with lock:
        wid = n_id.value
        if verbose and wid < N_IMAGES:
            clear_output(wait=True)
            print("Worker ", wnumber, ": ", (wid+1), "/", N_IMAGES)
        n_id.value += 1
    return wid


def worker(lock, n_id, wnumber, verbose):
    """Generate images and their XML annotations until N_IMAGES ids are used up.

    Ids are taken one at a time from the shared counter ``n_id`` (starting at
    whatever value it holds, 0 in this notebook), so exactly the ids
    ``0 .. N_IMAGES-1`` are produced across all workers. For each id the worker
    calls ``main_generator``, keeps channel 1 of the returned image, saves it
    as ``images/<id>.png`` under ``SAVE_PATH`` and writes
    ``annotations/<id>.xml`` describing the image size and one ``<object>``
    per annotation.

    Args:
        lock: lock protecting access to ``n_id``.
        n_id: shared counter exposing a ``.value`` integer (next id to use).
        wnumber: index of this worker, only used in printed messages.
        verbose: when true, display the progress each time an id is claimed.

    Returns:
        0 once no id below ``N_IMAGES`` is left.
    """
    print("Worker ", wnumber, " ok!")
    # NOTE(review): `random` is not imported in this notebook; it is presumably
    # provided by one of the %run notebooks. Re-seeding here presumably keeps
    # the worker processes from replaying the same random sequence -- confirm.
    random.seed()

    wid = _claim_next_id(lock, n_id, wnumber, verbose)
    # Strict comparison: ids are 0-based, so N_IMAGES itself must not be generated.
    while wid < N_IMAGES:
        final_img, annotations = main_generator(simple_angles = False,
                                          size_px = 1000,
                                          verbose=False,
                                          overlapping=0.3,
                                          n_diatoms=[9,12],
                                          scale_diatoms=[7,5],
                                          n_dust=[25,40],
                                          scale_dust=[3,4])
        string_id = '{:05d}'.format(wid)
        # Keep a single channel; the annotation declares depth 1 accordingly.
        final_img = final_img[:,:,1]

        # Init xml tree
        xml_root = etree.Element("annotation")
        etree.SubElement(xml_root, "folder").text = "images"
        etree.SubElement(xml_root, "filename").text = string_id+".png"
        source_xml = etree.SubElement(xml_root, "source")
        etree.SubElement(source_xml, "database").text = DATASET_NAME
        size_xml = etree.SubElement(xml_root, "size")
        # Array shapes are (rows, columns) = (height, width).
        # NOTE(review): assumes final_img follows the usual numpy image layout.
        etree.SubElement(size_xml, "width").text = str(final_img.shape[1])
        etree.SubElement(size_xml, "height").text = str(final_img.shape[0])
        etree.SubElement(size_xml, "depth").text = str(1)

        path_img = "images/"+string_id+".png"
        saveImg(final_img, join(SAVE_PATH, path_img))

        # Per-taxon running index (0-based) used to build unique mask names.
        taxon_n = {}
        for annotation in annotations:
            taxon = annotation["taxon"]
            taxon_n[taxon] = taxon_n.get(taxon, -1) + 1
            path_mask = "masks/"+string_id+"_"+taxon+"_"+'{:03d}'.format(taxon_n[taxon])+".png"
            # Writing the mask PNG is currently disabled: the mask array is
            # dropped, the target directory is still prepared through
            # check_dirs and only the would-be path is recorded.
            annotation.pop("patch_mask")
            check_dirs(join(SAVE_PATH, path_mask))
            annotation["mask_path"] = path_mask
            # Adding bounding box to xml (coordinates are direct children of
            # <object>, as in the existing output format).
            object_xml = etree.SubElement(xml_root, "object")
            etree.SubElement(object_xml, "name").text = taxon
            for key in ("xmin", "ymin", "xmax", "ymax"):
                etree.SubElement(object_xml, key).text = str(annotation[key])

        # Saving annotation
        et = etree.ElementTree(xml_root)
        xml_path = join(SAVE_PATH, "annotations/", string_id+".xml")
        check_dirs(xml_path)
        with open(xml_path, 'wb') as xml_file:
            et.write(xml_file, pretty_print=True)

        # Incrementing id
        wid = _claim_next_id(lock, n_id, wnumber, verbose)
    return 0

In [4]:
# MAIN
# Writes the label maps, then spawns the worker processes that share one id
# counter and blocks until all of them have finished.
jobs = []
n_process = 8
n_id = multiprocessing.Value('i', 0)  # next image id, shared by all workers
lock = multiprocessing.Lock()         # guards every access to n_id
verbose = True
save_maps()
print("Generating", N_IMAGES, "images with ", n_process, " workers !")
for i in range(n_process):
    p = multiprocessing.Process(target=worker, args=(lock, n_id, i, verbose,))
    jobs.append(p)
    p.start()

# Wait for every worker so the cell only completes once the dataset is fully
# written, and so that a crashed worker does not go unnoticed.
for p in jobs:
    p.join()
failed = [p.name for p in jobs if p.exitcode != 0]
if failed:
    print("Workers that exited with an error:", failed)

Worker  7 :  16670 / 20000
