# Dataset-Generator.

In [1]:
import os
from datagenerator import DataGenerator
import pickle
import random
import utils

Using TensorFlow backend.


In [2]:
import multiprocessing
# Number of parallel jobs, taken from the CPU count reported by multiprocessing.
# This value is later passed to datagenerator.generate(multiprocessing_jobs=...).
multiprocessing_jobs = multiprocessing.cpu_count()
print("Going to spawn", multiprocessing_jobs, "jobs...")

Going to spawn 8 jobs...


# Setting the parameters.

In [3]:
# Resolve the dataset root folder. An optional "datasetpath.txt" in the working
# directory overrides the default relative location.
if os.path.exists("datasetpath.txt"):
    # Read via a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("datasetpath.txt", "r") as dataset_path_file:
        # Newlines are removed so a trailing line break does not end up in the path.
        dataset_path = dataset_path_file.read().replace("\n", "")
else:
    dataset_path = "../data"

# Parameter set for generating voxelgrid datasets.
dataset_parameters_voxelgrids = {
    "input_type": "voxelgrid",
    "output_targets": ["height", "weight"],
    # Seed applied to Python's `random` module before the train/test split.
    "random_seed": 666,
    "voxelgrid_target_shape": (32, 32, 32),
    "voxel_size_meters": 0.1,
    "voxelgrid_random_rotation": True,
    # Sizes requested from the generator for the two splits.
    "dataset_size_train": 6000,
    "dataset_size_test": 1000,
}

# Parameter set for generating pointcloud datasets.
dataset_parameters_pointclouds = {
    "input_type": "pointcloud",
    "output_targets": ["height", "weight"],
    # Seed applied to Python's `random` module before the train/test split.
    "random_seed": 666,
    "pointcloud_target_size": 30000,
    "pointcloud_random_rotation": True,
    # Sizes requested from the generator for the two splits.
    "dataset_size_train": 3000,
    "dataset_size_test": 500,
}

In [4]:
# Parameter sets to generate datasets for, processed in list order.
dataset_parameters_to_use = [
    dataset_parameters_pointclouds,
    # dataset_parameters_voxelgrids,  # currently disabled
]

# Creating the data-generator.
Builds a `DataGenerator` from one of the parameter dictionaries defined above.

In [5]:
def create_datagenerator(dataset_parameters):
    """Build a DataGenerator from a parameter dictionary and print its statistics.

    Args:
        dataset_parameters: Mapping that must contain "input_type" and
            "output_targets". The voxelgrid- and pointcloud-specific keys are
            optional; any that are missing are passed to DataGenerator as None.

    Returns:
        The newly created DataGenerator instance.

    Raises:
        KeyError: If "input_type" or "output_targets" is missing.

    Note:
        Reads the notebook-level `dataset_path` variable for the data location.
    """
    print("Creating data-generator...")

    # Mandatory settings: fail loudly if they are absent.
    input_type = dataset_parameters["input_type"]
    output_targets = dataset_parameters["output_targets"]

    # Settings that exist only for one of the input types.
    optional_keys = (
        "voxelgrid_target_shape",
        "voxel_size_meters",
        "voxelgrid_random_rotation",
        "pointcloud_target_size",
        "pointcloud_random_rotation",
    )
    optional_settings = {key: dataset_parameters.get(key) for key in optional_keys}

    generator = DataGenerator(
        dataset_path=dataset_path,
        input_type=input_type,
        output_targets=output_targets,
        **optional_settings
    )
    generator.print_statistics()
    return generator

# Analysis.

In [6]:
# Switch for the optional analysis cells below; they only run when this is True.
analyze = False

In [7]:
# Data-generator for the first configured parameter set; the analysis cells below use it.
datagenerator = create_datagenerator(dataset_parameters_to_use[0])

Creating data-generator...
/Users/tristanbehrens/Datasets/welthungerhilfe/20180803
QR-Code SAM-GOV-025 has 1 different manual measurements
   Target [67, 6.7] with 0 JPGs and 51 PCDs.
QR-Code SAM-SNG-073 has 3 different manual measurements
   Target [82, 8.8] with 0 JPGs and 41 PCDs.
   Target [82, 8.8] with 0 JPGs and 40 PCDs.
   Target [82, 8.5] with 0 JPGs and 0 PCDs.
QR-Code SAM-GOV-014 has 1 different manual measurements
   Target [91.1, 11.1] with 0 JPGs and 77 PCDs.
QR-Code SAM-GOV-087 has 1 different manual measurements
   Target [85.7, 10.2] with 0 JPGs and 60 PCDs.
QR-Code SAM-GOV-004 has 1 different manual measurements
   Target [76.2, 8.3] with 0 JPGs and 71 PCDs.
QR-Code SAM-GOV-026 has 1 different manual measurements
   Target [80.1, 9.2] with 0 JPGs and 47 PCDs.
QR-Code SAM-GOV-068 has 1 different manual measurements
   Target [73.3, 7.4] with 0 JPGs and 39 PCDs.
QR-Code SAM-SNG-013 has 1 different manual measurements
   Target [93.7, 11.2] with 0 JPGs and 0 PCDs.
QR-Cod

In [8]:
# Optional analysis of the files; skipped unless `analyze` is True.
if analyze:
    datagenerator.analyze_files()

In [9]:
# Optional analysis of the pointclouds; skipped unless `analyze` is True.
if analyze:
    datagenerator.analyze_pointclouds()

In [10]:
# Optional analysis of the voxelgrids; skipped unless `analyze` is True.
if analyze:
    datagenerator.analyze_voxelgrids()

# Do the train-test-split and generate.

In [11]:
def split_and_generate(datagenerator, dataset_parameters):
    """Split the QR-codes 80/20 into train and test and generate both datasets.

    The split is made per QR-code, not per sample: the QR-codes are shuffled
    after seeding Python's global `random` module, the first 80% become the
    training codes and the rest the testing codes.

    Args:
        datagenerator: Object exposing a `qrcodes` sequence and a `generate(...)`
            method that returns an iterator of datasets.
        dataset_parameters: Mapping providing "random_seed",
            "dataset_size_train" and "dataset_size_test".

    Returns:
        Tuple `(dataset_train, dataset_test)`, each being the first item yielded
        by `datagenerator.generate` for the respective QR-codes.

    Note:
        Reads the notebook-level `multiprocessing_jobs` variable and reseeds the
        global `random` state.
    """

    def generate_first(size_key, qrcodes):
        # generate() hands back an iterator; only its first item is consumed.
        batches = datagenerator.generate(
            size=dataset_parameters[size_key],
            qrcodes_to_use=qrcodes,
            yield_file_paths=True,
            verbose=True,
            multiprocessing_jobs=multiprocessing_jobs
        )
        return next(batches)

    # Seed first so the shuffle (and therefore the split) is reproducible.
    random.seed(dataset_parameters["random_seed"])
    shuffled_codes = datagenerator.qrcodes[:]  # copy; leave the generator's list untouched
    random.shuffle(shuffled_codes)

    boundary = int(0.8 * len(shuffled_codes))
    qrcodes_train = shuffled_codes[:boundary]
    qrcodes_train.sort()
    qrcodes_test = shuffled_codes[boundary:]
    qrcodes_test.sort()

    print("")
    print("QR-Codes for training:", " ".join(qrcodes_train))
    print("")
    print("QR-Codes for testing:", " ".join(qrcodes_test))
    print("")

    print("Generating training data...")
    dataset_train = generate_first("dataset_size_train", qrcodes_train)

    print("Generating testing data...")
    dataset_test = generate_first("dataset_size_test", qrcodes_test)

    print("Done.")
    return dataset_train, dataset_test

# Function for saving the dataset.

In [12]:
def save_dataset(dataset_train, dataset_test, dataset_parameters):
    """Pickle the train/test datasets together with their parameters.

    The output file is written to the current working directory and named
    "<datetime>-<input_type>-dataset.p", where the datetime prefix comes from
    `utils.get_datetime_string()`.

    Args:
        dataset_train: Training dataset to store.
        dataset_test: Testing dataset to store.
        dataset_parameters: Parameter mapping; "input_type" is used for the
            file name and the whole mapping is stored next to the data.

    Raises:
        KeyError: If "input_type" is missing from `dataset_parameters`.
    """
    print("Saving dataset...")
    datetime_string = utils.get_datetime_string()
    dataset_name = datetime_string + "-" + dataset_parameters["input_type"] + "-dataset.p"
    # Context manager guarantees the file is flushed and closed before the
    # "Saved" message is printed, even if pickling raises.
    with open(dataset_name, "wb") as dataset_file:
        pickle.dump((dataset_train, dataset_test, dataset_parameters), dataset_file)
    print("Saved " + dataset_name)

# Generate with parameters.

In [13]:
# Run the full pipeline once per selected parameter set.
for dataset_parameters in dataset_parameters_to_use:
    # A fresh data-generator is created for each parameter set.
    datagenerator = create_datagenerator(dataset_parameters)
    dataset_train, dataset_test = split_and_generate(datagenerator, dataset_parameters)
    save_dataset(dataset_train, dataset_test, dataset_parameters)

Creating data-generator...
/Users/tristanbehrens/Datasets/welthungerhilfe/20180803
QR-Code SAM-GOV-025 has 1 different manual measurements
   Target [67, 6.7] with 0 JPGs and 51 PCDs.
QR-Code SAM-SNG-073 has 3 different manual measurements
   Target [82, 8.8] with 0 JPGs and 41 PCDs.
   Target [82, 8.8] with 0 JPGs and 40 PCDs.
   Target [82, 8.5] with 0 JPGs and 0 PCDs.
QR-Code SAM-GOV-014 has 1 different manual measurements
   Target [91.1, 11.1] with 0 JPGs and 77 PCDs.
QR-Code SAM-GOV-087 has 1 different manual measurements
   Target [85.7, 10.2] with 0 JPGs and 60 PCDs.
QR-Code SAM-GOV-004 has 1 different manual measurements
   Target [76.2, 8.3] with 0 JPGs and 71 PCDs.
QR-Code SAM-GOV-026 has 1 different manual measurements
   Target [80.1, 9.2] with 0 JPGs and 47 PCDs.
QR-Code SAM-GOV-068 has 1 different manual measurements
   Target [73.3, 7.4] with 0 JPGs and 39 PCDs.
QR-Code SAM-SNG-013 has 1 different manual measurements
   Target [93.7, 11.2] with 0 JPGs and 0 PCDs.
QR-Cod

Generating using QR-codes: ['SAM-GOV-001', 'SAM-GOV-004', 'SAM-GOV-005', 'SAM-GOV-008', 'SAM-GOV-011', 'SAM-GOV-013', 'SAM-GOV-014', 'SAM-GOV-023', 'SAM-GOV-025', 'SAM-GOV-026', 'SAM-GOV-033', 'SAM-GOV-034', 'SAM-GOV-035', 'SAM-GOV-037', 'SAM-GOV-038', 'SAM-GOV-042', 'SAM-GOV-043', 'SAM-GOV-044', 'SAM-GOV-045', 'SAM-GOV-046', 'SAM-GOV-049', 'SAM-GOV-050', 'SAM-GOV-051', 'SAM-GOV-052', 'SAM-GOV-054', 'SAM-GOV-063', 'SAM-GOV-064', 'SAM-GOV-066', 'SAM-GOV-067', 'SAM-GOV-068', 'SAM-GOV-069', 'SAM-GOV-073', 'SAM-GOV-089', 'SAM-GOV-090', 'SAM-GOV-099', 'SAM-SNG-011', 'SAM-SNG-013', 'SAM-SNG-014', 'SAM-SNG-015', 'SAM-SNG-016', 'SAM-SNG-018', 'SAM-SNG-059', 'SAM-SNG-061', 'SAM-SNG-066', 'SAM-SNG-067', 'SAM-SNG-068', 'SAM-SNG-070', 'SAM-SNG-075', 'SAM-SNG-076', 'SAM-SNG-081', 'SAM-SNG-083', 'SAM-SNG-085', 'SAM-SNG-086', 'SAM-SNG-088', 'SAM-SNG-091', 'SAM-SNG-095', 'SAM-SNG-096', 'SAM-SNG-097', 'SAM-SNG-098', 'SAM-SNG-099']
Generating using QR-codes: ['SAM-GOV-001', 'SAM-GOV-004', 'SAM-GOV-005',

100% (3 of 3) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (3 of 3) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01

100% (3 of 3) |##########################| Elapsed Time: 0:00:01 ETA:  00:00:00
100% (3 of 3) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (3 of 3) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (3 of 3) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (9 of 9) |##########################| Elapsed Time: 0:00:03 Time:  0:00:03


(3, 30000, 4)
(3, 30000, 4)
(3, 30000, 4)
(3, 30000, 4)
(3, 30000, 4)
(3, 30000, 4)
(3, 30000, 4)
(9, 30000, 4)
Generating testing data...
Generating using QR-codes: ['SAM-GOV-002', 'SAM-GOV-012', 'SAM-GOV-036', 'SAM-GOV-041', 'SAM-GOV-087', 'SAM-GOV-088', 'SAM-SNG-012', 'SAM-SNG-021', 'SAM-SNG-036', 'SAM-SNG-069', 'SAM-SNG-072', 'SAM-SNG-073', 'SAM-SNG-074', 'SAM-SNG-084', 'SAM-SNG-087']


100% (5 of 5) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01


(5, 30000, 4)
Done.
Saving dataset...
Saved 20180807-2040-pointcloud-dataset.p
