# Generation of Training Data for Neural Network

This notebook generates `nn_X.csv` and `nn_Y.csv`.

In [1]:
import pandas as pd
import numpy as np
import os
import csv
from process_one_chain_using_sift_ransac import *

# Names of the two CSV files this notebook writes. They are bare file names,
# so they resolve against the current working directory.
NN_X_OUTPUT_FILE_NAME, NN_Y_OUTPUT_FILE_NAME = "nn_X.csv", "nn_Y.csv"

# Input locations. NOTE: despite the ABSOLUTE_ prefix in the names, these are
# relative paths (they start with "..") and therefore also depend on the
# current working directory.
ABSOLUTE_PATH_TO_IMAGES_FOLDER = "../data/images"
ABSOLUTE_PATH_TO_RANGE_DATA = "../data/range.csv"
ABSOLUTE_PATH_TO_GROUND_TRUTH = "../data/train_labels.csv"

## Generation of `nn_X.csv`
1. Iterate through some folders (image chains) in `data/images` and put them through the SIFT/RANSAC pipeline.
2. Write the calculated incremental transformations to `nn_X.csv`.
3. Read the corresponding values from `data/range.csv` and add them to `nn_X.csv`.

# How many image chains (taken in sorted order) are run through the SIFT/RANSAC pipeline.
NUMBER_OF_CHAINS_TO_PROCESS = 5

# Create (or truncate) the output CSV file and write the header row.
with open(NN_X_OUTPUT_FILE_NAME, "w") as file:
    file.write("chain_id,i,range,x,y,z,qw,qx,qy,qz\n")

# Each sub-folder of the images folder is one image chain, named after its chain ID.
# Keeping only directories skips stray files such as macOS's hidden '.DS_Store' file.
# os.listdir returns entries in arbitrary order, so sort alphabetically to make the
# selection of chains deterministic.
chain_ids = sorted(
    entry
    for entry in os.listdir(ABSOLUTE_PATH_TO_IMAGES_FOLDER)
    if os.path.isdir(os.path.join(ABSOLUTE_PATH_TO_IMAGES_FOLDER, entry))
)

# There may be fewer chains on disk than requested; report the real total.
chains_to_process = chain_ids[:NUMBER_OF_CHAINS_TO_PROCESS]

for index, chain_id in enumerate(chains_to_process):
    print(f'Processing chain {index + 1} of {len(chains_to_process)}...')
    absolute_path_to_chain_folder = os.path.join(ABSOLUTE_PATH_TO_IMAGES_FOLDER, chain_id)
    # NOTE(review): the helper is given the output file name and presumably appends
    # this chain's rows below the header written above -- confirm in its module.
    process_one_chain_using_sift_ransac(absolute_path_to_chain_folder, ABSOLUTE_PATH_TO_RANGE_DATA, NN_X_OUTPUT_FILE_NAME)

## Generation of `nn_Y.csv`
1. For each image chain that was processed when generating `nn_X.csv`, calculate the incremental transformations using `data/train_labels.csv`.
2. Write the incremental transformations to `nn_Y.csv`.

In [15]:
# Find out which chains have been processed.
#
# Collect the distinct chain IDs from the first column of nn_X.csv, in order of
# first appearance. Unlike reading every 100th row, this stays correct when a
# chain has more or fewer rows than expected, and it yields an empty list
# (instead of raising StopIteration) when the file has no rows at all.
with open(NN_X_OUTPUT_FILE_NAME, "r", newline="") as csv_file:
    reader = csv.reader(csv_file)
    next(reader, None)  # Skip the header; the default avoids an error on an empty file.
    # dict.fromkeys de-duplicates while preserving insertion order.
    # `if row` ignores blank lines, which csv.reader yields as empty lists.
    chain_ids = list(dict.fromkeys(row[0] for row in reader if row))

print(chain_ids)

# Number of images in every chain. Image 0 is the reference image, so each chain
# yields one identity row plus IMAGES_PER_CHAIN - 1 incremental transformations.
IMAGES_PER_CHAIN = 100

ground_truth = pd.read_csv(ABSOLUTE_PATH_TO_GROUND_TRUTH)

# For an unknown reason, the header row's column names have trailing whitespace.
# We remove it so we can index into columns by their plain names.
ground_truth.columns = ground_truth.columns.str.strip()

# Format the first column "chain_id" as strings so it compares equal to the IDs read from nn_X.csv.
ground_truth["chain_id"] = ground_truth["chain_id"].astype(str)

# Open the output file once for the header and all rows instead of reopening it per row.
with open(NN_Y_OUTPUT_FILE_NAME, "w") as file:
    # The header must match the 9 fields written per row below. (There is no
    # "range" column in nn_Y.csv; listing one would shift every column on read.)
    file.write("chain_id,i,x,y,z,qw,qx,qy,qz\n")

    for (index, chain_id) in enumerate(chain_ids):
        print(f'Processing chain {index + 1} of {len(chain_ids)}...')

        # Get the ground truth for this chain.
        # The filtered DataFrame keeps the original row labels, so reset the index
        # to 0, 1, 2, ... and use the new row label as a surrogate for the image number.
        ground_truth_chain = ground_truth[ground_truth["chain_id"] == chain_id].reset_index(drop=True)

        # Fail with a clear message rather than an opaque KeyError from .loc below.
        if len(ground_truth_chain) < IMAGES_PER_CHAIN:
            raise ValueError(
                f'Chain {chain_id} has {len(ground_truth_chain)} ground-truth rows; '
                f'expected at least {IMAGES_PER_CHAIN}.'
            )

        # Decompose the net transformations into incremental transformations.

        # Handle the first image -- the reference image: zero translation and the
        # identity quaternion (qw = 1, qx = qy = qz = 0).
        file.write(f'{chain_id},0,0,0,0,1,0,0,0\n')

        # Then, for each later image i, calculate the incremental transformation
        # from image h = i - 1 to image i.
        for i in range(1, IMAGES_PER_CHAIN):

            h = i - 1 # We choose the letter h because h is the letter before i.

            from_0_to_h_rotation    = ground_truth_chain.loc[h, ['qw', 'qx', 'qy', 'qz']].values
            from_0_to_h_translation = ground_truth_chain.loc[h, ['x', 'y', 'z']].values
            from_0_to_i_rotation    = ground_truth_chain.loc[i, ['qw', 'qx', 'qy', 'qz']].values
            from_0_to_i_translation = ground_truth_chain.loc[i, ['x', 'y', 'z']].values

            # NOTE(review): decompose_transformations is defined outside this notebook;
            # from its use here it returns (rotation, translation) with the rotation
            # indexed as qw, qx, qy, qz -- confirm against its definition.
            from_h_to_i_rotation, from_h_to_i_translation = decompose_transformations(from_0_to_h_rotation, from_0_to_h_translation, from_0_to_i_rotation, from_0_to_i_translation)

            file.write(f'{chain_id},{i},{from_h_to_i_translation[0]},{from_h_to_i_translation[1]},{from_h_to_i_translation[2]},{from_h_to_i_rotation[0]},{from_h_to_i_rotation[1]},{from_h_to_i_rotation[2]},{from_h_to_i_rotation[3]}\n')

['0036465cc6', '0053053e5a', '009ac5e872', '00f87cb03e', '0165a57d79']
Processing chain 1 of 5...
Processing chain 2 of 5...
Processing chain 3 of 5...
Processing chain 4 of 5...
Processing chain 5 of 5...
