# Photo Processing
Data Drawn from [Kaggle](https://www.kaggle.com/datasets/vassiliskrikonis/skylines-12/)  
Code from Zachary Hunt

# Imports

In [None]:
import os
import pickle
import numpy as np
import pandas as pd
from IPython.display import clear_output

import skimage
from skimage import io
from skimage.transform import rescale, resize

from icecream import ic
from natsort import natsorted, ns

# Constants

In [None]:
# Numeric city code (as a string, "1" through "12") -> city name.
citymap = {
    str(code): city
    for code, city in enumerate(
        (
            "Chicago",
            "Dallas",
            "Frankfurt",
            "HongKong",
            "Miami",
            "NewYork",
            "Philadelphia",
            "Seattle",
            "Shanghai",
            "Singapore",
            "Tokyo",
            "Toronto",
        ),
        start=1,
    )
}

# Border fill value used when padding a resized image.
# Currently unused: images are just scaled.
resize_border_color = dict(Black=0, Gray=0.5, White=1)["Black"]

# Directory layout: raw photos are read from the input directory; outputs go under the output directory.
input_directory = "./data/input/"
output_directory = "./data/"
processed_photo_directory = f"{output_directory}processed/"

In [None]:
# Files List: naturally-sorted names of the regular files in the input directory.
# Hidden entries (e.g. ".DS_Store") and sub-directories (e.g. ".ipynb_checkpoints")
# are skipped because they cannot be loaded as images.
image_names = natsorted(
    name
    for name in os.listdir(input_directory)
    if not name.startswith(".") and os.path.isfile(os.path.join(input_directory, name))
)

In [None]:
# Find upscaling dimensions.
# Each image is read here only for its shape and read again when it is converted,
# so that the full set of decoded images never has to sit in RAM at once.
dimensions = {
    io.imread(input_directory + image_name).shape
    for image_name in image_names
}

# max() on tuples compares lexicographically, so this picks the shape with the
# greatest first component (ties broken by the following components); only the
# first two components are kept.
target_dim = max(dimensions)[:2]
ic(target_dim)

# The surveyed value above is informational only: the size actually used
# downstream is this fixed one.
target_dim = 374, 562
ic(target_dim);

In [None]:
# Save Out Images as JPEGs
# Each input image is converted to grayscale, resized to target_dim and written
# under the same filename into processed_photo_directory.
os.makedirs(processed_photo_directory, exist_ok=True)  # imsave does not create missing folders
for image_name in image_names:
    raw_img = io.imread(input_directory + image_name)
    if raw_img.ndim == 2:
        # Already single-channel: rgb2gray would reject it.
        gray_img = raw_img
    else:
        if raw_img.shape[-1] == 4:
            # Drop the alpha channel first: rgb2gray only accepts 3-channel input.
            raw_img = skimage.color.rgba2rgb(raw_img)
        gray_img = skimage.color.rgb2gray(raw_img)
    scaled_bw_img = resize(gray_img, target_dim)
    # resize returns floats; convert back to 8-bit before writing the file.
    io.imsave(processed_photo_directory + image_name, skimage.img_as_ubyte(scaled_bw_img))

In [None]:
# Build the photo table: one row per image file, indexed by the filename without
# its ".jpg" extension.
images_df = pd.Index(image_names, name="Filename").to_frame(index=False)

# Remove only a trailing ".jpg". str.strip('.jpg') would instead strip any of the
# characters '.', 'j', 'p', 'g' from both ends (e.g. "12_pg.jpg" -> "12_").
images_df.index = images_df["Filename"].str.replace(r"\.jpg$", "", regex=True)
images_df.index.name = "File"

# The part of the filename before the first "_" is the city code used as the citymap key.
images_df["City"] = images_df["Filename"].apply(lambda name: citymap[name.split('_')[0]])
# Boolean label: True for New York photos, False for every other city.
images_df["Target"] = images_df["City"] == "NewYork"
images_df

In [None]:
# Persist the photo table as a pickle file inside the output directory
with open(f"{output_directory}PhotoTable.p", mode="wb") as table_file:
    pickle.dump(images_df, table_file)