# Photo Processing
Data drawn from the [skylines-12 dataset on Kaggle](https://www.kaggle.com/datasets/vassiliskrikonis/skylines-12/)  
Code from Zachary Hunt

# Imports

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
from IPython.display import clear_output

import skimage
from skimage import io
from skimage.transform import rescale, resize

from icecream import ic
from natsort import natsorted, ns

# Constants

In [2]:
# Lookup from the numeric filename prefix (kept as a string) to the city it
# stands for. The tuple is ordered so that position N (1-based) is city N.
citymap = {
    str(city_number): city_name
    for city_number, city_name in enumerate(
        (
            "Chicago",
            "Dallas",
            "Frankfurt",
            "HongKong",
            "Miami",
            "NewYork",
            "Philadelphia",
            "Seattle",
            "Shanghai",
            "Singapore",
            "Tokyo",
            "Toronto",
        ),
        start=1,
    )
}

# Fill value for padded borders, picked by name from the available shades.
# Currently unused: images are just scaled.
resize_border_color = dict(Black=0, Gray=0.5, White=1)["Black"]

# Folders are written with a trailing slash because file names are appended
# to them by plain string concatenation.
input_directory = "./data/input/"
output_directory = "./data/processed/"

In [3]:
# Files list: natural-sorted names of the regular, non-hidden files in the
# input folder. os.listdir also returns sub-directories and dotfiles such as
# .DS_Store; those are skipped because every name collected here is later
# handed to io.imread and looked up in citymap.
image_names = natsorted([
    entry
    for entry in os.listdir(input_directory)
    if not entry.startswith('.')
    and os.path.isfile(os.path.join(input_directory, entry))
])

In [4]:
# Find upscaling dimensions
# Record the distinct array shapes of the input images. Only the shape is
# kept from each load, so a single image is held in memory at a time.
dimensions = set()
for image_name in image_names:
    img = io.imread(input_directory + image_name)
    dimensions.add(img.shape)

# Take the largest height and the largest width independently. Calling max()
# on the shape tuples themselves compares them lexicographically, which
# returns the tallest image's shape even when a shorter image is wider; that
# wider image would then be shrunk instead of upscaled.
target_dim = (
    max(shape[0] for shape in dimensions),
    max(shape[1] for shape in dimensions),
)
ic(target_dim);

ic| target_dim: (3744, 5616)


In [5]:
# Save out every image as a grayscale copy resized to target_dim.
# The file name (and therefore the extension) is reused for the output.

# Make sure the destination folder exists before the first write.
os.makedirs(output_directory, exist_ok=True)

for image_name in image_names:
    img = io.imread(input_directory + image_name)
    # Only convert arrays that carry a channel axis: a 2-D array is already
    # grayscale, and recent scikit-image releases raise on 2-D input to
    # rgb2gray.
    bw_img = skimage.color.rgb2gray(img) if img.ndim == 3 else img
    # resize stretches to the exact target shape (aspect ratio is not kept)
    # and returns floats, hence the conversion back to 8-bit for saving.
    scaled_bw_img = resize(bw_img, target_dim)
    io.imsave(output_directory + image_name, skimage.img_as_ubyte(scaled_bw_img))

In [6]:
# Build the photo table: one row per input file.
#   index "File"  - file name without its extension
#   "Filename"    - file name as found on disk
#   "City"        - city looked up from the prefix before the first underscore
#   "Target"      - True only for New York photos
images_df = pd.DataFrame({"Filename": list(image_names)})

# Remove the extension with os.path.splitext. str.strip('.jpg') would strip
# any of the characters '.', 'j', 'p', 'g' from BOTH ends of the name rather
# than removing the suffix, so it can eat into the stem itself.
images_df.index = pd.Index(
    [os.path.splitext(filename)[0] for filename in images_df["Filename"]],
    name="File",
)

images_df["City"] = images_df["Filename"].map(lambda name: citymap[name.split('_')[0]])
images_df["Target"] = images_df["City"] == "NewYork"
images_df

Unnamed: 0_level_0,Filename,City,Target
File,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1_1,1_1.jpg,Chicago,False
1_2,1_2.jpg,Chicago,False
1_3,1_3.jpg,Chicago,False
1_4,1_4.jpg,Chicago,False
1_5,1_5.jpg,Chicago,False
...,...,...,...
12_6,12_6.jpg,Toronto,False
12_7,12_7.jpg,Toronto,False
12_8,12_8.jpg,Toronto,False
12_9,12_9.jpg,Toronto,False


In [7]:
# Serialise the photo table (images_df) to PhotoTable.p inside
# output_directory. The file is opened in binary mode because pickle
# writes bytes.
with open(file=output_directory + "PhotoTable.p", mode='wb') as pickle_file:
    pickle.dump(images_df, file=pickle_file)