In [1]:
import re
import glob
import pandas as pd
import tensorflow as tf

In [41]:
# Path templates for the tile directories.
# `shadow_path` is filled with (city, date); `height_path` with the city only.
shadow_path = "data/%s-%s/"
height_path = "data/%s-heights/"

# Every (city, date, zoom) combination below is processed by the driver loop.
cities = [
    "nyc",
    "bos",
    "chi",
    "la",
    "dc",
]
dates = [
    "jun-21",
    "sep-22",
    "dec-21",
]
zooms = [
    14,
    15,
    16,
    17,
]

In [65]:
def _read_first_channel(root, zoom, i, j):
    """Read `<root>/<zoom>/<i>/<j>.png` and return the first channel of the decoded PNG."""
    filename = tf.strings.format('{}/{}/{}/{}.png', (root, zoom, i, j))
    # Strip the double quotes that tf.strings.format puts around string inputs.
    filename = tf.strings.regex_replace(filename, '\"', "")
    return tf.io.decode_png(tf.io.read_file(filename))[:, :, 0]


def load(city, zoom, i, j, date=None):
    """Load one height tile and its matching shadow tile.

    Args:
        city: City key as a Python string; substituted into `height_path`
            and `shadow_path`.
        zoom, i, j: Tile coordinates; the files read are
            `<height_path % city>/<zoom>/<i>/<j>.png` and
            `<shadow_path % (city, date)>/<zoom>/<i>/<j>.png`.
        date: Date key substituted into `shadow_path`. When omitted, the
            notebook-global `date` is used, which is what this function
            silently relied on before the parameter existed.

    Returns:
        `(input_image, real_image)`: two float32 tensors of shape
        (256, 256, 3). `input_image` is the height tile; `real_image` is the
        shadow tile with every pixel zeroed where the height tile is > 0.

    Raises:
        NameError: if `date` is not passed and no global `date` exists.
        Errors from TensorFlow if a file is missing, is not a PNG, or is not
        256x256.
    """
    if date is None:
        # Backward compatibility for callers that still depend on the global.
        if 'date' not in globals():
            raise NameError("name 'date' is not defined: pass date=... to load()")
        date = globals()['date']

    input_image = _read_first_channel(height_path % city, zoom, i, j)
    real_image = _read_first_channel(shadow_path % (city, date), zoom, i, j)

    # Keep shadow values only where the height tile is zero.
    real_image = tf.experimental.numpy.where(input_image <= 0, real_image, 0)

    # The reshape also enforces the expected 256x256 tile size.
    input_image = tf.reshape(input_image, (256, 256, 1))
    input_image = tf.image.grayscale_to_rgb(input_image)

    real_image = tf.reshape(real_image, (256, 256, 1))
    real_image = tf.image.grayscale_to_rgb(real_image)

    # Convert both images to float32 tensors
    input_image = tf.cast(input_image, tf.float32)
    real_image = tf.cast(real_image, tf.float32)

    return input_image, real_image


def create_dataset(city, date, zoom):
    """List the tiles of one (city, date, zoom) whose height and shadow agree.

    Every PNG under `<shadow_path % (city, date)>/<zoom>/*/` is loaded with
    `load`. A tile is kept when the height tile and the masked shadow tile are
    either both all-zero or both contain a positive value. Tiles that fail to
    parse or load are reported and counted as mismatches.

    Args:
        city: City key substituted into the path templates.
        date: Date key substituted into `shadow_path`; passed on to `load`.
        zoom: Integer zoom level (formatted with `%d`).

    Returns:
        List of `(city, date, zoom, i, j)` tuples for the kept tiles, in
        sorted path order.

    Side effects:
        Prints each load error, then one summary line:
        city, date, number kept, number of mismatches.
    """
    matched_files = []
    mismatch_count = 0

    # glob returns files in arbitrary order; sort so the output is reproducible.
    all_files = sorted(glob.glob((shadow_path + '/%d/*/*.png') % (city, date, zoom)))

    for path in all_files:
        try:
            # The last three integers in the path are <zoom>/<i>/<j>.png.
            # Parsed inside the try so an unexpected file name is counted as a
            # mismatch instead of aborting the whole scan.
            tile_zoom, i, j = (int(tk) for tk in re.findall(r'\d+', path)[-3:])

            # Pass the date explicitly rather than relying on a global.
            input_image, real_image = load(city, tile_zoom, i, j, date=date)

            has_height = input_image.numpy().max() > 0
            has_shadow = real_image.numpy().max() > 0
            match = has_height == has_shadow
        except Exception as e:
            # Best effort: report which tile failed and keep scanning.
            print('%s: %s' % (path, e))
            match = False

        if match:
            matched_files.append((city, date, tile_zoom, i, j))
        else:
            mismatch_count += 1

    print(city, date, len(matched_files), mismatch_count)

    return matched_files

In [66]:
# Build one CSV per (city, date, zoom) combination.
# Each CSV lists the tiles returned by create_dataset for that combination,
# one row per tile with columns city, date, zoom, i, j.
for city in cities:
    for date in dates:
        for zoom in zooms:
            all_dataset = create_dataset(city, date, zoom)
            df = pd.DataFrame(all_dataset, columns=['city', 'date', 'zoom', 'i', 'j'])
            # Written to data/<city>-<date>-<zoom>.csv with a header row and no index column.
            df.to_csv('data/%s-%s-%d.csv'%(city,date,zoom), index=False, header=True)

nyc dec-21 1676 1028
