In [None]:
import re
import os
import glob
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from utils import load, random_jitter

In [None]:
# Root folders of the two image trees used by this notebook. Elsewhere in the
# notebook the shadow tree is addressed as <city>-<date>/<zoom>/<i>/<j>.png and
# the height tree as <city>/<zoom>/<i>/<j>.png.
shadow_path = 'data/shadows/'
height_path = 'data/heights/'

# City codes, date labels and zoom levels to iterate over; one dataset is built
# per (city, date, zoom) combination.
cities = [
    'sp', 'bue', 'joh', 'syd', 'tok', 'par', 'mex',
    'sea', 'aus', 'nyc', 'bos', 'chi', 'la', 'dc',
]
dates = ['summer', 'spring', 'winter']
zooms = [16]

In [None]:
def check_image(path, city, date, zoom, i, j):
    """Return whether the PNG tile for the given coordinates exists on disk.

    The file looked up is ``<path>/<city>/<zoom>/<i>/<j>.png`` when ``date`` is
    None, and ``<path>/<city>-<date>/<zoom>/<i>/<j>.png`` otherwise.

    Args:
        path: Root directory of the image tree.
        city: City code used as (part of) the first sub-directory name.
        date: Date label appended to the city as ``<city>-<date>``, or None
            for a tree that has no date component.
        zoom: Zoom level (formatted with ``%d``).
        i: First tile index, used as a directory name (formatted with ``%d``).
        j: Second tile index, used as the file stem (formatted with ``%d``).

    Returns:
        bool: True if the file exists, False otherwise.
    """
    # Identity comparison: only the None singleton selects the undated layout.
    folder = city if date is None else '%s-%s' % (city, date)
    filename = '%s/%s/%d/%d/%d.png' % (path, folder, zoom, i, j)

    return os.path.exists(filename)

def create_dataset(city, date, zoom):
    """Build the table of usable tiles for one city/date/zoom combination.

    Every PNG matching ``<shadow_path>/<city>-<date>/<zoom>/*/*.png`` is
    visited. For each one the image pair is loaded with ``load`` and the tile
    is kept only if loading succeeds and the pair passes the positive-maximum
    check below. For kept tiles, the existence of the 3x3 neighbourhood
    (including the tile itself) in both image trees is recorded.

    A one-line summary is printed: city, date, zoom, number of rows returned,
    number of files visited, number of rejected files and the rejection
    percentage (0.0 when no files were found).

    Args:
        city: City code; selects the ``<city>-<date>`` shadow folder and the
            ``<city>`` height folder.
        date: Date label of the shadow folder.
        zoom: Zoom level used to build the glob pattern.

    Returns:
        pandas.DataFrame: Columns ``city, date, zoom, i, j`` followed by the
        integer-labelled columns ``0..8`` holding the neighbour-existence
        flags. Rows whose ``i`` or ``j`` equals the minimum or maximum among
        the kept tiles are dropped.

    Raises:
        ValueError: If a matched file's trailing path components cannot be
            parsed as integers.
    """
    pattern = (shadow_path + '/%s-%s/%d/*/*.png') % (city, date, zoom)

    matched_files = []
    count_total = 0
    count_mismatches = 0

    for path in glob.glob(pattern):
        count_total += 1

        # Tile coordinates are the trailing ".../<zoom>/<i>/<j>.png" components
        # of the file being visited; [:-4] strips the ".png" suffix.
        tks = os.path.normpath(path).split(os.path.sep)
        tile_zoom, i, j = int(tks[-3]), int(tks[-2]), int(tks[-1][:-4])

        try:
            input_image, real_image = load(height_path, shadow_path, city, date, tile_zoom, i, j)

            input_max = input_image.numpy().max()
            real_max = real_image.numpy().max()

            # For ordinary (non-NaN) values these three cases together reject
            # any pair in which at least one image has no positive value. They
            # are kept as separate cases so the comparison semantics are
            # unchanged.
            usable = not (
                (input_max > 0 and real_max <= 0)
                or (real_max > 0 and input_max <= 0)
                or (real_max <= 0 and input_max <= 0)
            )
        except Exception:
            # Deliberate best-effort: a pair that cannot be loaded or inspected
            # is counted as a mismatch instead of aborting the whole scan.
            usable = False

        if not usable:
            count_mismatches += 1
            continue

        # check neighbors: x is the offset on j (outer loop) and y the offset
        # on i (inner loop); a neighbour counts only if it exists in both trees.
        neighbors = [
            check_image(shadow_path, city, date, tile_zoom, i + y, j + x)
            and check_image(height_path, city, None, tile_zoom, i + y, j + x)
            for x in range(-1, 2)
            for y in range(-1, 2)
        ]

        matched_files.append([city, date, tile_zoom, i, j] + neighbors)

    columns = ['city', 'date', 'zoom', 'i', 'j']
    columns.extend(range(0, 9))
    df = pd.DataFrame(matched_files, columns=columns)

    # ignore borders to avoid missing data
    if not df.empty:
        inside_i = (df['i'] > df['i'].min()) & (df['i'] < df['i'].max())
        inside_j = (df['j'] > df['j'].min()) & (df['j'] < df['j'].max())
        df = df.loc[inside_i & inside_j]

    # Guard the percentage: the glob may match nothing for this combination.
    mismatch_pct = (count_mismatches / count_total) * 100 if count_total else 0.0
    print(city, date, zoom, len(df), count_total, count_mismatches, mismatch_pct)

    return df

In [None]:
# Build one dataset per (city, date, zoom) combination and write each to its
# own CSV file under data/evaluation/.
for city in cities:
    for date in dates:
        for zoom in zooms:
            df = create_dataset(city, date, zoom)
            filename = 'data/evaluation/%s-%s-%d.csv'%(city,date,zoom)
            # exist_ok=True creates the folder when missing and is a no-op when
            # it already exists, replacing the separate exists() check.
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            df.to_csv(filename, index=False, header=True)