In [1]:
import re
import os
import glob
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from utils import load, random_jitter

In [2]:
# Root directories of the two tile sets (note the trailing slash).
height_path = 'data/heights/'
shadow_path = 'data/shadows/'

# Combinations iterated over when the per-city datasets are built.
zooms = [16]
dates = ['summer', 'spring', 'winter']
cities = [
    'sp', 'bue', 'joh', 'syd', 'tok', 'par', 'mex',
    'sea', 'aus', 'nyc', 'bos', 'chi', 'la', 'dc',
]

In [3]:
def check_image(path, city, date, zoom, i, j):
    """Report whether the tile image for the given coordinates exists on disk.

    The file name is built as ``<path>/<city>/<zoom>/<i>/<j>.png`` when
    ``date`` is None, and as ``<path>/<city>-<date>/<zoom>/<i>/<j>.png``
    otherwise.

    Args:
        path: Root directory of the tile set.
        city: City identifier used as (part of) the directory name.
        date: Label appended to the city directory name, or None for a tile
            set whose directories are not split by date.
        zoom: Integer zoom level (formatted with ``%d``).
        i: Integer tile index forming the directory below the zoom level.
        j: Integer tile index forming the file name.

    Returns:
        bool: True if the file exists, False otherwise.
    """
    # Identity test on purpose: `== None` can be answered arbitrarily by any
    # object that overrides __eq__, which would silently pick the wrong path.
    if date is None:
        filename = '%s/%s/%d/%d/%d.png' % (path, city, zoom, i, j)
    else:
        filename = '%s/%s-%s/%d/%d/%d.png' % (path, city, date, zoom, i, j)

    return os.path.exists(filename)

def create_dataset(city, date, zoom):
    """Build the table of usable tiles for one city / date / zoom combination.

    Every ``.png`` found under ``shadow_path`` for the combination is scanned.
    For each one, ``load`` is called to obtain an ``(input_image, real_image)``
    pair (presumably the height and shadow images -- confirm against
    ``utils.load``). A tile is kept only when ``load`` succeeds and both images
    have a strictly positive maximum value.

    For each kept tile, the 3x3 neighbourhood around ``(i, j)`` is probed with
    ``check_image``; a neighbour counts as present when both its height image
    and its shadow image exist. The nine flags are stored in columns ``0``..``8``.

    Finally, rows whose ``i`` or ``j`` equals the minimum or maximum found
    among the kept tiles are dropped.

    A summary line is printed: city, date, zoom, rows returned, tiles scanned,
    tiles rejected, rejection percentage (0.0 when nothing was scanned).

    Args:
        city: City identifier used in the tile directory names.
        date: Date label used in the shadow tile directory names.
        zoom: Integer zoom level.

    Returns:
        pandas.DataFrame: columns ``city, date, zoom, i, j, 0..8``, one row per
        retained tile.
    """
    matched_files = []
    count_mismatches = 0
    count_total = 0

    for path in glob.glob((shadow_path+'/%s-%s/%d/*/*.png')%(city, date, zoom)):
        count_total += 1

        # The last two integers in the path are the tile indices
        # (.../<zoom>/<i>/<j>.png). The zoom level is already known from the
        # argument (it is part of the glob pattern), so it is not re-parsed
        # and the parameter is no longer overwritten.
        tks = re.findall(r'\d+', path)
        i, j = int(tks[-2]), int(tks[-1])

        try:
            input_image, real_image = load(height_path, shadow_path, city, date, zoom, i, j)
            # Both images must contain something. A NaN maximum fails the
            # comparison and is therefore treated as an empty tile.
            match = bool(input_image.numpy().max() > 0 and real_image.numpy().max() > 0)
        except Exception:
            # Best effort: a tile that cannot be loaded is counted as a
            # mismatch instead of aborting the whole scan.
            match = False

        if not match:
            count_mismatches += 1
            continue

        # Presence flags for the 3x3 neighbourhood, x as the outer offset.
        neighbors = [
            check_image(shadow_path, city, date, zoom, i+y, j+x)
            and check_image(height_path, city, None, zoom, i+y, j+x)
            for x in range(-1, 2)
            for y in range(-1, 2)
        ]
        matched_files.append([city, date, zoom, i, j] + neighbors)

    columns = ['city', 'date', 'zoom', 'i', 'j'] + list(range(0, 9))
    df = pd.DataFrame(matched_files, columns=columns)

    # ignore borders to avoid missing data
    if not df.empty:
        i_min, i_max = df['i'].min(), df['i'].max()
        j_min, j_max = df['j'].min(), df['j'].max()
        df = df.loc[(df['i'] > i_min) & (df['i'] < i_max) & (df['j'] > j_min) & (df['j'] < j_max)]

    # Guard the percentage: an empty glob result used to raise ZeroDivisionError.
    mismatch_pct = (count_mismatches/count_total)*100 if count_total else 0.0
    print(city, date, zoom, len(df), count_total, count_mismatches, mismatch_pct)

    return df

In [4]:
# Write one CSV of usable tiles per (city, date, zoom) combination.
for city in cities:
    for date in dates:
        for zoom in zooms:
            df = create_dataset(city, date, zoom)
            filename = 'data/evaluation/%s-%s-%d.csv'%(city,date,zoom)
            folder = os.path.dirname(filename)
            # exist_ok=True replaces the exists()/makedirs() pair, which could
            # fail if the directory appeared between the check and the create.
            os.makedirs(folder, exist_ok=True)
            df.to_csv(filename,index=False, header=True)

sp summer 16 1016 1330 93 6.992481203007518
sp spring 16 1016 1330 93 6.992481203007518
sp winter 16 1016 1330 93 6.992481203007518
bue summer 16 1037 1517 325 21.423862887277522
bue spring 16 1037 1517 325 21.423862887277522
bue winter 16 1036 1517 326 21.48978246539222
joh summer 16 1990 3780 1644 43.492063492063494
joh spring 16 1987 3780 1648 43.597883597883595
joh winter 16 1979 3780 1656 43.80952380952381
syd summer 16 768 1482 609 41.093117408906885
syd spring 16 768 1482 609 41.093117408906885
syd winter 16 765 1482 612 41.29554655870445
tok summer 16 1617 2254 372 16.50399290150843
tok spring 16 1617 2254 372 16.50399290150843
tok winter 16 1617 2254 372 16.50399290150843
par summer 16 873 1150 13 1.1304347826086958
par spring 16 873 1150 13 1.1304347826086958
par winter 16 873 1150 13 1.1304347826086958
mex summer 16 1838 3286 1318 40.10955569080949
mex spring 16 1838 3286 1318 40.10955569080949
mex winter 16 1837 3286 1319 40.13998782714547
sea summer 16 1203 2120 745 35.141