In [1]:
import glob
import os
import random
import cv2
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from tqdm.notebook import trange, tqdm

In [None]:
# For every city CSV in data/tile_heights, compute the per-tile RMSE between the
# GAN-predicted shadow raster (data/shadows_new) and the reference shadow raster
# (data/shadows) for summer, spring and winter, then write the three values back
# into that city's CSV as the columns 'summer', 'spring' and 'winter'.

def _read_first_channel(path):
    """Return channel 0 of the image at `path`, failing loudly if it cannot be read."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError('could not read image: %s' % path)
    return img[:, :, 0]

seasons = ['summer', 'spring', 'winter']
files = glob.glob('data/tile_heights/*.csv')

for file in tqdm(files, total=len(files), leave=False):
    file = file.replace('\\','/')
    tiles = pd.read_csv(file)
    # city code is the part of the file name before the first '-'
    city = file.split('/')[-1].split('.')[0].split('-')[0]

    # float columns from the start, so storing an RMSE never needs a dtype upcast
    for season in seasons:
        tiles[season] = 0.0

    for index, row in tqdm(tiles.iterrows(), total=tiles.shape[0], leave=False):
        i = int(row['i'])
        j = int(row['j'])

        # the height raster depends on the tile only, so read it once per tile
        heights = _read_first_channel('data/heights_new/%s/16/%d/%d.png'%(city,i,j))
        on_building = heights > 0

        for season in seasons:
            values_gan = _read_first_channel('data/shadows_new/%s-%s/16/%d/%d.png'%(city,season,i,j))
            values_gan[on_building] = 0   # ignore pixels covered by a non-zero height

            values_true = _read_first_channel('data/shadows/%s-%s/16/%d/%d.png'%(city,season,i,j))
            values_true[on_building] = 0

            # scale pixel values by 1/255 before comparing
            target = (values_true/255).astype(np.float32)
            prediction = (values_gan/255).astype(np.float32)

            mse = np.mean((target-prediction) ** 2)
            rmse = np.sqrt(mse)
            tiles.at[index, season] = rmse

    tiles.to_csv('data/tile_heights/%s-16.csv'%city, index=False)

In [2]:
# Combine the per-city tile tables into one dataframe, tagging every row with the
# city code taken from the file name (the part before the first '-').
files = glob.glob('data/tile_heights/*.csv')
frames = []
for file in tqdm(files, total=len(files), leave=False):
    file = file.replace('\\','/')
    city = file.split('/')[-1].split('.')[0].split('-')[0]
    tiles = pd.read_csv(file)
    tiles['city'] = city
    frames.append(tiles)

# Concatenate once instead of growing `df` inside the loop (which re-copies all
# previous rows on every iteration). pd.concat rejects an empty list, so fall
# back to an empty frame when no CSV was found.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

  0%|          | 0/14 [00:00<?, ?it/s]

In [3]:
# Row-wise mean of the three seasonal RMSE columns, stored as the per-tile 'avg'.
df['avg'] = df.loc[:, ['summer', 'spring', 'winter']].mean(axis='columns')
df

Unnamed: 0,i,j,height,summer,spring,winter,city,avg
0,14951,26964,11.102304,0.024548,0.022893,0.030387,aus,0.025943
1,14951,26965,8.771652,0.027654,0.025744,0.032588,aus,0.028662
2,14951,26966,7.091804,0.036468,0.035041,0.041422,aus,0.037644
3,14951,26967,5.792377,0.054950,0.055047,0.067578,aus,0.059192
4,14951,26968,6.835874,0.039291,0.039068,0.047510,aus,0.041956
...,...,...,...,...,...,...,...,...
22737,58239,25814,6.470588,0.140046,0.156544,0.186568,tok,0.161053
22738,58239,25815,6.470588,0.151712,0.189622,0.290870,tok,0.210735
22739,58239,25816,6.470588,0.115214,0.142628,0.180287,tok,0.146043
22740,58239,25817,6.470588,0.047356,0.062975,0.046908,tok,0.052413


In [4]:
# Min, Q1, median, Q3 and max of the per-tile average RMSE.
rmse_quantiles = df['avg'].quantile(q=[0, 0.25, 0.5, 0.75, 1]).tolist()
# Interquartile range: Q3 (second from the end) minus Q1 (second from the start).
rmse_iqr = rmse_quantiles[-2] - rmse_quantiles[1]
# NOTE(review): the value printed below is 1.5 * IQR on its own, not Q3 + 1.5 * IQR;
# confirm this is the intended cut-off.
print(rmse_quantiles, 1.5 * rmse_iqr, sep='\n')

[0.0013154229576077666, 0.030090603977441756, 0.060380268841981846, 0.08397920181353884, 0.4377887348333995]
0.08083289675414562


In [5]:
# Share of tiles whose average RMSE is above 1.5 * IQR.
int((df['avg'] > 1.5 * rmse_iqr).sum()) / len(df)

0.27939495207105797

In [6]:
# Min, Q1, median, Q3 and max of the per-tile 'height' column, plus its IQR.
height_quantiles = df['height'].quantile(q=[0, 0.25, 0.5, 0.75, 1]).tolist()
height_iqr = height_quantiles[-2] - height_quantiles[1]
print(height_quantiles)

[0.0, 6.470588235294118, 6.470588235294119, 7.7298641889929645, 228.59184781473823]


In [8]:
# df[(df['avg']<=rmse_quantiles[3]) & (df['height']<=height_quantiles[2]) & (df['city'] == 'chi')]

In [7]:
# cmap_g: two-stop ramp from white to black with 256 levels (used for height rasters below).
colors = [(1, 1, 1), (0, 0, 0)]
cmap_g = LinearSegmentedColormap.from_list(name='colorscale', colors=colors, N=256)

# cmap_r: white -> yellow -> orange -> red -> dark red with 100 levels (used for
# shadow rasters below). Stops are written as 8-bit RGB and scaled to [0, 1].
colors = [
    tuple(channel / 255 for channel in rgb)
    for rgb in (
        (255, 255, 255),
        (254, 204, 92),
        (253, 141, 60),
        (240, 59, 32),
        (189, 0, 38),
    )
]
cmap_r = LinearSegmentedColormap.from_list(name='colorscale', colors=colors, N=100)

In [None]:
# Visual spot-check of one city/season: show height raster, GAN shadow and
# reference shadow side by side for a slice of the tiles, ordered from the
# largest to the smallest average RMSE.
city = 'sea'
season = 'winter'

test = df[(df['avg']<=rmse_quantiles[3]) & (df['city'] == city)]
test = test.sort_values(by=['avg'], ascending=False)
print(len(test))

# Other filters used while exploring:
#   (df['avg']>rmse_iqr* 1.5)
#   & (df['height']<=height_quantiles[2])
#   test = test.sample(n=50, replace=False)

# Skip the first 250 rows and show rows 250..500 (inclusive) of the sorted table.
window = test.iloc[250:501]
for index, row in tqdm(window.iterrows(), total=window.shape[0], leave=False):
    i = int(row['i'])
    j = int(row['j'])
    height = float(row['height'])
    avg = float(row['avg'])

    print(i,j, height, avg)

    # Load channel 0 of the three rasters for this tile, failing with a clear
    # message if a file is missing (cv2.imread returns None in that case).
    layers = []
    for path in (
        'data/heights_new/%s/16/%d/%d.png'%(city,i,j),
        'data/shadows_new/%s-%s/16/%d/%d.png'%(city,season,i,j),
        'data/shadows/%s-%s/16/%d/%d.png'%(city,season,i,j),
    ):
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError('could not read image: %s' % path)
        layers.append(img[:,:,0])
    heights, values_gan, values_true = layers

    # blank out shadow pixels wherever the height raster is non-zero
    values_gan[heights>0] = 0
    values_true[heights>0] = 0

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
    # imshow already rescales to the data range, so no manual division by the
    # maximum is needed (that division produced NaNs on tiles whose maximum is 0)
    ax1.imshow(heights, cmap=cmap_g)
    ax2.imshow(values_gan, cmap=cmap_r)
    ax3.imshow(values_true, cmap=cmap_r)

    plt.show()


In [55]:
# nyc: hand-picked tiles as [city, i, j], split into small-error and large-error examples.
arr_small_error = [
    ['nyc', 19265, 24657], ['nyc', 19264, 24666], ['nyc', 19261, 24684], ['nyc', 19260, 24669],
    ['nyc', 19254, 24684], ['nyc', 19255, 24679], ['nyc', 19267, 24671], ['nyc', 19292, 24656],
    ['nyc', 19312, 24602], ['nyc', 19322, 24614], ['nyc', 19295, 24659], ['nyc', 19311, 24603],
    ['nyc', 19304, 24672], ['nyc', 19346, 24629], ['nyc', 19346, 24634], ['nyc', 19349, 24631],
    ['nyc', 19324, 24634], ['nyc', 19267, 24660], ['nyc', 19315, 24602], ['nyc', 19317, 24604],
    ['nyc', 19330, 24636], ['nyc', 19307, 24614], ['nyc', 19320, 24612],
]
arr_large_error = [
    ['nyc', 19322, 24610], ['nyc', 19298, 24642], ['nyc', 19302, 24672], ['nyc', 19299, 24643],
    ['nyc', 19313, 24610], ['nyc', 19300, 24639], ['nyc', 19299, 24639],
]

# Pick 10 of the small-error tiles. A dedicated, seeded generator keeps the
# selection identical on every run without touching the global `random` state.
arr_small_error = random.Random(0).sample(arr_small_error, 10)

In [56]:
# sea: hand-picked tiles as [city, i, j], appended to the lists built for nyc.
# Note that `extend` mutates the existing lists, so re-running this cell appends
# the Seattle tiles again.
arr_small_error.extend([
    ['sea', 10499, 22876], ['sea', 10494, 22903], ['sea', 10499, 22893], ['sea', 10496, 22876],
    ['sea', 10496, 22880], ['sea', 10499, 22868], ['sea', 10501, 22890], ['sea', 10503, 22890],
    ['sea', 10495, 22876], ['sea', 10498, 22881], ['sea', 10503, 22874], ['sea', 10499, 22881],
    ['sea', 10491, 22882], ['sea', 10490, 22874], ['sea', 10496, 22858], ['sea', 10498, 22901],
    ['sea', 10497, 22895], ['sea', 10494, 22876],
])
arr_large_error.extend([
    ['sea', 10498, 22890], ['sea', 10499, 22888], ['sea', 10496, 22881], ['sea', 10500, 22874],
    ['sea', 10499, 22889], ['sea', 10503, 22873], ['sea', 10502, 22873], ['sea', 10497, 22882],
    ['sea', 10502, 22875], ['sea', 10500, 22903],
])

# Pick 10 of the combined large-error tiles, using a seeded generator so the
# selection is reproducible.
# NOTE(review): only arr_large_error is sub-sampled here; arr_small_error keeps
# every Seattle tile. Confirm that asymmetry is intended.
arr_large_error = random.Random(0).sample(arr_large_error, 10)

In [66]:
def read_heights_gan_true(city, season, i, j):
    """Load the height, GAN-shadow and reference-shadow rasters of one zoom-16 tile.

    Parameters
    ----------
    city : str
        City code used in the directory names (e.g. 'sea').
    season : str
        Season suffix of the shadow directories (e.g. 'winter').
    i, j : int
        Tile indices used as sub-directory and file name respectively.

    Returns
    -------
    (heights, values_gan, values_true)
        Channel 0 of the images under data/heights_new, data/shadows_new and
        data/shadows. In both shadow rasters every pixel whose height value is
        greater than 0 is set to 0.

    Raises
    ------
    FileNotFoundError
        If any of the three images cannot be read.
    """
    def _first_channel(path):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising when a file is missing
            raise FileNotFoundError('could not read image: %s' % path)
        return img[:,:,0]

    heights = _first_channel('data/heights_new/%s/16/%d/%d.png'%(city,i,j))

    values_gan = _first_channel('data/shadows_new/%s-%s/16/%d/%d.png'%(city,season,i,j))
    values_gan[heights>0] = 0

    values_true = _first_channel('data/shadows/%s-%s/16/%d/%d.png'%(city,season,i,j))
    values_true[heights>0] = 0

    return heights, values_gan, values_true

# Save one three-panel figure (heights | reference shadow | GAN shadow) per
# large-error tile under data/plots/fig4/<city>/large_error/<i>-<j>.png.
season = 'winter'

for city, i, j in arr_large_error:
    heights, values_gan, values_true = read_heights_gan_true(city, season, i, j)

    folder = 'data/plots/fig4/%s/large_error'%(city)
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(folder, exist_ok=True)

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
    for ax, image, cmap in (
        (ax1, heights, cmap_g),
        (ax2, values_true, cmap_r),
        (ax3, values_gan, cmap_r),
    ):
        ax.imshow(image, cmap=cmap)
        ax.axis('off')

    fig.savefig('%s/%d-%d.png'%(folder,i,j), bbox_inches='tight', pad_inches=0)
    # close this figure explicitly so figures do not accumulate across iterations
    plt.close(fig)


In [67]:
def plot(heights, values_gan, values_true, folder):
    """Write the three rasters of one tile as separate axis-free PNGs into `folder`.

    Produces heights.png (drawn with cmap_g), values_gan.png and values_true.png
    (both drawn with cmap_r). Each image is drawn on a fresh pyplot figure that
    is closed right after saving. Returns None.
    """
    panels = (
        ('%s/heights.png', heights, cmap_g),
        ('%s/values_gan.png', values_gan, cmap_r),
        ('%s/values_true.png', values_true, cmap_r),
    )
    for target, raster, colormap in panels:
        plt.imshow(raster, cmap=colormap)
        plt.axis('off')
        plt.savefig(target % folder, bbox_inches='tight', pad_inches=0)
        plt.close()

# Hand-picked example tiles as [city, i, j]: one good and one bad prediction per city.
good = [['joh', 37874, 37694] , ['sea', 10495, 22876], ['nyc', 19317, 24604]]
bad = [['joh', 37900, 37696] , ['sea', 10503, 22873], ['nyc', 19300, 24639]]

# Export the three rasters of every example to data/plots/fig4/<city>/<good|bad>/.
season = 'winter'
for label, picks in (('good', good), ('bad', bad)):
    for city, i, j in picks:
        heights, values_gan, values_true = read_heights_gan_true(city, season, i, j)

        # no trailing slash: plot() adds its own '/' before the file name
        folder = 'data/plots/fig4/%s/%s'%(city, label)
        os.makedirs(folder, exist_ok=True)

        plot(heights, values_gan, values_true, folder)