In [2]:
import pandas as pd
import os
import numpy as np
from scipy.sparse import coo_matrix

## Visual Features

In [2]:
def load_labels():
    # prepare all the labels
    # scene category relevant
    file_name_category = 'categories_places365.txt'
    if not os.access(file_name_category, os.W_OK):
        synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt'
        os.system('wget ' + synset_url)
    classes = list()
    with open(file_name_category) as class_file:
        for line in class_file:
            classes.append(line.strip().split(' ')[0][3:])
    classes = tuple(classes)

    # indoor and outdoor relevant
    file_name_IO = 'IO_places365.txt'
    if not os.access(file_name_IO, os.W_OK):
        synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/IO_places365.txt'
        os.system('wget ' + synset_url)
    with open(file_name_IO) as f:
        lines = f.readlines()
        labels_IO = []
        for line in lines:
            items = line.rstrip().split()
            labels_IO.append(int(items[-1]) -1) # 0 is indoor, 1 is outdoor
    labels_IO = np.array(labels_IO)

    # scene attribute relevant
    file_name_attribute = 'labels_sunattribute.txt'
    if not os.access(file_name_attribute, os.W_OK):
        synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/labels_sunattribute.txt'
        os.system('wget ' + synset_url)
    with open(file_name_attribute) as f:
        lines = f.readlines()
        labels_attribute = [item.rstrip() for item in lines]
    file_name_W = 'W_sceneattribute_wideresnet18.npy'
    if not os.access(file_name_W, os.W_OK):
        synset_url = 'http://places2.csail.mit.edu/models_places365/W_sceneattribute_wideresnet18.npy'
        os.system('wget ' + synset_url)
    W_attribute = np.load(file_name_W)

    return classes, labels_IO, labels_attribute, W_attribute

In [3]:
def idx_to_mat(M, M_ori, ID, k):
    row = np.repeat(np.arange(0,len(M))[:,None],k,axis=1).flatten()
    col = ID.flatten()
    data = M.flatten()
    
    M_new = coo_matrix((data, (row, col)), shape=M_ori.shape).toarray()
    
    return M_new

In [4]:
def k_hot_filter(M, k):
    IDX = np.argsort(M,axis=1)
    ID = IDX[:,-k:]
    ID_rev = IDX[:,:-k]
    
    R_id = np.take(M,ID, axis=1)
    M_id = np.stack([R_id[i,i,:] for i in range(len(M))])
    
    conf = M_id.sum(axis=1)
    
    R_rev = np.take(np.ones(M.shape),ID_rev, axis=1)
    R_rev_id = np.stack([R_rev[i,i,:] for i in range(len(M))])
    M_rev = R_rev_id*(1-conf[:,None])/(M.shape[1]-k)
    
    M0 = idx_to_mat(M_id, M, ID, k)
    M1 = idx_to_mat(M_rev, M, ID_rev, M.shape[1]-k)
    
    return M0+M1

In [5]:
classes, labels_IO, labels_attribute, W_attribute = load_labels()

In [7]:
IMG_pred_150 = pd.read_csv('Venezia/data_storage/IMG_pred_150.csv',sep='\t', index_col='Unnamed: 0')
face_pred = pd.read_csv('Venezia/data_storage/Face_preds.csv',sep='\t', index_col='Unnamed: 0')

In [326]:
IMG_pred_150 = pd.read_csv('Amsterdam/data_storage/IMG_pred_150.csv',sep='\t')
face_pred = pd.read_csv('Amsterdam/data_storage/Face_preds.csv',sep='\t', index_col='Unnamed: 0')

In [354]:
IMG_pred_150 = pd.read_csv('Suzhou/data_storage/IMG_pred_150.csv',sep='\t')
face_pred = pd.read_csv('Suzhou/data_storage/Face_preds.csv',sep='\t', index_col='Unnamed: 0')

In [33]:
IMG_pred_150 = pd.read_csv('data_storage/images/IMG_pred_150_cat.csv',sep='\t', index_col='Unnamed: 0')
face_pred = pd.read_csv('data_storage/images/Face_preds.csv',sep='\t', index_col='Unnamed: 0')

In [34]:
IMG_pred_150

Unnamed: 0,ID,Attributes_0,Attributes_1,Attributes_2,Attributes_3,Attributes_4,Attributes_5,Attributes_6,Attributes_7,Attributes_8,...,cat_id_vote,cat_probs_vote,category_vote,conf_vote,cat_id_stack,cat_probs_stack,category_stack,conf_stack,category_same,index.1
0,50770113131,man-made,natural light,no horizon,open area,vertical components,vegetation,sunny,leaves,foliage,...,0,[0.39053658 0.07688859 0.05247085 0.01808068 0...,architectural elements,0.390537,0,[0.434003 0.09930002 0.0074593 0.01666662 0...,architectural elements,0.434003,True,50770113131
1,50770228312,man-made,natural light,open area,vertical components,no horizon,touring,sunny,brick,praying,...,5,[0.25264659 0.02824921 0.00182519 0.00707536 0...,monuments,0.689898,5,[0.36544746 0.03825779 0.00173039 0.00638233 0...,monuments,0.556244,True,50770228312
2,50766039303,natural light,open area,man-made,boating,sunny,swimming,transporting,far-away horizon,still water,...,4,[0.0073182 0.35323225 0.00486117 0.00246835 0...,landscape scenery and natural features,0.477274,1,[0.04836891 0.40009176 0.02311173 0.04185914 0...,form,0.400092,False,50766039303
3,50766893587,natural light,man-made,open area,touring,boating,swimming,sunny,still water,vertical components,...,6,[0.02507305 0.25538455 0.01152419 0.00863368 0...,people,0.481766,6,[0.0181194 0.33873159 0.06019872 0.05836022 0...,people,0.364438,True,50766893587
4,50766892772,man-made,natural light,open area,no horizon,vertical components,sunny,pavement,driving,touring,...,1,[0.01647333 0.89346488 0.01105204 0.00141608 0...,form,0.893465,1,[7.14481079e-03 9.37520564e-01 6.32304290e-03 ...,form,0.937521,True,50766892772
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,20879329608,man-made,natural light,open area,sunny,no horizon,vertical components,touring,brick,glass,...,5,[0.03339911 0.28411171 0.00525297 0.00765534 0...,monuments,0.509671,5,[0.09946484 0.32641971 0.00675882 0.00629108 0...,monuments,0.458628,True,20879329608
80959,21040989736,man-made,no horizon,natural light,aged,vertical components,praying,symmetrical,brick,enclosed area,...,0,[0.94034656 0.01258757 0.00357108 0.01685633 0...,architectural elements,0.940347,0,[9.43930439e-01 7.15241083e-03 3.10525620e-04 ...,architectural elements,0.943930,True,21040989736
80960,20880415099,man-made,no horizon,natural light,aged,rock,open area,dry,sunny,brick,...,0,[0.87164785 0.03271126 0.00088374 0.0227124 0...,architectural elements,0.871648,0,[9.09375858e-01 1.06165719e-02 5.26034688e-04 ...,architectural elements,0.909376,True,20880415099
80961,20879281658,man-made,no horizon,natural light,aged,dry,brick,wood,enclosed area,dirty,...,0,[0.55386971 0.32644859 0.00757863 0.01558469 0...,architectural elements,0.553870,0,[0.6153323 0.179466 0.00454481 0.06058073 0...,architectural elements,0.615332,True,20879281658


In [382]:
face_pred

Unnamed: 0,index,Face_prob,Face_boxes,Face_num_faces,Face_area,Face_areas,Face_ratio,index.1
0,50770113131,0.0,[],0,0.0,[],0.0,50770113131
1,50770228312,0.0,[],0,0.0,[],0.0,50770228312
2,50766039303,0.0,[],0,0.0,[],0.0,50766039303
3,50766893587,0.0,[],0,0.0,[],0.0,50766893587
4,50766892772,0.0,[],0,0.0,[],0.0,50766892772
...,...,...,...,...,...,...,...,...
80958,20879329608,0.0,[],0,0.0,[],0.0,20879329608
80959,21040989736,0.0,[],0,0.0,[],0.0,21040989736
80960,20880415099,0.0,[],0,0.0,[],0.0,20880415099
80961,20879281658,0.0,[],0,0.0,[],0.0,20879281658


In [35]:
H = IMG_pred_150[IMG_pred_150.columns[51:563]]

In [36]:
H

Unnamed: 0,VisFeat_0,VisFeat_1,VisFeat_2,VisFeat_3,VisFeat_4,VisFeat_5,VisFeat_6,VisFeat_7,VisFeat_8,VisFeat_9,...,VisFeat_502,VisFeat_503,VisFeat_504,VisFeat_505,VisFeat_506,VisFeat_507,VisFeat_508,VisFeat_509,VisFeat_510,VisFeat_511
0,0.095580,1.484140,0.167445,0.336726,0.396595,1.318515,0.116753,0.704350,0.172106,0.757024,...,0.917451,1.152833,0.489559,0.313069,0.137527,0.787630,0.285278,0.471025,1.154447,1.687698
1,0.362849,2.836981,0.030664,0.786215,0.178590,1.163436,0.499891,0.430285,1.069200,0.197102,...,2.555568,0.199221,1.318208,0.266274,0.167791,1.638651,0.379830,0.921426,2.066001,0.905397
2,0.099797,0.947452,0.381367,0.213776,0.151737,0.437884,2.635227,0.394498,0.006307,0.623336,...,1.052630,0.157473,0.465565,0.321595,0.653918,0.297669,0.094641,0.219914,1.218820,0.736437
3,0.316780,0.736990,0.384384,0.187189,0.271150,1.719753,0.928709,0.179578,0.232560,0.771742,...,2.007199,1.004224,1.507421,0.207160,0.187115,0.471811,0.177643,1.205583,0.610920,0.165037
4,0.202310,0.243766,0.980162,0.481957,0.178570,1.774477,0.567979,0.999433,0.341573,0.946846,...,2.133504,0.545064,0.898647,0.457206,0.086738,0.168795,0.565256,1.466466,1.873460,0.119550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.704612,0.602044,0.421721,0.525769,0.397438,1.362708,0.285670,0.679079,0.060115,0.162297,...,2.042380,0.526155,0.870230,0.547946,0.259592,0.506071,0.188364,0.490420,1.051661,0.424045
80959,0.207785,0.882313,0.853951,2.248372,0.377950,1.868718,0.027710,0.037824,0.432519,0.002355,...,0.888428,0.315798,1.203163,0.230864,0.410047,1.079965,1.455479,0.204381,1.584732,1.432753
80960,0.239252,0.848180,0.419910,1.281097,0.235951,0.732951,0.194065,0.064664,0.368393,0.071118,...,0.753681,0.247543,1.084328,0.114132,0.312017,0.077341,0.956637,0.235695,1.850316,1.839262
80961,0.461720,0.124599,0.876571,1.559331,0.484961,0.324873,0.392323,0.332792,0.633564,0.085607,...,1.157800,0.726961,1.090512,0.386779,0.094730,0.724957,0.527739,0.453889,0.985986,1.031839


In [37]:
W = np.matmul(np.array(H),W_attribute.T)

In [38]:
W_logit = (np.exp(W)/np.sum(np.exp(W),axis=1)[:,None])

In [39]:
W_logit

array([[0.00472968, 0.00660566, 0.0071637 , ..., 0.00758422, 0.0131058 ,
        0.0064717 ],
       [0.00614284, 0.00909148, 0.00794041, ..., 0.00764144, 0.00708265,
        0.00730435],
       [0.02463707, 0.01276119, 0.0099947 , ..., 0.00598794, 0.00883059,
        0.00703367],
       ...,
       [0.00517499, 0.00670504, 0.00737468, ..., 0.00915012, 0.00894595,
        0.00714663],
       [0.00613419, 0.0098961 , 0.00944293, ..., 0.00988938, 0.00682114,
        0.00793346],
       [0.00558306, 0.00742625, 0.00811362, ..., 0.0085044 , 0.00890578,
        0.00806659]])

In [16]:
def take_per_row(A, indx, num_elem=1):
    all_indx = indx[:,None] + np.arange(num_elem)
    return A[np.arange(all_indx.shape[0])[:,None], all_indx]

In [40]:
IDX = np.argsort(W_logit,axis=1)
IDX[:,-10:][:,0]

array([37, 77, 13, ..., 56, 89, 64], dtype=int64)

In [41]:
a = take_per_row(W_logit, IDX[:,-10:][:,0])

In [42]:
L_a = W_logit * (pd.DataFrame(W_logit,columns=['ATT_'+a for a in labels_attribute])>=a)
conf = L_a.sum(axis=1)
La = ((L_a + ((1-np.array(conf))/92)[:,None])*(L_a==0) + L_a)
La

Unnamed: 0,ATT_boating,ATT_driving,ATT_biking,ATT_transporting,ATT_sunbathing,ATT_touring,ATT_hiking,ATT_climbing,ATT_camping,ATT_reading,...,ATT_far-away horizon,ATT_no horizon,ATT_rugged scene,ATT_vertical components,ATT_horizontal components,ATT_symmetrical,ATT_cluttered space,ATT_scary,ATT_soothing,ATT_stressful
0,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,...,0.007941,0.041550,0.007941,0.021930,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941
1,0.007538,0.007538,0.007538,0.007538,0.007538,0.020698,0.007538,0.007538,0.007538,0.007538,...,0.007538,0.022183,0.007538,0.027873,0.007538,0.007538,0.007538,0.007538,0.007538,0.007538
2,0.024637,0.007689,0.007689,0.017210,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,...,0.016379,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689
3,0.021762,0.007393,0.007393,0.007393,0.007393,0.023330,0.007393,0.007393,0.007393,0.007393,...,0.007393,0.015937,0.007393,0.016149,0.007393,0.007393,0.007393,0.007393,0.007393,0.007393
4,0.007826,0.014660,0.007826,0.007826,0.007826,0.014368,0.007826,0.007826,0.007826,0.007826,...,0.007826,0.018777,0.007826,0.018383,0.007826,0.007826,0.007826,0.007826,0.007826,0.007826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.007739,0.007739,0.007739,0.007739,0.007739,0.015239,0.007739,0.007739,0.007739,0.007739,...,0.007739,0.016754,0.007739,0.015712,0.012102,0.007739,0.007739,0.007739,0.007739,0.007739
80959,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,...,0.007850,0.063667,0.007850,0.018533,0.007850,0.015999,0.007850,0.007850,0.007850,0.007850
80960,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,...,0.007926,0.040070,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926
80961,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,...,0.008142,0.048417,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142


In [28]:
La = pd.DataFrame(k_hot_filter(W_logit,10),columns=['ATT_'+a for a in labels_attribute])
La

Unnamed: 0,ATT_boating,ATT_driving,ATT_biking,ATT_transporting,ATT_sunbathing,ATT_touring,ATT_hiking,ATT_climbing,ATT_camping,ATT_reading,...,ATT_far-away horizon,ATT_no horizon,ATT_rugged scene,ATT_vertical components,ATT_horizontal components,ATT_symmetrical,ATT_cluttered space,ATT_scary,ATT_soothing,ATT_stressful
0,0.019991,0.007074,0.007074,0.007074,0.007074,0.018967,0.007074,0.007074,0.007074,0.007074,...,0.016033,0.007074,0.007074,0.007074,0.007074,0.007074,0.007074,0.007074,0.007074,0.007074
1,0.011022,0.007022,0.007022,0.007022,0.007022,0.013117,0.007022,0.007022,0.007022,0.007022,...,0.024333,0.007022,0.007022,0.012180,0.007022,0.007022,0.007022,0.007022,0.007022,0.007022
2,0.021909,0.007077,0.007077,0.007077,0.007077,0.021507,0.007077,0.007077,0.007077,0.007077,...,0.007077,0.007077,0.007077,0.018033,0.007077,0.007077,0.007077,0.007077,0.007077,0.007077
3,0.008118,0.008118,0.008118,0.008118,0.008118,0.015944,0.008118,0.008118,0.008118,0.008118,...,0.008118,0.035501,0.008118,0.016390,0.008118,0.008118,0.008118,0.008118,0.008118,0.011374
4,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887,...,0.007887,0.047680,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887,0.007887
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2946,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231,...,0.008231,0.047875,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231,0.008231
2947,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976,...,0.007976,0.059437,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976,0.007976
2948,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691,...,0.007691,0.042148,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691,0.007691
2949,0.008304,0.008304,0.008304,0.008304,0.008304,0.008304,0.008304,0.008304,0.008304,0.008304,...,0.008304,0.032347,0.008304,0.015322,0.013575,0.008304,0.008304,0.008304,0.008304,0.008304


In [43]:
La.sum(axis=1)

0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
80958    1.0
80959    1.0
80960    1.0
80961    1.0
80962    1.0
Length: 80963, dtype: float64

In [44]:
Places = IMG_pred_150[IMG_pred_150.columns[21:51]]

In [45]:
Places

Unnamed: 0,Places_p_0,Places_p_1,Places_p_2,Places_p_3,Places_p_4,Places_p_5,Places_p_6,Places_p_7,Places_p_8,Places_p_9,...,Places_ID_0,Places_ID_1,Places_ID_2,Places_ID_3,Places_ID_4,Places_ID_5,Places_ID_6,Places_ID_7,Places_ID_8,Places_ID_9
0,0.140020,0.126312,0.042274,0.034378,0.033253,0.031212,0.025955,0.025221,0.018702,0.018020,...,201,196,10,290,261,109,345,33,272,197
1,0.869308,0.022048,0.011398,0.011329,0.010825,0.010472,0.007895,0.004579,0.003729,0.003200,...,327,108,237,109,91,334,132,230,196,213
2,0.778716,0.043855,0.026737,0.025956,0.015962,0.009945,0.008016,0.006170,0.005180,0.005148,...,79,273,49,78,171,229,348,283,319,218
3,0.835532,0.017288,0.012632,0.011651,0.011080,0.008530,0.008107,0.007858,0.007776,0.006972,...,79,229,218,49,132,353,268,273,78,171
4,0.158031,0.154220,0.126143,0.063206,0.055104,0.046235,0.038958,0.031665,0.026696,0.023611,...,319,109,4,227,270,79,256,189,47,77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.140349,0.070128,0.063321,0.046199,0.042463,0.038982,0.029716,0.029220,0.026144,0.020487,...,79,132,77,108,270,213,8,5,256,109
80959,0.918045,0.042664,0.007215,0.006569,0.006335,0.003469,0.002794,0.001578,0.001568,0.001061,...,123,3,41,69,226,90,327,129,12,227
80960,0.174099,0.121597,0.103429,0.098353,0.080807,0.042609,0.028817,0.027616,0.021983,0.017629,...,3,69,123,292,226,13,330,74,41,296
80961,0.153207,0.122745,0.086672,0.081746,0.048086,0.045712,0.041104,0.038536,0.020216,0.019491,...,123,216,227,41,308,43,37,131,129,332


In [46]:
Places_p = Places[Places.columns[:5]]
Places_id = Places[Places.columns[20:25]]

In [47]:
conf = Places_p.sum(axis=1)
conf

0        0.376236
1        0.924908
2        0.891226
3        0.888184
4        0.556705
           ...   
80958    0.362460
80959    0.980830
80960    0.578285
80961    0.492456
80962    0.793029
Length: 80963, dtype: float64

In [48]:
Places_ori = np.zeros((len(Places),len(classes)))

In [49]:
L_s = pd.DataFrame(idx_to_mat(np.array(Places_p), Places_ori, np.array(Places_id), 5),columns=['SCE_'+ a for a in classes])

In [50]:
Ls = ((L_s + ((1-np.array(conf))/360)[:,None])*(L_s==0) + L_s)

In [51]:
Ls

Unnamed: 0,SCE_airfield,SCE_airplane_cabin,SCE_airport_terminal,SCE_alcove,SCE_alley,SCE_amphitheater,SCE_amusement_arcade,SCE_amusement_park,SCE_apartment_building/outdoor,SCE_aquarium,...,SCE_waterfall,SCE_watering_hole,SCE_wave,SCE_wet_bar,SCE_wheat_field,SCE_wind_farm,SCE_windmill,SCE_yard,SCE_youth_hostel,SCE_zen_garden
0,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,...,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733,0.001733
1,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,...,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209,0.000209
2,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,...,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302,0.000302
3,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,...,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311,0.000311
4,0.001231,0.001231,0.001231,0.001231,0.126143,0.001231,0.001231,0.001231,0.001231,0.001231,...,0.001231,0.001231,0.001231,0.001231,0.001231,0.001231,0.001231,0.001231,0.001231,0.001231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,...,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771,0.001771
80959,0.000053,0.000053,0.000053,0.042664,0.000053,0.000053,0.000053,0.000053,0.000053,0.000053,...,0.000053,0.000053,0.000053,0.000053,0.000053,0.000053,0.000053,0.000053,0.000053,0.000053
80960,0.001171,0.001171,0.001171,0.174099,0.001171,0.001171,0.001171,0.001171,0.001171,0.001171,...,0.001171,0.001171,0.001171,0.001171,0.001171,0.001171,0.001171,0.001171,0.001171,0.001171
80961,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,...,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410,0.001410


In [52]:
Ls.sum(axis=1)

0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
80958    1.0
80959    1.0
80960    1.0
80961    1.0
80962    1.0
Length: 80963, dtype: float64

In [56]:
F = face_pred.set_index('index')[['Face_num_faces', 'Face_prob', 'Face_ratio']]

In [54]:
F = face_pred[['Face_num_faces', 'Face_prob', 'Face_ratio']]

In [57]:
F

Unnamed: 0_level_0,Face_num_faces,Face_prob,Face_ratio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
50770113131,0,0.0,0.0
50770228312,0,0.0,0.0
50766039303,0,0.0,0.0
50766893587,0,0.0,0.0
50766892772,0,0.0,0.0
...,...,...,...
20879329608,0,0.0,0.0
21040989736,0,0.0,0.0
20880415099,0,0.0,0.0
20879281658,0,0.0,0.0


In [58]:
VIS_FEAT = IMG_pred_150[['ID','IO_type']].merge(H, how='outer', left_index=True, right_index=True).merge(F, how='outer', 
                left_on='ID', right_index=True).merge(Ls, how='outer', left_index=True, right_index=True).merge(La, 
                                                                    how='outer', left_index=True, right_index=True)

In [59]:
VIS_FEAT

Unnamed: 0,ID,IO_type,VisFeat_0,VisFeat_1,VisFeat_2,VisFeat_3,VisFeat_4,VisFeat_5,VisFeat_6,VisFeat_7,...,ATT_far-away horizon,ATT_no horizon,ATT_rugged scene,ATT_vertical components,ATT_horizontal components,ATT_symmetrical,ATT_cluttered space,ATT_scary,ATT_soothing,ATT_stressful
0,50770113131,outdoor,0.095580,1.484140,0.167445,0.336726,0.396595,1.318515,0.116753,0.704350,...,0.007941,0.041550,0.007941,0.021930,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941
1,50770228312,outdoor,0.362849,2.836981,0.030664,0.786215,0.178590,1.163436,0.499891,0.430285,...,0.007538,0.022183,0.007538,0.027873,0.007538,0.007538,0.007538,0.007538,0.007538,0.007538
2,50766039303,outdoor,0.099797,0.947452,0.381367,0.213776,0.151737,0.437884,2.635227,0.394498,...,0.016379,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689
3,50766893587,outdoor,0.316780,0.736990,0.384384,0.187189,0.271150,1.719753,0.928709,0.179578,...,0.007393,0.015937,0.007393,0.016149,0.007393,0.007393,0.007393,0.007393,0.007393,0.007393
4,50766892772,outdoor,0.202310,0.243766,0.980162,0.481957,0.178570,1.774477,0.567979,0.999433,...,0.007826,0.018777,0.007826,0.018383,0.007826,0.007826,0.007826,0.007826,0.007826,0.007826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,20879329608,outdoor,0.704612,0.602044,0.421721,0.525769,0.397438,1.362708,0.285670,0.679079,...,0.007739,0.016754,0.007739,0.015712,0.012102,0.007739,0.007739,0.007739,0.007739,0.007739
80959,21040989736,outdoor,0.207785,0.882313,0.853951,2.248372,0.377950,1.868718,0.027710,0.037824,...,0.007850,0.063667,0.007850,0.018533,0.007850,0.015999,0.007850,0.007850,0.007850,0.007850
80960,20880415099,outdoor,0.239252,0.848180,0.419910,1.281097,0.235951,0.732951,0.194065,0.064664,...,0.007926,0.040070,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926
80961,20879281658,outdoor,0.461720,0.124599,0.876571,1.559331,0.484961,0.324873,0.392323,0.332792,...,0.008142,0.048417,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142


In [68]:
VIS_FEAT[VIS_FEAT.columns[882:984]]

Unnamed: 0,ATT_boating,ATT_driving,ATT_biking,ATT_transporting,ATT_sunbathing,ATT_touring,ATT_hiking,ATT_climbing,ATT_camping,ATT_reading,...,ATT_far-away horizon,ATT_no horizon,ATT_rugged scene,ATT_vertical components,ATT_horizontal components,ATT_symmetrical,ATT_cluttered space,ATT_scary,ATT_soothing,ATT_stressful
0,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941,...,0.007941,0.041550,0.007941,0.021930,0.007941,0.007941,0.007941,0.007941,0.007941,0.007941
1,0.007538,0.007538,0.007538,0.007538,0.007538,0.020698,0.007538,0.007538,0.007538,0.007538,...,0.007538,0.022183,0.007538,0.027873,0.007538,0.007538,0.007538,0.007538,0.007538,0.007538
2,0.024637,0.007689,0.007689,0.017210,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,...,0.016379,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689,0.007689
3,0.021762,0.007393,0.007393,0.007393,0.007393,0.023330,0.007393,0.007393,0.007393,0.007393,...,0.007393,0.015937,0.007393,0.016149,0.007393,0.007393,0.007393,0.007393,0.007393,0.007393
4,0.007826,0.014660,0.007826,0.007826,0.007826,0.014368,0.007826,0.007826,0.007826,0.007826,...,0.007826,0.018777,0.007826,0.018383,0.007826,0.007826,0.007826,0.007826,0.007826,0.007826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.007739,0.007739,0.007739,0.007739,0.007739,0.015239,0.007739,0.007739,0.007739,0.007739,...,0.007739,0.016754,0.007739,0.015712,0.012102,0.007739,0.007739,0.007739,0.007739,0.007739
80959,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,0.007850,...,0.007850,0.063667,0.007850,0.018533,0.007850,0.015999,0.007850,0.007850,0.007850,0.007850
80960,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,...,0.007926,0.040070,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926,0.007926
80961,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,...,0.008142,0.048417,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142,0.008142


In [324]:
VIS_FEAT.to_csv('Venezia/data_storage/Visual_Features.csv', sep='\t')

In [350]:
VIS_FEAT.to_csv('Amsterdam/data_storage/Visual_Features.csv', sep='\t')

In [377]:
VIS_FEAT.to_csv('Suzhou/data_storage/Visual_Features.csv', sep='\t')

In [60]:
VIS_FEAT.to_csv('data_storage/images/Visual_Features.csv', sep='\t')

## Textual Features

In [6]:
bert = pd.read_csv('Venezia/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

In [121]:
bert = pd.read_csv('Amsterdam/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

In [131]:
bert = pd.read_csv('Suzhou/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

  bert = pd.read_csv('Suzhou/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')


In [140]:
bert = pd.read_csv('data_storage/images/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

  bert = pd.read_csv('data_storage/images/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')


In [8]:
bert

Unnamed: 0,origin,index,candownload,url_c,url_q,url_n,url_largest,owner,owner_loc,title,...,max_3_col,max_1,max_3,valid,max_1_col_agg,max_2_col_agg,max_3_col_agg,max_1_eq,max_3_eq,text_bool
0,3,51870743082,1,https://live.staticflickr.com/65535/5187074308...,https://live.staticflickr.com/65535/5187074308...,https://live.staticflickr.com/65535/5187074308...,https://live.staticflickr.com/65535/5187074308...,47954272@N06,"Tallinn, Estonia",...,...,Criteria i,0.362388,0.787226,True,Criteria ii,Criteria iv,Criteria i,True,1.0,True
1,4,51871789478,1,https://live.staticflickr.com/65535/5187178947...,https://live.staticflickr.com/65535/5187178947...,https://live.staticflickr.com/65535/5187178947...,https://live.staticflickr.com/65535/5187178947...,47954272@N06,"Tallinn, Estonia",...,...,Criteria iv,0.638658,0.898092,True,Criteria ii,Criteria i,Criteria iv,True,1.0,True
2,5,51870742942,1,https://live.staticflickr.com/65535/5187074294...,https://live.staticflickr.com/65535/5187074294...,https://live.staticflickr.com/65535/5187074294...,https://live.staticflickr.com/65535/5187074294...,47954272@N06,"Tallinn, Estonia",...,...,Criteria iv,0.396352,0.812845,True,Criteria ii,Criteria i,Criteria iv,True,1.0,True
3,6,51870380541,1,https://live.staticflickr.com/65535/5187038054...,https://live.staticflickr.com/65535/5187038054...,https://live.staticflickr.com/65535/5187038054...,https://live.staticflickr.com/65535/5187038054...,64148082@N02,,VENEZIA. CARNEVALE 2019,...,Criteria iv,0.823930,0.930945,True,Criteria vi,Criteria iii,Criteria i,True,0.5,True
4,13,51863124066,1,https://live.staticflickr.com/65535/5186312406...,https://live.staticflickr.com/65535/5186312406...,https://live.staticflickr.com/65535/5186312406...,https://live.staticflickr.com/65535/5186312406...,186704588@N07,,Venice - Ponte dei Sospiri,...,Criteria i,0.337761,0.871130,True,Criteria iv,Criteria i,Criteria vi,False,1.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2946,4981,51233430558,1,https://live.staticflickr.com/65535/5123343055...,https://live.staticflickr.com/65535/5123343055...,https://live.staticflickr.com/65535/5123343055...,https://live.staticflickr.com/65535/5123343055...,83293103@N00,"Milano, Italy",20210520_145859,...,Criteria iv,0.571989,0.903156,True,Criteria i,Criteria ii,Criteria iv,True,1.0,True
2947,4982,51234286630,1,https://live.staticflickr.com/65535/5123428663...,https://live.staticflickr.com/65535/5123428663...,https://live.staticflickr.com/65535/5123428663...,https://live.staticflickr.com/65535/5123428663...,83293103@N00,"Milano, Italy",20210520_145745,...,Criteria iv,0.571989,0.903156,True,Criteria i,Criteria ii,Criteria iv,True,1.0,True
2948,4983,51234294135,1,https://live.staticflickr.com/65535/5123429413...,https://live.staticflickr.com/65535/5123429413...,https://live.staticflickr.com/65535/5123429413...,https://live.staticflickr.com/65535/5123429413...,83293103@N00,"Milano, Italy",20210520_164727,...,Criteria iv,0.571989,0.903156,True,Criteria i,Criteria ii,Criteria iv,True,1.0,True
2949,4984,51234291635,1,https://live.staticflickr.com/65535/5123429163...,https://live.staticflickr.com/65535/5123429163...,https://live.staticflickr.com/65535/5123429163...,https://live.staticflickr.com/65535/5123429163...,83293103@N00,"Milano, Italy",20210520_155646,...,Criteria iv,0.571989,0.903156,True,Criteria i,Criteria ii,Criteria iv,True,1.0,True


In [7]:
bert['text_bool'] = bert['max_3_eq']>-1

In [143]:
O = bert[bert['text_bool']][['English', 'Italian', 'Other_Lang']].rename(columns={'Italian':'Local_Lang'})
O

Unnamed: 0,English,Local_Lang,Other_Lang
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
...,...,...,...
80958,0.0,0.0,0.0
80959,0.0,0.0,0.0
80960,0.0,0.0,0.0
80961,0.0,0.0,0.0


In [126]:
O = bert[bert['text_bool']][['English', 'Dutch', 'Other_Lang']].rename(columns={'Dutch':'Local_Lang'})
O

Unnamed: 0,English,Local_Lang,Other_Lang
0,1.0,0.0,0.0
1,1.0,0.0,0.0
2,1.0,1.0,0.0
3,1.0,1.0,0.0
4,1.0,1.0,1.0
...,...,...,...
3720,0.0,1.0,0.0
3721,1.0,0.0,0.0
3722,1.0,1.0,1.0
3723,0.0,0.0,1.0


In [135]:
O = bert[bert['text_bool']][['English', 'Chinese', 'Other_Lang']].rename(columns={'Chinese':'Local_Lang'})
O

Unnamed: 0,English,Local_Lang,Other_Lang
0,0.0,1.0,0.0
5,0.0,1.0,0.0
8,0.0,1.0,0.0
15,0.0,1.0,0.0
92,1.0,0.0,0.0
...,...,...,...
3041,1.0,0.0,1.0
3048,1.0,0.0,0.0
3049,1.0,0.0,0.0
3050,1.0,0.0,1.0


In [155]:
B = bert[bert['text_bool']][bert.columns[43:811]]
B

Unnamed: 0,BERT_0,BERT_1,BERT_2,BERT_3,BERT_4,BERT_5,BERT_6,BERT_7,BERT_8,BERT_9,...,BERT_758,BERT_759,BERT_760,BERT_761,BERT_762,BERT_763,BERT_764,BERT_765,BERT_766,BERT_767
0,-0.725867,0.645667,-0.835403,0.396469,0.699823,0.583684,0.684937,-0.300641,-0.861769,-0.870342,...,0.446480,-0.160617,0.825009,-0.447969,0.968792,0.137899,0.621971,-0.179741,0.535175,0.448510
1,-0.687322,0.634107,-0.852408,0.411906,0.678268,0.560558,0.701253,-0.255225,-0.884209,-0.848714,...,0.514765,-0.176437,0.816575,-0.499717,0.964874,0.072426,0.673002,-0.329150,0.561132,0.377601
2,0.152785,0.484428,-0.967312,0.135312,0.760674,-0.058021,-0.004875,-0.202761,-0.989303,0.906264,...,-0.332770,-0.849182,0.997512,-0.663447,-0.436441,-0.705778,0.317978,-0.675741,0.461170,-0.678003
3,0.883628,0.647049,-0.881726,-0.699901,0.148423,0.216855,-0.859930,-0.231999,-0.871000,0.998952,...,-0.328970,-0.446491,0.777226,-0.911606,0.338391,-0.653141,0.268055,-0.048600,0.579666,-0.933288
4,0.828631,0.395096,-0.950549,-0.189191,0.378434,0.155617,-0.158817,0.305665,-0.802254,0.809712,...,0.423444,0.278416,0.177207,-0.906934,0.576211,-0.770738,0.516666,-0.651380,0.474633,-0.865077
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,-0.602327,0.844247,0.003864,0.583110,-0.078405,0.370737,0.929915,-0.734345,-0.319077,-0.520302,...,0.685031,-0.024899,-0.167808,0.101503,0.905999,0.569518,0.783320,0.749429,0.866267,0.295226
80959,-0.807686,0.784066,0.898045,0.665245,-0.414406,0.384306,0.977027,-0.796748,0.615145,-0.209704,...,0.744247,0.639540,-0.796536,0.606640,0.959838,0.869317,0.683151,0.922499,0.816869,0.615930
80960,-0.815055,0.762962,-0.031195,0.752968,0.215688,0.283594,0.972989,-0.798809,-0.394815,-0.792138,...,0.576397,-0.241379,0.515048,0.559288,0.912735,0.500433,0.525574,0.677767,0.837013,0.358870
80961,-0.787440,0.802281,0.809869,0.402350,-0.299049,0.475018,0.927901,-0.776222,0.365500,-0.290354,...,0.778824,0.259840,-0.762688,0.556807,0.953909,0.842399,0.703403,0.912806,0.877418,0.477641


In [144]:
B = bert[bert['text_bool']][bert.columns[46:814]]
B

Unnamed: 0,BERT_3,BERT_4,BERT_5,BERT_6,BERT_7,BERT_8,BERT_9,BERT_10,BERT_11,BERT_12,...,BERT_761,BERT_762,BERT_763,BERT_764,BERT_765,BERT_766,BERT_767,max_1_val,max_1_col,max_2_val
0,0.396469,0.699823,0.583684,0.684937,-0.300641,-0.861769,-0.870342,-0.270273,0.392132,0.718036,...,-0.447969,0.968792,0.137899,0.621971,-0.179741,0.535175,0.448510,0.624199,Criteria iv,0.183747
1,0.411906,0.678268,0.560558,0.701253,-0.255225,-0.884209,-0.848714,-0.266009,0.386042,0.624316,...,-0.499717,0.964874,0.072426,0.673002,-0.329150,0.561132,0.377601,0.690831,Criteria iv,0.130004
2,0.135312,0.760674,-0.058021,-0.004875,-0.202761,-0.989303,0.906264,-0.698797,0.887834,-0.372035,...,-0.663447,-0.436441,-0.705778,0.317978,-0.675741,0.461170,-0.678003,0.687655,Criteria vi,0.083139
3,-0.699901,0.148423,0.216855,-0.859930,-0.231999,-0.871000,0.998952,-0.291405,0.083433,-0.857605,...,-0.911606,0.338391,-0.653141,0.268055,-0.048600,0.579666,-0.933288,0.390900,Criteria vi,0.228499
4,-0.189191,0.378434,0.155617,-0.158817,0.305665,-0.802254,0.809712,-0.262057,0.104558,-0.900556,...,-0.906934,0.576211,-0.770738,0.516666,-0.651380,0.474633,-0.865077,0.594140,Criteria iv,0.166905
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.583110,-0.078405,0.370737,0.929915,-0.734345,-0.319077,-0.520302,0.453560,-0.612344,0.504709,...,0.101503,0.905999,0.569518,0.783320,0.749429,0.866267,0.295226,0.390654,Criteria ii,0.383132
80959,0.665245,-0.414406,0.384306,0.977027,-0.796748,0.615145,-0.209704,0.617512,-0.861686,0.913866,...,0.606640,0.959838,0.869317,0.683151,0.922499,0.816869,0.615930,0.511271,Criteria ii,0.276984
80960,0.752968,0.215688,0.283594,0.972989,-0.798809,-0.394815,-0.792138,0.063225,-0.119373,0.889248,...,0.559288,0.912735,0.500433,0.525574,0.677767,0.837013,0.358870,0.437061,Criteria i,0.254000
80961,0.402350,-0.299049,0.475018,0.927901,-0.776222,0.365500,-0.290354,0.544445,-0.886663,0.845405,...,0.556807,0.953909,0.842399,0.703403,0.912806,0.877418,0.477641,0.548089,Criteria ii,0.236939


In [156]:
TEX_FEAT = bert[['index','text_bool','revised_text', 'num_sent', 'text_len']].merge(B, how='left', left_index=True, 
            right_index=True).merge(O, how='left',left_index=True, right_index=True)
TEX_FEAT

Unnamed: 0,index,text_bool,revised_text,num_sent,text_len,BERT_0,BERT_1,BERT_2,BERT_3,BERT_4,...,BERT_761,BERT_762,BERT_763,BERT_764,BERT_765,BERT_766,BERT_767,English,Local_Lang,Other_Lang
0,6,True,It has title of Venice - Minelli Palace Courty...,12.0,236.0,-0.725867,0.645667,-0.835403,0.396469,0.699823,...,-0.447969,0.968792,0.137899,0.621971,-0.179741,0.535175,0.448510,0.0,0.0,0.0
1,7,True,It has title of Venice - Minelli Palace. It ha...,12.0,234.0,-0.687322,0.634107,-0.852408,0.411906,0.678268,...,-0.499717,0.964874,0.072426,0.673002,-0.329150,0.561132,0.377601,0.0,0.0,0.0
2,10,True,"It has tags of canalgrande, canal, bridge, pie...",6.0,84.0,0.152785,0.484428,-0.967312,0.135312,0.760674,...,-0.663447,-0.436441,-0.705778,0.317978,-0.675741,0.461170,-0.678003,0.0,0.0,0.0
3,11,True,"It has tags of pier, italy, venice, gondola, c...",4.0,80.0,0.883628,0.647049,-0.881726,-0.699901,0.148423,...,-0.911606,0.338391,-0.653141,0.268055,-0.048600,0.579666,-0.933288,0.0,0.0,0.0
4,12,True,"It has tags of commercialbuildings, italy, ven...",4.0,76.0,0.828631,0.395096,-0.950549,-0.189191,0.378434,...,-0.906934,0.576211,-0.770738,0.516666,-0.651380,0.474633,-0.865077,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,123096,True,It has title of Here the packages are delivere...,2.0,35.0,-0.602327,0.844247,0.003864,0.583110,-0.078405,...,0.101503,0.905999,0.569518,0.783320,0.749429,0.866267,0.295226,0.0,0.0,0.0
80959,123097,True,It has title of Door No.342. It has tags of Ve...,2.0,25.0,-0.807686,0.784066,0.898045,0.665245,-0.414406,...,0.606640,0.959838,0.869317,0.683151,0.922499,0.816869,0.615930,0.0,0.0,0.0
80960,123098,True,It has title of Ave Maria. It has tags of Vene...,2.0,26.0,-0.815055,0.762962,-0.031195,0.752968,0.215688,...,0.559288,0.912735,0.500433,0.525574,0.677767,0.837013,0.358870,0.0,0.0,0.0
80961,123099,True,It has title of Kirchenportal der Church of Sa...,2.0,33.0,-0.787440,0.802281,0.809869,0.402350,-0.299049,...,0.556807,0.953909,0.842399,0.703403,0.912806,0.877418,0.477641,0.0,0.0,0.0


In [157]:
TEX_FEAT = bert[['index.1','text_bool','revised_text', 'num_sent', 'text_len']].merge(B, how='left', left_index=True, 
            right_index=True).merge(O, how='left',left_index=True, right_index=True).rename(columns={'index.1':'index'})
TEX_FEAT

Unnamed: 0,index,text_bool,revised_text,num_sent,text_len,BERT_0,BERT_1,BERT_2,BERT_3,BERT_4,...,BERT_761,BERT_762,BERT_763,BERT_764,BERT_765,BERT_766,BERT_767,English,Local_Lang,Other_Lang
0,50770113131,True,It has title of Venice - Minelli Palace Courty...,12.0,236.0,-0.725867,0.645667,-0.835403,0.396469,0.699823,...,-0.447969,0.968792,0.137899,0.621971,-0.179741,0.535175,0.448510,0.0,0.0,0.0
1,50770228312,True,It has title of Venice - Minelli Palace. It ha...,12.0,234.0,-0.687322,0.634107,-0.852408,0.411906,0.678268,...,-0.499717,0.964874,0.072426,0.673002,-0.329150,0.561132,0.377601,0.0,0.0,0.0
2,50766039303,True,"It has tags of canalgrande, canal, bridge, pie...",6.0,84.0,0.152785,0.484428,-0.967312,0.135312,0.760674,...,-0.663447,-0.436441,-0.705778,0.317978,-0.675741,0.461170,-0.678003,0.0,0.0,0.0
3,50766893587,True,"It has tags of pier, italy, venice, gondola, c...",4.0,80.0,0.883628,0.647049,-0.881726,-0.699901,0.148423,...,-0.911606,0.338391,-0.653141,0.268055,-0.048600,0.579666,-0.933288,0.0,0.0,0.0
4,50766892772,True,"It has tags of commercialbuildings, italy, ven...",4.0,76.0,0.828631,0.395096,-0.950549,-0.189191,0.378434,...,-0.906934,0.576211,-0.770738,0.516666,-0.651380,0.474633,-0.865077,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,20879329608,True,It has title of Here the packages are delivere...,2.0,35.0,-0.602327,0.844247,0.003864,0.583110,-0.078405,...,0.101503,0.905999,0.569518,0.783320,0.749429,0.866267,0.295226,0.0,0.0,0.0
80959,21040989736,True,It has title of Door No.342. It has tags of Ve...,2.0,25.0,-0.807686,0.784066,0.898045,0.665245,-0.414406,...,0.606640,0.959838,0.869317,0.683151,0.922499,0.816869,0.615930,0.0,0.0,0.0
80960,20880415099,True,It has title of Ave Maria. It has tags of Vene...,2.0,26.0,-0.815055,0.762962,-0.031195,0.752968,0.215688,...,0.559288,0.912735,0.500433,0.525574,0.677767,0.837013,0.358870,0.0,0.0,0.0
80961,20879281658,True,It has title of Kirchenportal der Church of Sa...,2.0,33.0,-0.787440,0.802281,0.809869,0.402350,-0.299049,...,0.556807,0.953909,0.842399,0.703403,0.912806,0.877418,0.477641,0.0,0.0,0.0


In [158]:
TEX_FEAT[TEX_FEAT['text_bool']]

Unnamed: 0,index,text_bool,revised_text,num_sent,text_len,BERT_0,BERT_1,BERT_2,BERT_3,BERT_4,...,BERT_761,BERT_762,BERT_763,BERT_764,BERT_765,BERT_766,BERT_767,English,Local_Lang,Other_Lang
0,50770113131,True,It has title of Venice - Minelli Palace Courty...,12.0,236.0,-0.725867,0.645667,-0.835403,0.396469,0.699823,...,-0.447969,0.968792,0.137899,0.621971,-0.179741,0.535175,0.448510,0.0,0.0,0.0
1,50770228312,True,It has title of Venice - Minelli Palace. It ha...,12.0,234.0,-0.687322,0.634107,-0.852408,0.411906,0.678268,...,-0.499717,0.964874,0.072426,0.673002,-0.329150,0.561132,0.377601,0.0,0.0,0.0
2,50766039303,True,"It has tags of canalgrande, canal, bridge, pie...",6.0,84.0,0.152785,0.484428,-0.967312,0.135312,0.760674,...,-0.663447,-0.436441,-0.705778,0.317978,-0.675741,0.461170,-0.678003,0.0,0.0,0.0
3,50766893587,True,"It has tags of pier, italy, venice, gondola, c...",4.0,80.0,0.883628,0.647049,-0.881726,-0.699901,0.148423,...,-0.911606,0.338391,-0.653141,0.268055,-0.048600,0.579666,-0.933288,0.0,0.0,0.0
4,50766892772,True,"It has tags of commercialbuildings, italy, ven...",4.0,76.0,0.828631,0.395096,-0.950549,-0.189191,0.378434,...,-0.906934,0.576211,-0.770738,0.516666,-0.651380,0.474633,-0.865077,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,20879329608,True,It has title of Here the packages are delivere...,2.0,35.0,-0.602327,0.844247,0.003864,0.583110,-0.078405,...,0.101503,0.905999,0.569518,0.783320,0.749429,0.866267,0.295226,0.0,0.0,0.0
80959,21040989736,True,It has title of Door No.342. It has tags of Ve...,2.0,25.0,-0.807686,0.784066,0.898045,0.665245,-0.414406,...,0.606640,0.959838,0.869317,0.683151,0.922499,0.816869,0.615930,0.0,0.0,0.0
80960,20880415099,True,It has title of Ave Maria. It has tags of Vene...,2.0,26.0,-0.815055,0.762962,-0.031195,0.752968,0.215688,...,0.559288,0.912735,0.500433,0.525574,0.677767,0.837013,0.358870,0.0,0.0,0.0
80961,20879281658,True,It has title of Kirchenportal der Church of Sa...,2.0,33.0,-0.787440,0.802281,0.809869,0.402350,-0.299049,...,0.556807,0.953909,0.842399,0.703403,0.912806,0.877418,0.477641,0.0,0.0,0.0


In [164]:
TEX_FEAT[TEX_FEAT.columns[773:776]]

Unnamed: 0,English,Local_Lang,Other_Lang
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
...,...,...,...
80958,0.0,0.0,0.0
80959,0.0,0.0,0.0
80960,0.0,0.0,0.0
80961,0.0,0.0,0.0


In [112]:
TEX_FEAT.to_csv('Venezia/data_storage/Textual_Features.csv', sep='\t')

In [130]:
TEX_FEAT.to_csv('Amsterdam/data_storage/Textual_Features.csv', sep='\t')

In [139]:
TEX_FEAT.to_csv('Suzhou/data_storage/Textual_Features.csv', sep='\t')

In [162]:
TEX_FEAT.to_csv('data_storage/images/Textual_Features.csv', sep='\t')

## Heritage Values Labels

In [24]:
classes = ['Criteria i', 'Criteria ii', 'Criteria iii', 'Criteria iv', 'Criteria v', 'Criteria vi', 
              'Criteria vii', 'Criteria viii', 'Criteria ix', 'Criteria x', 'Others']

In [6]:
bert = pd.read_csv('Venezia/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

In [14]:
ulmfit = pd.read_csv('Venezia/data_storage/metadata_ulmfit.csv',sep='\t', index_col='Unnamed: 0')

In [4]:
cmp = pd.read_csv('Venezia/data_storage/metadata_compare.csv',sep='\t',index_col='Unnamed: 0')

#cmp = cmp.merge(bert[['max_1_eq','max_3_eq']], how='right',left_index=True, right_index=True)

cmp['conf_3'] = (cmp['max_3_ulmfit'] + cmp['max_3_bert'])/2
cmp['conf_1'] = (cmp['max_1_ulmfit'] + cmp['max_1_bert'])/2

In [53]:
bert = pd.read_csv('Amsterdam/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

In [61]:
ulmfit = pd.read_csv('Amsterdam/data_storage/metadata_ulmfit.csv',sep='\t', index_col='Unnamed: 0')

In [65]:
cmp = pd.read_csv('Amsterdam/data_storage/metadata_compare.csv',sep='\t',index_col='Unnamed: 0')

#cmp = cmp.merge(bert[['max_1_eq','max_3_eq']], how='right',left_index=True, right_index=True)

cmp['conf_3'] = (cmp['max_3_ulmfit'] + cmp['max_3_bert'])/2
cmp['conf_1'] = (cmp['max_1_ulmfit'] + cmp['max_1_bert'])/2

In [74]:
bert = pd.read_csv('Suzhou/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

  bert = pd.read_csv('Suzhou/data_storage/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')


In [75]:
ulmfit = pd.read_csv('Suzhou/data_storage/metadata_ulmfit.csv',sep='\t', index_col='Unnamed: 0')

In [76]:
cmp = pd.read_csv('Suzhou/data_storage/metadata_compare.csv',sep='\t',index_col='Unnamed: 0')

#cmp = cmp.merge(bert[['max_1_eq','max_3_eq']], how='right',left_index=True, right_index=True)

cmp['conf_3'] = (cmp['max_3_ulmfit'] + cmp['max_3_bert'])/2
cmp['conf_1'] = (cmp['max_1_ulmfit'] + cmp['max_1_bert'])/2

In [92]:
bert = pd.read_csv('data_storage/images/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')

  bert = pd.read_csv('data_storage/images/metadata_bert.csv',sep='\t', index_col='Unnamed: 0')


In [91]:
ulmfit = pd.read_csv('data_storage/images/metadata_ulmfit.csv',sep='\t', index_col='Unnamed: 0')

In [93]:
cmp = pd.read_csv('data_storage/images/metadata_compare.csv',sep='\t',index_col='Unnamed: 0')

#cmp = cmp.merge(bert[['max_1_eq','max_3_eq']], how='right',left_index=True, right_index=True)

cmp['conf_3'] = (cmp['max_3_ulmfit'] + cmp['max_3_bert'])/2
cmp['conf_1'] = (cmp['max_1_ulmfit'] + cmp['max_1_bert'])/2

In [95]:
bert['text_bool'] = bert['max_3_eq']>-1

In [94]:
bert_l = bert[bert.columns[35:46]]

In [96]:
ulmfit_l = ulmfit[ulmfit.columns[35:46]]

In [99]:
bert_l = bert[bert.columns[32:43]]

In [100]:
ulmfit_l = ulmfit[ulmfit.columns[32:43]]

In [101]:
bert_l

Unnamed: 0,Criteria i,Criteria ii,Criteria iii,Criteria iv,Criteria v,Criteria vi,Criteria vii,Criteria viii,Criteria ix,Criteria x,Others
0,0.183747,0.022853,0.083780,0.624199,0.009987,0.045060,0.007873,0.002240,0.002382,0.003424,0.014457
1,0.130004,0.021327,0.083284,0.690831,0.009521,0.039857,0.005688,0.001748,0.001966,0.002678,0.013094
2,0.022963,0.017869,0.070070,0.083139,0.029432,0.687655,0.030171,0.007955,0.015020,0.017473,0.018253
3,0.019122,0.088968,0.111985,0.228499,0.078189,0.390900,0.020495,0.006239,0.020129,0.006759,0.028715
4,0.009118,0.043270,0.073843,0.594140,0.166905,0.072603,0.005750,0.002411,0.008906,0.004332,0.018721
...,...,...,...,...,...,...,...,...,...,...,...
80958,0.121423,0.390654,0.032405,0.383132,0.007551,0.042279,0.001727,0.001719,0.001826,0.001700,0.015584
80959,0.276984,0.511271,0.049984,0.114366,0.003641,0.014104,0.002231,0.003422,0.002994,0.003382,0.017621
80960,0.437061,0.254000,0.071049,0.140171,0.003359,0.066108,0.002784,0.002676,0.002237,0.002568,0.017988
80961,0.236939,0.548089,0.029292,0.130852,0.003584,0.022139,0.002243,0.003827,0.002795,0.002883,0.017357


In [102]:
ulmfit_l

Unnamed: 0,Criteria i,Criteria ii,Criteria iii,Criteria iv,Criteria v,Criteria vi,Criteria vii,Criteria viii,Criteria ix,Criteria x,Others
0,0.300038,0.105972,0.110439,0.391179,0.042873,0.043235,0.002022,0.001575,0.001025,0.001220,0.000422
1,0.300537,0.105456,0.111514,0.387872,0.043060,0.045290,0.002039,0.001567,0.001015,0.001215,0.000434
2,0.183531,0.225481,0.083111,0.449886,0.008262,0.042647,0.002100,0.002684,0.001059,0.001152,0.000087
3,0.085485,0.132468,0.205980,0.378402,0.047569,0.071661,0.040116,0.015467,0.009485,0.013226,0.000141
4,0.045470,0.187583,0.247151,0.351008,0.074293,0.075992,0.007469,0.002897,0.003285,0.004723,0.000129
...,...,...,...,...,...,...,...,...,...,...,...
80958,0.014668,0.161494,0.416799,0.236805,0.074207,0.085732,0.002166,0.002078,0.003267,0.002727,0.000056
80959,0.025120,0.297634,0.334180,0.136499,0.015375,0.185791,0.001277,0.001423,0.001244,0.001374,0.000083
80960,0.027727,0.222477,0.365697,0.173320,0.017452,0.184820,0.001975,0.002437,0.002078,0.001947,0.000069
80961,0.049469,0.091807,0.350125,0.163804,0.015471,0.322296,0.002075,0.001710,0.001439,0.001700,0.000105


In [103]:
v_labels = (bert_l + ulmfit_l)/2
v_labels

Unnamed: 0,Criteria i,Criteria ii,Criteria iii,Criteria iv,Criteria v,Criteria vi,Criteria vii,Criteria viii,Criteria ix,Criteria x,Others
0,0.241893,0.064412,0.097109,0.507689,0.026430,0.044148,0.004947,0.001907,0.001703,0.002322,0.007439
1,0.215271,0.063392,0.097399,0.539351,0.026291,0.042574,0.003864,0.001658,0.001491,0.001947,0.006764
2,0.103247,0.121675,0.076590,0.266513,0.018847,0.365151,0.016136,0.005319,0.008040,0.009312,0.009170
3,0.052303,0.110718,0.158982,0.303451,0.062879,0.231280,0.030306,0.010853,0.014807,0.009993,0.014428
4,0.027294,0.115427,0.160497,0.472574,0.120599,0.074298,0.006610,0.002654,0.006096,0.004527,0.009425
...,...,...,...,...,...,...,...,...,...,...,...
80958,0.068046,0.276074,0.224602,0.309969,0.040879,0.064006,0.001947,0.001898,0.002546,0.002214,0.007820
80959,0.151052,0.404452,0.192082,0.125432,0.009508,0.099948,0.001754,0.002422,0.002119,0.002378,0.008852
80960,0.232394,0.238239,0.218373,0.156745,0.010405,0.125464,0.002380,0.002557,0.002157,0.002258,0.009029
80961,0.143204,0.319948,0.189708,0.147328,0.009527,0.172217,0.002159,0.002768,0.002117,0.002292,0.008731


In [104]:
a = v_labels[classes].to_numpy().argsort()[:, :-4:-1]
c = np.array(classes)[a]
d = v_labels[classes].to_numpy()[np.arange(a.shape[0])[:, None], a]

df1 = pd.DataFrame(c).rename(columns=lambda x : f'max_{x+1}_col')
df2 = pd.DataFrame(d).rename(columns=lambda x : f'max_{x+1}_val')
c = v_labels.columns.tolist() + [y for x in zip(df2.columns, df1.columns) for y in x]

v_labels = pd.concat([v_labels, df1, df2], axis=1).reindex(c, axis=1)

v_labels['max_1'] = v_labels['max_1_val']
v_labels['max_3'] = v_labels['max_1_val']+v_labels['max_2_val']+v_labels['max_3_val']
v_labels

Unnamed: 0,Criteria i,Criteria ii,Criteria iii,Criteria iv,Criteria v,Criteria vi,Criteria vii,Criteria viii,Criteria ix,Criteria x,Others,max_1_val,max_1_col,max_2_val,max_2_col,max_3_val,max_3_col,max_1,max_3
0,0.241893,0.064412,0.097109,0.507689,0.026430,0.044148,0.004947,0.001907,0.001703,0.002322,0.007439,0.507689,Criteria iv,0.241893,Criteria i,0.097109,Criteria iii,0.507689,0.846691
1,0.215271,0.063392,0.097399,0.539351,0.026291,0.042574,0.003864,0.001658,0.001491,0.001947,0.006764,0.539351,Criteria iv,0.215271,Criteria i,0.097399,Criteria iii,0.539351,0.852021
2,0.103247,0.121675,0.076590,0.266513,0.018847,0.365151,0.016136,0.005319,0.008040,0.009312,0.009170,0.365151,Criteria vi,0.266513,Criteria iv,0.121675,Criteria ii,0.365151,0.753339
3,0.052303,0.110718,0.158982,0.303451,0.062879,0.231280,0.030306,0.010853,0.014807,0.009993,0.014428,0.303451,Criteria iv,0.231280,Criteria vi,0.158982,Criteria iii,0.303451,0.693713
4,0.027294,0.115427,0.160497,0.472574,0.120599,0.074298,0.006610,0.002654,0.006096,0.004527,0.009425,0.472574,Criteria iv,0.160497,Criteria iii,0.120599,Criteria v,0.472574,0.753670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.068046,0.276074,0.224602,0.309969,0.040879,0.064006,0.001947,0.001898,0.002546,0.002214,0.007820,0.309969,Criteria iv,0.276074,Criteria ii,0.224602,Criteria iii,0.309969,0.810644
80959,0.151052,0.404452,0.192082,0.125432,0.009508,0.099948,0.001754,0.002422,0.002119,0.002378,0.008852,0.404452,Criteria ii,0.192082,Criteria iii,0.151052,Criteria i,0.404452,0.747587
80960,0.232394,0.238239,0.218373,0.156745,0.010405,0.125464,0.002380,0.002557,0.002157,0.002258,0.009029,0.238239,Criteria ii,0.232394,Criteria i,0.218373,Criteria iii,0.238239,0.689006
80961,0.143204,0.319948,0.189708,0.147328,0.009527,0.172217,0.002159,0.002768,0.002117,0.002292,0.008731,0.319948,Criteria ii,0.189708,Criteria iii,0.172217,Criteria vi,0.319948,0.681873


In [105]:
cmp

Unnamed: 0,origin,index,max_1_col_ulmfit,max_2_col_ulmfit,max_3_col_ulmfit,max_1_ulmfit,max_3_ulmfit,max_1_col_bert,max_2_col_bert,max_3_col_bert,max_1_bert,max_3_bert,same_1,same_3,conf_3,conf_1
0,6,6,Criteria iv,Criteria i,Criteria iii,0.391179,0.801656,Criteria iv,Criteria i,Criteria iii,0.624199,0.891726,True,1.0,0.846691,0.507689
1,7,7,Criteria iv,Criteria i,Criteria iii,0.387872,0.799923,Criteria iv,Criteria i,Criteria iii,0.690831,0.904119,True,1.0,0.852021,0.539351
2,10,10,Criteria iv,Criteria ii,Criteria i,0.449886,0.858898,Criteria vi,Criteria iv,Criteria iii,0.687655,0.840863,False,0.2,0.849881,0.568770
3,11,11,Criteria iv,Criteria iii,Criteria ii,0.378402,0.716850,Criteria vi,Criteria iv,Criteria iii,0.390900,0.731383,False,0.5,0.724117,0.384651
4,12,12,Criteria iv,Criteria iii,Criteria ii,0.351008,0.785742,Criteria iv,Criteria v,Criteria iii,0.594140,0.834888,True,0.5,0.810315,0.472574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,116670,123096,Criteria iii,Criteria iv,Criteria ii,0.416799,0.815098,Criteria ii,Criteria iv,Criteria i,0.390654,0.895209,False,0.5,0.855154,0.403726
80959,116671,123097,Criteria iii,Criteria ii,Criteria vi,0.334180,0.817606,Criteria ii,Criteria i,Criteria iv,0.511271,0.902621,False,0.2,0.860113,0.422726
80960,116672,123098,Criteria iii,Criteria ii,Criteria vi,0.365697,0.772994,Criteria i,Criteria ii,Criteria iv,0.437061,0.831232,False,0.2,0.802113,0.401379
80961,116673,123099,Criteria iii,Criteria vi,Criteria iv,0.350125,0.836224,Criteria ii,Criteria i,Criteria iv,0.548089,0.915880,False,0.2,0.876052,0.449107


In [106]:
v_labels['conf_3'] = cmp['conf_3']
v_labels['conf_1'] = cmp['conf_1']
v_labels['same_3'] = cmp['same_3']
v_labels['same_1'] = cmp['same_1']

In [107]:
v_labels

Unnamed: 0,Criteria i,Criteria ii,Criteria iii,Criteria iv,Criteria v,Criteria vi,Criteria vii,Criteria viii,Criteria ix,Criteria x,...,max_2_val,max_2_col,max_3_val,max_3_col,max_1,max_3,conf_3,conf_1,same_3,same_1
0,0.241893,0.064412,0.097109,0.507689,0.026430,0.044148,0.004947,0.001907,0.001703,0.002322,...,0.241893,Criteria i,0.097109,Criteria iii,0.507689,0.846691,0.846691,0.507689,1.0,True
1,0.215271,0.063392,0.097399,0.539351,0.026291,0.042574,0.003864,0.001658,0.001491,0.001947,...,0.215271,Criteria i,0.097399,Criteria iii,0.539351,0.852021,0.852021,0.539351,1.0,True
2,0.103247,0.121675,0.076590,0.266513,0.018847,0.365151,0.016136,0.005319,0.008040,0.009312,...,0.266513,Criteria iv,0.121675,Criteria ii,0.365151,0.753339,0.849881,0.568770,0.2,False
3,0.052303,0.110718,0.158982,0.303451,0.062879,0.231280,0.030306,0.010853,0.014807,0.009993,...,0.231280,Criteria vi,0.158982,Criteria iii,0.303451,0.693713,0.724117,0.384651,0.5,False
4,0.027294,0.115427,0.160497,0.472574,0.120599,0.074298,0.006610,0.002654,0.006096,0.004527,...,0.160497,Criteria iii,0.120599,Criteria v,0.472574,0.753670,0.810315,0.472574,0.5,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80958,0.068046,0.276074,0.224602,0.309969,0.040879,0.064006,0.001947,0.001898,0.002546,0.002214,...,0.276074,Criteria ii,0.224602,Criteria iii,0.309969,0.810644,0.855154,0.403726,0.5,False
80959,0.151052,0.404452,0.192082,0.125432,0.009508,0.099948,0.001754,0.002422,0.002119,0.002378,...,0.192082,Criteria iii,0.151052,Criteria i,0.404452,0.747587,0.860113,0.422726,0.2,False
80960,0.232394,0.238239,0.218373,0.156745,0.010405,0.125464,0.002380,0.002557,0.002157,0.002258,...,0.232394,Criteria i,0.218373,Criteria iii,0.238239,0.689006,0.802113,0.401379,0.2,False
80961,0.143204,0.319948,0.189708,0.147328,0.009527,0.172217,0.002159,0.002768,0.002117,0.002292,...,0.189708,Criteria iii,0.172217,Criteria vi,0.319948,0.681873,0.876052,0.449107,0.2,False


In [111]:
VAL_LAB = bert[['index.1','text_bool']].merge(v_labels[bert['text_bool']],how='left', left_index=True, right_index=True)

In [112]:
VAL_LAB['labelled'] = (VAL_LAB.conf_3>=0.75) & (VAL_LAB.same_3>=0.5)

In [113]:
VAL_LAB[VAL_LAB.labelled]

Unnamed: 0,index.1,text_bool,Criteria i,Criteria ii,Criteria iii,Criteria iv,Criteria v,Criteria vi,Criteria vii,Criteria viii,...,max_2_col,max_3_val,max_3_col,max_1,max_3,conf_3,conf_1,same_3,same_1,labelled
0,50770113131,True,0.241893,0.064412,0.097109,0.507689,0.026430,0.044148,0.004947,0.001907,...,Criteria i,0.097109,Criteria iii,0.507689,0.846691,0.846691,0.507689,1.0,True,True
1,50770228312,True,0.215271,0.063392,0.097399,0.539351,0.026291,0.042574,0.003864,0.001658,...,Criteria i,0.097399,Criteria iii,0.539351,0.852021,0.852021,0.539351,1.0,True,True
4,50766892772,True,0.027294,0.115427,0.160497,0.472574,0.120599,0.074298,0.006610,0.002654,...,Criteria iii,0.120599,Criteria v,0.472574,0.753670,0.810315,0.472574,0.5,True,True
5,50766892252,True,0.024067,0.110218,0.208847,0.395105,0.147828,0.084316,0.006409,0.002525,...,Criteria iii,0.147828,Criteria v,0.395105,0.751779,0.801720,0.395105,0.5,True,True
7,50763281362,True,0.052154,0.045569,0.205780,0.087766,0.012058,0.511174,0.034524,0.019425,...,Criteria iii,0.087766,Criteria iv,0.511174,0.804720,0.804719,0.511174,1.0,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80952,20906151848,True,0.148628,0.038375,0.207089,0.429762,0.016484,0.110343,0.016347,0.009239,...,Criteria iii,0.148628,Criteria i,0.429762,0.785479,0.803788,0.480751,0.5,False,True
80953,20905940510,True,0.033005,0.436002,0.166744,0.245028,0.019090,0.086166,0.001302,0.001254,...,Criteria iv,0.166744,Criteria iii,0.436002,0.847774,0.857761,0.436002,0.5,True,True
80954,20472920383,True,0.123870,0.288300,0.174762,0.305799,0.014098,0.078043,0.001457,0.001616,...,Criteria ii,0.174762,Criteria iii,0.305799,0.768861,0.830410,0.371838,0.5,False,True
80956,21067259705,True,0.061778,0.231981,0.196921,0.336562,0.021311,0.135269,0.001387,0.001556,...,Criteria ii,0.196921,Criteria iii,0.336562,0.765465,0.822078,0.358216,0.5,False,True


In [123]:
VAL_LAB[VAL_LAB.columns[25:]]

Unnamed: 0,labelled
0,True
1,True
2,False
3,False
4,True
...,...
80958,True
80959,False
80960,False
80961,False


In [52]:
VAL_LAB.to_csv('Venezia/data_storage/Value_Labels.csv', sep='\t')

In [73]:
VAL_LAB.to_csv('Amsterdam/data_storage/Value_Labels.csv', sep='\t')

In [90]:
VAL_LAB.to_csv('Suzhou/data_storage/Value_Labels.csv', sep='\t')

In [114]:
VAL_LAB.to_csv('data_storage/images/Value_Labels.csv', sep='\t')

## Heritage Attributes Labels

In [143]:
from ast import literal_eval

In [163]:
category_dict = {0: 'architectural elements',
 1: 'form',
 2: 'gastronomy',
 3: 'interior',
 4: 'landscape scenery and natural features',
 5: 'monuments',
 6: 'people',
 7: 'product',
 8: 'urban scenery'}

In [181]:
cat_dict_new = {0: 'Building Elements',
 1: 'Urban Form Elements',
 2: 'Gastronomy',
 3: 'Interior Scenery',
 4: 'Natural Features and Land-scape Scenery',
 5: 'Monuments and Buildings',
 6: 'People’s Activity and Association',
 7: 'Artifact Products',
 8: 'Urban Scenery'}

In [184]:
categories = [cat_dict_new[v] for v in cat_dict_new]
categories

['Building Elements',
 'Urban Form Elements',
 'Gastronomy',
 'Interior Scenery',
 'Natural Features and Land-scape Scenery',
 'Monuments and Buildings',
 'People’s Activity and Association',
 'Artifact Products',
 'Urban Scenery']

In [124]:
IMG_pred_150 = pd.read_csv('Venezia/data_storage/IMG_pred_150_cat.csv',sep='\t', index_col='Unnamed: 0')

In [236]:
IMG_pred_150 = pd.read_csv('Amsterdam/data_storage/IMG_pred_150_cat.csv',sep='\t')

In [255]:
IMG_pred_150 = pd.read_csv('Suzhou/data_storage/IMG_pred_150_cat.csv',sep='\t')

In [272]:
IMG_pred_150 = pd.read_csv('data_storage/images/IMG_pred_150_cat.csv',sep='\t', index_col='Unnamed: 0')

In [273]:
IMG_pred_150[IMG_pred_150.columns[565:]]

Unnamed: 0,confidence,cat_id_vote,cat_probs_vote,category_vote,conf_vote,cat_id_stack,cat_probs_stack,category_stack,conf_stack,category_same,index.1
0,0.376236,0,[0.39053658 0.07688859 0.05247085 0.01808068 0...,architectural elements,0.390537,0,[0.434003 0.09930002 0.0074593 0.01666662 0...,architectural elements,0.434003,True,50770113131
1,0.924908,5,[0.25264659 0.02824921 0.00182519 0.00707536 0...,monuments,0.689898,5,[0.36544746 0.03825779 0.00173039 0.00638233 0...,monuments,0.556244,True,50770228312
2,0.891226,4,[0.0073182 0.35323225 0.00486117 0.00246835 0...,landscape scenery and natural features,0.477274,1,[0.04836891 0.40009176 0.02311173 0.04185914 0...,form,0.400092,False,50766039303
3,0.888184,6,[0.02507305 0.25538455 0.01152419 0.00863368 0...,people,0.481766,6,[0.0181194 0.33873159 0.06019872 0.05836022 0...,people,0.364438,True,50766893587
4,0.556705,1,[0.01647333 0.89346488 0.01105204 0.00141608 0...,form,0.893465,1,[7.14481079e-03 9.37520564e-01 6.32304290e-03 ...,form,0.937521,True,50766892772
...,...,...,...,...,...,...,...,...,...,...,...
80958,0.362460,5,[0.03339911 0.28411171 0.00525297 0.00765534 0...,monuments,0.509671,5,[0.09946484 0.32641971 0.00675882 0.00629108 0...,monuments,0.458628,True,20879329608
80959,0.980830,0,[0.94034656 0.01258757 0.00357108 0.01685633 0...,architectural elements,0.940347,0,[9.43930439e-01 7.15241083e-03 3.10525620e-04 ...,architectural elements,0.943930,True,21040989736
80960,0.578285,0,[0.87164785 0.03271126 0.00088374 0.0227124 0...,architectural elements,0.871648,0,[9.09375858e-01 1.06165719e-02 5.26034688e-04 ...,architectural elements,0.909376,True,20880415099
80961,0.492456,0,[0.55386971 0.32644859 0.00757863 0.01558469 0...,architectural elements,0.553870,0,[0.6153323 0.179466 0.00454481 0.06058073 0...,architectural elements,0.615332,True,20879281658


In [274]:
VOTEs = IMG_pred_150['cat_probs_vote'].str.replace('[','').str.replace(']','').str.replace('\n','').str.split(expand=True).rename(columns=cat_dict_new)

  VOTEs = IMG_pred_150['cat_probs_vote'].str.replace('[','').str.replace(']','').str.replace('\n','').str.split(expand=True).rename(columns=cat_dict_new)


In [275]:
STACKs = IMG_pred_150['cat_probs_stack'].str.replace('[','').str.replace(']','').str.replace('\n','').str.split(expand=True).rename(columns=cat_dict_new)

  STACKs = IMG_pred_150['cat_probs_stack'].str.replace('[','').str.replace(']','').str.replace('\n','').str.split(expand=True).rename(columns=cat_dict_new)


In [276]:
VOTEs = VOTEs.astype(float)
VOTEs

Unnamed: 0,Building Elements,Urban Form Elements,Gastronomy,Interior Scenery,Natural Features and Land-scape Scenery,Monuments and Buildings,People’s Activity and Association,Artifact Products,Urban Scenery
0,0.390537,0.076889,0.052471,0.018081,0.009278,0.373008,0.021499,0.022670,0.035567
1,0.252647,0.028249,0.001825,0.007075,0.006304,0.689898,0.005580,0.001402,0.007019
2,0.007318,0.353232,0.004861,0.002468,0.477274,0.022278,0.023849,0.006545,0.102173
3,0.025073,0.255385,0.011524,0.008634,0.060189,0.082511,0.481766,0.007979,0.066940
4,0.016473,0.893465,0.011052,0.001416,0.002594,0.041075,0.012039,0.006095,0.015790
...,...,...,...,...,...,...,...,...,...
80958,0.033399,0.284112,0.005253,0.007655,0.011063,0.509671,0.023306,0.006414,0.119127
80959,0.940347,0.012588,0.003571,0.016856,0.001716,0.014993,0.004445,0.000980,0.004503
80960,0.871648,0.032711,0.000884,0.022712,0.002328,0.050936,0.011154,0.004965,0.002662
80961,0.553870,0.326449,0.007579,0.015585,0.003696,0.010892,0.061895,0.017490,0.002546


In [277]:
STACKs=STACKs.astype(float)
STACKs

Unnamed: 0,Building Elements,Urban Form Elements,Gastronomy,Interior Scenery,Natural Features and Land-scape Scenery,Monuments and Buildings,People’s Activity and Association,Artifact Products,Urban Scenery
0,0.434003,0.099300,0.007459,0.016667,0.015785,0.376650,0.023540,0.008071,0.018525
1,0.365447,0.038258,0.001730,0.006382,0.002585,0.556244,0.003236,0.005169,0.020949
2,0.048369,0.400092,0.023112,0.041859,0.220126,0.032953,0.136749,0.015246,0.081495
3,0.018119,0.338732,0.060199,0.058360,0.022458,0.050185,0.364438,0.057663,0.029847
4,0.007145,0.937521,0.006323,0.002909,0.000551,0.025030,0.015413,0.001229,0.003880
...,...,...,...,...,...,...,...,...,...
80958,0.099465,0.326420,0.006759,0.006291,0.003333,0.458628,0.014077,0.008013,0.077014
80959,0.943930,0.007152,0.000311,0.021950,0.004905,0.014882,0.003127,0.003087,0.000657
80960,0.909376,0.010617,0.000526,0.029639,0.007066,0.031976,0.005292,0.004368,0.001140
80961,0.615332,0.179466,0.004545,0.060581,0.015517,0.047708,0.055019,0.015428,0.006404


In [278]:
a_labels = (VOTEs + STACKs)/2
a_labels

Unnamed: 0,Building Elements,Urban Form Elements,Gastronomy,Interior Scenery,Natural Features and Land-scape Scenery,Monuments and Buildings,People’s Activity and Association,Artifact Products,Urban Scenery
0,0.412270,0.088094,0.029965,0.017374,0.012531,0.374829,0.022520,0.015371,0.027046
1,0.309047,0.033253,0.001778,0.006729,0.004444,0.623071,0.004408,0.003285,0.013984
2,0.027844,0.376662,0.013986,0.022164,0.348700,0.027616,0.080299,0.010896,0.091834
3,0.021596,0.297058,0.035861,0.033497,0.041323,0.066348,0.423102,0.032821,0.048393
4,0.011809,0.915493,0.008688,0.002162,0.001573,0.033053,0.013726,0.003662,0.009835
...,...,...,...,...,...,...,...,...,...
80958,0.066432,0.305266,0.006006,0.006973,0.007198,0.484150,0.018691,0.007214,0.098070
80959,0.942138,0.009870,0.001941,0.019403,0.003311,0.014937,0.003786,0.002034,0.002580
80960,0.890512,0.021664,0.000705,0.026175,0.004697,0.041456,0.008223,0.004666,0.001901
80961,0.584601,0.252957,0.006062,0.038083,0.009606,0.029300,0.058457,0.016459,0.004475


In [279]:
ATT_LAB = IMG_pred_150[['ID','name']].merge(a_labels, how='left', left_index=True, right_index=True).drop('name',axis=1)
ATT_LAB

Unnamed: 0,ID,Building Elements,Urban Form Elements,Gastronomy,Interior Scenery,Natural Features and Land-scape Scenery,Monuments and Buildings,People’s Activity and Association,Artifact Products,Urban Scenery
0,50770113131,0.412270,0.088094,0.029965,0.017374,0.012531,0.374829,0.022520,0.015371,0.027046
1,50770228312,0.309047,0.033253,0.001778,0.006729,0.004444,0.623071,0.004408,0.003285,0.013984
2,50766039303,0.027844,0.376662,0.013986,0.022164,0.348700,0.027616,0.080299,0.010896,0.091834
3,50766893587,0.021596,0.297058,0.035861,0.033497,0.041323,0.066348,0.423102,0.032821,0.048393
4,50766892772,0.011809,0.915493,0.008688,0.002162,0.001573,0.033053,0.013726,0.003662,0.009835
...,...,...,...,...,...,...,...,...,...,...
80958,20879329608,0.066432,0.305266,0.006006,0.006973,0.007198,0.484150,0.018691,0.007214,0.098070
80959,21040989736,0.942138,0.009870,0.001941,0.019403,0.003311,0.014937,0.003786,0.002034,0.002580
80960,20880415099,0.890512,0.021664,0.000705,0.026175,0.004697,0.041456,0.008223,0.004666,0.001901
80961,20879281658,0.584601,0.252957,0.006062,0.038083,0.009606,0.029300,0.058457,0.016459,0.004475


In [280]:
IMG_pred_150['category'] = IMG_pred_150[IMG_pred_150.category_same]['category_vote']

In [281]:
IMG_pred_150['category_id'] = IMG_pred_150[IMG_pred_150.category_same]['cat_id_vote']

In [282]:
IMG_pred_150['category_id'] = IMG_pred_150['category_id'].fillna(-1).astype(int)

In [283]:
IMG_pred_150['conf'] = (IMG_pred_150['conf_vote'] + IMG_pred_150['conf_stack'])/2

In [284]:
IMG_pred_150[['category','category_id','category_vote','cat_id_vote', 'category_stack','cat_id_stack',  'conf', 'category_same']]

Unnamed: 0,category,category_id,category_vote,cat_id_vote,category_stack,cat_id_stack,conf,category_same
0,architectural elements,0,architectural elements,0,architectural elements,0,0.412270,True
1,monuments,5,monuments,5,monuments,5,0.623071,True
2,,-1,landscape scenery and natural features,4,form,1,0.438683,False
3,people,6,people,6,people,6,0.423102,True
4,form,1,form,1,form,1,0.915493,True
...,...,...,...,...,...,...,...,...
80958,monuments,5,monuments,5,monuments,5,0.484150,True
80959,architectural elements,0,architectural elements,0,architectural elements,0,0.942138,True
80960,architectural elements,0,architectural elements,0,architectural elements,0,0.890512,True
80961,architectural elements,0,architectural elements,0,architectural elements,0,0.584601,True


In [285]:
ATT_LAB = ATT_LAB.merge(IMG_pred_150[['category','category_id','category_vote','cat_id_vote', 
                                      'category_stack','cat_id_stack',  'conf', 'category_same']],
                       how='left', left_index=True, right_index=True)

In [286]:
ATT_LAB['labelled'] = (ATT_LAB.conf>=0.7) & (ATT_LAB.category_same)

In [287]:
ATT_LAB[ATT_LAB.labelled]

Unnamed: 0,ID,Building Elements,Urban Form Elements,Gastronomy,Interior Scenery,Natural Features and Land-scape Scenery,Monuments and Buildings,People’s Activity and Association,Artifact Products,Urban Scenery,category,category_id,category_vote,cat_id_vote,category_stack,cat_id_stack,conf,category_same,labelled
4,50766892772,0.011809,0.915493,0.008688,0.002162,0.001573,0.033053,0.013726,0.003662,0.009835,form,1,form,1,form,1,0.915493,True,True
5,50766892252,0.007013,0.861934,0.003147,0.003801,0.001778,0.112822,0.003591,0.001306,0.004609,form,1,form,1,form,1,0.861934,True,True
8,50762326397,0.016873,0.845055,0.008787,0.007844,0.008445,0.075485,0.021208,0.003526,0.012778,form,1,form,1,form,1,0.845055,True,True
9,50759304312,0.023229,0.827964,0.003225,0.005748,0.004723,0.114696,0.009097,0.002800,0.008518,form,1,form,1,form,1,0.827964,True,True
12,50753064816,0.063995,0.753307,0.009293,0.006454,0.002883,0.122078,0.026211,0.004682,0.011096,form,1,form,1,form,1,0.753307,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80954,20472920383,0.753900,0.076857,0.008495,0.037670,0.010506,0.044045,0.048432,0.017303,0.002793,architectural elements,0,architectural elements,0,architectural elements,0,0.753900,True,True
80955,20906083108,0.037035,0.734087,0.017871,0.010127,0.002192,0.047726,0.135027,0.009292,0.006643,form,1,form,1,form,1,0.734087,True,True
80959,21040989736,0.942138,0.009870,0.001941,0.019403,0.003311,0.014937,0.003786,0.002034,0.002580,architectural elements,0,architectural elements,0,architectural elements,0,0.942138,True,True
80960,20880415099,0.890512,0.021664,0.000705,0.026175,0.004697,0.041456,0.008223,0.004666,0.001901,architectural elements,0,architectural elements,0,architectural elements,0,0.890512,True,True


In [295]:
ATT_LAB[ATT_LAB.columns[16:19]]

Unnamed: 0,conf,category_same,labelled
0,0.412270,True,False
1,0.623071,True,False
2,0.438683,False,False
3,0.423102,True,False
4,0.915493,True,True
...,...,...,...
80958,0.484150,True,False
80959,0.942138,True,True
80960,0.890512,True,True
80961,0.584601,True,False


In [235]:
ATT_LAB.to_csv('Venezia/data_storage/Attribute_Labels.csv', sep='\t')

In [254]:
ATT_LAB.to_csv('Amsterdam/data_storage/Attribute_Labels.csv', sep='\t')

In [271]:
ATT_LAB.to_csv('Suzhou/data_storage/Attribute_Labels.csv', sep='\t')

In [288]:
ATT_LAB.to_csv('data_storage/images/Attribute_Labels.csv', sep='\t')