In [1]:
import pandas as pd
import numpy as np
from scipy import stats

### Get raw predictions

In [2]:
# Load the raw prediction scores. The file is read with im_name as its index
# and the index is turned straight back into an ordinary column, so the file
# names (im10.jpg -> 10) can be rewritten below.
preds = pd.read_csv(
    "../data/trainset_prediction_results_raw.csv",
    index_col="im_name",
).reset_index()

def name_to_number(name):
    """Strip the extension and the two-character prefix from a file name.

    Everything from the first "." onwards is discarded, then the first two
    characters of what remains are removed, e.g. "im10.jpg" -> "10".

    Args:
        name: File name as a string.

    Returns:
        The remaining characters as a string (not converted to a number).
    """
    stem, _, _ = name.partition(".")
    return stem[2:]

# Replace the file-name column with the image number extracted from it and
# use that number as the row label.
# NOTE: name_to_number returns strings, so the labels are still text at this
# point and sort_index orders them lexicographically (1, 10, 100, ...).
preds = (
    preds
    .assign(im_num=preds["im_name"].map(name_to_number))
    .set_index("im_num")
    .drop(columns=['im_name'])
    .sort_index(ascending=True)
    .astype(float)
)
preds

Unnamed: 0_level_0,baby,bird,car,clouds,dog,female,flower,male,night,people,portrait,river,sea,tree
im_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,0.059114,0.189894,0.270527,0.514116,0.376831,0.471086,0.452792,0.479973,0.335236,0.636476,0.460556,0.111273,0.186122,0.330838
10,0.176588,0.215513,0.389820,0.489287,0.396909,0.433458,0.415022,0.486882,0.442126,0.609778,0.464283,0.094839,0.157231,0.368786
100,0.070583,0.240978,0.366895,0.419676,0.438019,0.494183,0.393568,0.411997,0.332402,0.605474,0.505368,0.140239,0.177220,0.411658
1000,0.071286,0.252124,0.276460,0.494695,0.328481,0.501982,0.466024,0.455709,0.424696,0.607908,0.485162,0.131687,0.216783,0.382426
10000,0.056155,0.228149,0.074180,0.532709,0.237711,0.490562,0.449285,0.410413,0.532735,0.655322,0.514495,0.104140,0.154763,0.323060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.040681,0.166330,0.365468,0.529521,0.334640,0.471692,0.370028,0.481998,0.422202,0.598420,0.435404,0.055396,0.134456,0.312829
9996,0.028645,0.207533,0.164387,0.427778,0.400044,0.462290,0.492896,0.479316,0.317838,0.638808,0.463050,0.076265,0.071814,0.286018
9997,0.072235,0.200894,0.310594,0.509093,0.347295,0.394384,0.485461,0.471936,0.276968,0.565460,0.429677,0.136493,0.093088,0.397676
9998,0.075210,0.337895,0.331656,0.422889,0.357848,0.493790,0.482956,0.465575,0.373146,0.574420,0.473360,0.131791,0.103322,0.433890


### Get true labels

In [3]:
# Load the ground-truth labels keyed by im_num, drop the `count` column and
# cast every remaining column to int.
true_labels = (
    pd.read_csv("../data/all_labels.csv", index_col="im_num")
    .drop(columns=['count'])
    .astype(int)
)
true_labels

Unnamed: 0_level_0,baby,bird,car,clouds,dog,female,flower,male,night,people,portrait,river,sea,tree
im_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,0,0,0,0,0,1,0,0,0,1,1,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,1,0,1,0,0,0,0
5,0,0,0,0,0,0,0,1,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19996,0,0,0,1,0,0,0,0,0,0,0,0,0,0
19997,0,0,0,0,0,0,0,1,0,1,0,0,0,0
19998,0,0,0,0,0,0,0,0,0,0,0,0,0,0
19999,0,0,0,0,0,1,0,0,0,1,1,0,0,0


### Mask predictions with true labels

In [4]:
# Percentile of the true-label scores that becomes each class threshold.
# Kept in one place so the printed header and the computation cannot drift.
PERCENTILE = 30

# Cast both indexes to int so the two frames align on the same row labels
preds.index = preds.index.map(int)
true_labels.index = true_labels.index.map(int)

# Mask predictions with true labels: scores whose label is 0 become 0, and
# rows that exist in only one of the two frames become NaN after alignment.
preds_true = preds.mul(true_labels)

# For every class, take the PERCENTILE-th percentile of the surviving scores
thresholds = []
print(f"Lowest {PERCENTILE} % values")
for name in preds_true:
    values = preds_true[name]
    # Vectorised mask instead of a Python-level filter; NaN > 0 is False, so
    # rows without a matching label are dropped together with the zeros.
    values = values[values > 0].to_numpy()
    if values.size == 0:
        # No positive score for this class: a percentile is undefined, so
        # record NaN explicitly instead of relying on np.percentile's
        # behaviour for empty input.
        threshold = float("nan")
    else:
        # float() stores a built-in float, so the printed list shows plain
        # numbers regardless of how NumPy renders its scalar types.
        threshold = float(round(np.percentile(values, PERCENTILE), 4))
    thresholds.append(threshold)
    print(name, ":", threshold)

print("\n Thresholds:", thresholds)

Lowest 30 % values
baby : 0.0475
bird : 0.2552
car : 0.2075
clouds : 0.4757
dog : 0.2438
female : 0.4631
flower : 0.453
male : 0.4519
night : 0.3461
people : 0.6052
portrait : 0.4606
river : 0.0645
sea : 0.0856
tree : 0.3173

 Thresholds: [0.0475, 0.2552, 0.2075, 0.4757, 0.2438, 0.4631, 0.453, 0.4519, 0.3461, 0.6052, 0.4606, 0.0645, 0.0856, 0.3173]
