# Create metadata

In [1]:
import pandas as pd
import numpy as np

In [4]:
continents = pd.read_csv("../../data/latlongs/test_continents.csv")
continents.head(5)

Unnamed: 0,lats,longs,id,CONTINENT
0,6.552393,37.070625,135505799,Africa
1,6.495981,35.55612,135505800,Africa
2,8.384319,38.972473,135505801,Africa
3,7.217695,33.701621,135505803,Africa
4,9.565499,40.256333,135505804,Africa


In [13]:
clouds = []
slopes = []
ids = continents['id']
for i in ids:
    cloud = np.load("../../data/raw/test-clouds/{}.npy".format(str(i)))
    data = np.load("../../data/test-s2/{}.npy".format(str(i)))
    slopes.append(np.mean(data[0, :, :, 10]))
    cloud_percents = [len(np.argwhere(cloud[i].flatten() > 0.3)) for i in range(cloud.shape[0])]
    cloud_percents = np.array(cloud_percents) / cloud.shape[1]*cloud.shape[2]
    cloud_percents = len(np.argwhere(cloud_percents > 0.2)) / len(cloud_percents)
    clouds.append(cloud_percents)

In [15]:
continents['clouds'] = clouds
continents['slopes'] = slopes
continents.to_csv("../../data/metrics/metadata/metadata.csv")

# Load proposed model predictions

In [20]:
proposed = pd.read_csv("../../data/metrics/proposed-sample.csv")
proposed = proposed.join(continents, how = 'inner')
proposed.head(5)

Unnamed: 0.1,Unnamed: 0,true,pred,f1_hard,error,tp,fp,fn,tp_soft,fp_soft,fn_soft,group,model,plot_id,lats,longs,id,CONTINENT,clouds,slopes
0,0,29.081633,19.897959,0.645833,9.183673,31.0,8,26,46.0,0.0,11.0,20,proposed,135505799,6.552393,37.070625,135505799,Africa,0.552239,0.223983
1,1,100.0,40.816327,0.57971,59.183673,80.0,0,116,131.0,0.0,65.0,90,proposed,135505800,6.495981,35.55612,135505800,Africa,0.636364,0.086151
2,2,11.22449,7.653061,0.540541,3.571429,10.0,5,12,16.0,0.0,6.0,10,proposed,135505801,8.384319,38.972473,135505801,Africa,0.307692,0.053171
3,3,3.061224,15.306122,0.055556,12.244898,1.0,29,5,3.0,25.0,3.0,0,proposed,135505803,7.217695,33.701621,135505803,Africa,0.688525,0.041682
4,4,0.0,3.571429,0.0,3.571429,0.0,7,0,0.0,7.0,0.0,0,proposed,135505804,9.565499,40.256333,135505804,Africa,0.24,0.048161


# Overall metrics

In [48]:
tp = proposed.sum()['tp_soft']
fp = proposed.sum()['fp_soft']
fn = proposed.sum()['fn_soft']
precision = tp / (tp + fp)
recall = tp / (tp + fn)
print(float(precision), float(recall))

0.950917626973965 0.9336618072839945


## Regional metrics

In [44]:
sums = proposed.groupby("CONTINENT").sum()
for i in ['Africa', 'Asia', "Australia", "Europe", "North America", "South America"]:
    tp = sums[sums.index == i]['tp_soft']
    fp = sums[sums.index == i]['fp_soft']
    fn = sums[sums.index == i]['fn_soft']
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(i, float(precision), float(recall))

Africa 0.9625504834169625 0.9101253616200579
Asia 0.9191338930623066 0.9132152787940875
Australia 0.9523607876349778 0.9264675592173017
Europe 0.9227500965623793 0.9363731382283773
North America 0.9576759328825445 0.9736721495136732
South America 0.943580809434468 0.9296276736202799


# Cloud metrics

In [31]:
tp = proposed[proposed['clouds'] < 0.75].sum()['tp_soft']
fp = proposed[proposed['clouds'] < 0.75].sum()['fp_soft']
fn = proposed[proposed['clouds'] < 0.75].sum()['fn_soft']
error = proposed[proposed['clouds'] < 0.75].mean()['error']
count = proposed[proposed['clouds'] < 0.75].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.75 cloud: ", precision, recall, error, count)

Below 0.75 cloud:  0.9511777008262763 0.9319269722366852 6.907152261826391 170128


In [33]:
tp = proposed[proposed['clouds'] >= 0.75].sum()['tp_soft']
fp = proposed[proposed['clouds'] >= 0.75].sum()['fp_soft']
fn = proposed[proposed['clouds'] >= 0.75].sum()['fn_soft']
error = proposed[proposed['clouds'] >= 0.75].mean()['error']
count = proposed[proposed['clouds'] >= 0.75].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.75 cloud: ", precision, recall, error, count)

Above 0.75 cloud:  0.9501185146168027 0.9390389921391015 6.9727891156462585 45864


# Slope metrics

In [46]:
tp = proposed[proposed['slopes'] < 0.1].sum()['tp_soft']
fp = proposed[proposed['slopes'] < 0.1].sum()['fp_soft']
fn = proposed[proposed['slopes'] < 0.1].sum()['fn_soft']
error = proposed[proposed['slopes'] < 0.1].mean()['error']
count = proposed[proposed['slopes'] < 0.1].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.1 slope: ", precision, recall, error, count)

Below 0.1 slope:  0.9506964365948917 0.9228665850262535 6.924036281179138 176400


In [47]:
tp = proposed[proposed['slopes'] >= 0.1].sum()['tp_soft']
fp = proposed[proposed['slopes'] >= 0.1].sum()['fp_soft']
fn = proposed[proposed['slopes'] >= 0.1].sum()['fn_soft']
error = proposed[proposed['slopes'] >= 0.1].mean()['error']
count = proposed[proposed['slopes'] >= 0.1].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.1 slope: ", precision, recall, error, count)

Above 0.1 slope:  0.9515755924536061 0.9672884987197575 6.907961204283693 39592


# Canopy metrics

In [51]:
tp = proposed[proposed['true'] < 20].sum()['tp_soft']
fp = proposed[proposed['true'] < 20].sum()['fp_soft']
fn = proposed[proposed['true'] < 20].sum()['fn_soft']
error = proposed[proposed['true'] < 20].mean()['error']
count = proposed[proposed['true'] < 20].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 20%: ", precision, recall, error, count)

Below 20%:  0.5231864537233019 0.5978452066842568 4.295085381091211 115248


In [61]:
tp = proposed[proposed['true'].between(20, 40)].sum()['tp_soft']
fp = proposed[proposed['true'].between(20, 40)].sum()['fp_soft']
fn = proposed[proposed['true'].between(20, 40)].sum()['fn_soft']

error = proposed[proposed['true'].between(20, 40)].mean()['error']
count = proposed[proposed['true'].between(20, 40)].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 20 and 40%: ", precision, recall, error, count)

Between 20 and 40%:  0.8895027624309392 0.8386781702751099 13.970315398886829 21560


In [62]:
tp = proposed[proposed['true'].between(40, 60)].sum()['tp_soft']
fp = proposed[proposed['true'].between(40, 60)].sum()['fp_soft']
fn = proposed[proposed['true'].between(40, 60)].sum()['fn_soft']

error = proposed[proposed['true'].between(40, 60)].mean()['error']
count = proposed[proposed['true'].between(40, 60)].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 40 and 60%: ", precision, recall, error, count)

Between 40 and 60%:  0.9415225530397575 0.8839973217274858 16.25125460020074 11956


In [63]:
tp = proposed[proposed['true'].between(60, 101)].sum()['tp_soft']
fp = proposed[proposed['true'].between(60, 101)].sum()['fp_soft']
fn = proposed[proposed['true'].between(60, 101)].sum()['fn_soft']

error = proposed[proposed['true'].between(60, 101)].mean()['error']
count = proposed[proposed['true'].between(60, 101)].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 60 and 100%: ", precision, recall, error, count)

Between 60 and 100%:  0.9942524003227878 0.9724401204838684 7.502826203367645 67228


In [87]:
proposed = pd.read_csv("../../data/metrics/proposed.csv")
print(proposed.mean()['soft_prec'], proposed.mean()['soft_rec'])

0.8644605443408511 0.8358825439098567


# Load Random forest predictions

In [88]:
proposed = pd.read_csv("../../data/metrics/rf-sample.csv")
proposed = proposed.join(continents, how = 'inner')
proposed.head(5)

Unnamed: 0.1,Unnamed: 0,true,pred,f1_hard,error,tp,fp,fn,tp_soft,fp_soft,fn_soft,group,model,lats,longs,id,CONTINENT,clouds,slopes
0,0,29.081633,33.163265,0.688525,4.081633,42.0,23,15,42.0,6.0,3.0,20,Random Forest,6.552393,37.070625,135505799,Africa,0.552239,0.223983
1,1,100.0,13.265306,0.234234,86.734694,26.0,0,170,26.0,0.0,100.0,90,Random Forest,6.495981,35.55612,135505800,Africa,0.636364,0.086151
2,2,11.22449,3.061224,0.071429,8.163265,1.0,5,21,1.0,4.0,19.0,10,Random Forest,8.384319,38.972473,135505801,Africa,0.307692,0.053171
3,3,3.061224,43.877551,0.086957,40.816327,4.0,82,2,4.0,66.0,0.0,0,Random Forest,7.217695,33.701621,135505803,Africa,0.688525,0.041682
4,4,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,Random Forest,9.565499,40.256333,135505804,Africa,0.24,0.048161


In [89]:
sums = proposed.groupby("CONTINENT").sum()
for i in ['Africa', 'Asia', "Australia", "Europe", "North America", "South America"]:
    tp = sums[sums.index == i]['tp_soft']
    fp = sums[sums.index == i]['fp_soft']
    fn = sums[sums.index == i]['fn_soft']
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(i, float(precision), float(recall))

Africa 0.9063200815494393 0.8084197126750319
Asia 0.8944373049384983 0.8401448525607863
Australia 0.9141049968374446 0.8813269910964752
Europe 0.8526092152436595 0.9176790052879805
North America 0.9106339984844658 0.9652993466852308
South America 0.8248557240638841 0.9296626834064438


In [79]:
tp = proposed[proposed['true'] < 20].sum()['tp_soft']
fp = proposed[proposed['true'] < 20].sum()['fp_soft']
fn = proposed[proposed['true'] < 20].sum()['fn_soft']
error = proposed[proposed['true'] < 20].mean()['error']
count = proposed[proposed['true'] < 20].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 20%: ", precision, recall, error, count)

Below 20%:  0.16666666666666666 0.29134078212290504 6.734867121721353 115444


In [80]:
tp = proposed[proposed['clouds'] >= 0.75].sum()['tp_soft']
fp = proposed[proposed['clouds'] >= 0.75].sum()['fp_soft']
fn = proposed[proposed['clouds'] >= 0.75].sum()['fn_soft']
error = proposed[proposed['clouds'] >= 0.75].mean()['error']
count = proposed[proposed['clouds'] >= 0.75].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.75 cloud: ", precision, recall, error, count)

Above 0.75 cloud:  0.8896187683284458 0.9148923336751312 10.805471124620063 46060


In [81]:
tp = proposed[proposed['slopes'] >= 0.1].sum()['tp_soft']
fp = proposed[proposed['slopes'] >= 0.1].sum()['fp_soft']
fn = proposed[proposed['slopes'] >= 0.1].sum()['fn_soft']
error = proposed[proposed['slopes'] >= 0.1].mean()['error']
count = proposed[proposed['slopes'] >= 0.1].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.1 slope: ", precision, recall, error, count)

Above 0.1 slope:  0.9026670917012007 0.9531556802244039 10.452900371971447 39788


In [86]:
proposed = pd.read_csv("../../data/metrics/rf.csv")
print(proposed.mean()['soft_prec'], proposed.mean()['soft_rec'])

0.7614104096671157 0.6932136477840563


# Load U-Net forest predictions

In [90]:
proposed = pd.read_csv("../../data/metrics/unet-sample.csv")
proposed = proposed.join(continents, how = 'inner')
proposed.head(5)

Unnamed: 0.1,Unnamed: 0,true,pred,f1_hard,error,tp,fp,fn,tp_soft,fp_soft,fn_soft,group,model,lats,longs,id,CONTINENT,clouds,slopes
0,0,29.081633,8.163265,0.246575,20.918367,9.0,7,48,21.0,1.0,36.0,20,U-net,6.552393,37.070625,135505799,Africa,0.552239,0.223983
1,1,100.0,0.0,0.0,100.0,0.0,0,196,0.0,0.0,196.0,90,U-net,6.495981,35.55612,135505800,Africa,0.636364,0.086151
2,2,11.22449,0.0,0.0,11.22449,0.0,0,22,0.0,0.0,22.0,10,U-net,8.384319,38.972473,135505801,Africa,0.307692,0.053171
3,3,3.061224,0.0,0.0,3.061224,0.0,0,6,0.0,0.0,6.0,0,U-net,7.217695,33.701621,135505803,Africa,0.688525,0.041682
4,4,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,U-net,9.565499,40.256333,135505804,Africa,0.24,0.048161


In [91]:
sums = proposed.groupby("CONTINENT").sum()
for i in ['Africa', 'Asia', "Australia", "Europe", "North America", "South America"]:
    tp = sums[sums.index == i]['tp_soft']
    fp = sums[sums.index == i]['fp_soft']
    fn = sums[sums.index == i]['fn_soft']
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(i, float(precision), float(recall))

Africa 0.9294718747015567 0.7508196721311475
Asia 0.8957775795420755 0.8817503292843554
Australia 0.9275988043839256 0.8629248197734295
Europe 0.8764488286066584 0.9286647504572773
North America 0.9080292702485966 0.9225951010846871
South America 0.9198328267477204 0.9589384737259045


In [75]:
tp = proposed[proposed['true'] < 20].sum()['tp_soft']
fp = proposed[proposed['true'] < 20].sum()['fp_soft']
fn = proposed[proposed['true'] < 20].sum()['fn_soft']
error = proposed[proposed['true'] < 20].mean()['error']
count = proposed[proposed['true'] < 20].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 20%: ", precision, recall, error, count)

Below 20%:  0.33954918032786885 0.3643359718557608 5.926683067114792 115444


In [76]:
tp = proposed[proposed['clouds'] >= 0.75].sum()['tp_soft']
fp = proposed[proposed['clouds'] >= 0.75].sum()['fp_soft']
fn = proposed[proposed['clouds'] >= 0.75].sum()['fn_soft']
error = proposed[proposed['clouds'] >= 0.75].mean()['error']
count = proposed[proposed['clouds'] >= 0.75].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.75 cloud: ", precision, recall, error, count)

Above 0.75 cloud:  0.9181706206822685 0.9163548904329236 9.765523230568823 46060


In [77]:
tp = proposed[proposed['slopes'] >= 0.1].sum()['tp_soft']
fp = proposed[proposed['slopes'] >= 0.1].sum()['fp_soft']
fn = proposed[proposed['slopes'] >= 0.1].sum()['fn_soft']
error = proposed[proposed['slopes'] >= 0.1].mean()['error']
count = proposed[proposed['slopes'] >= 0.1].count()['error'] * 196



precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.1 slope: ", precision, recall, error, count)

Above 0.1 slope:  0.9203084832904884 0.9072196938353331 12.184578264803458 39788


In [85]:
proposed = pd.read_csv("../../data/metrics/unet.csv")
print(proposed.mean()['soft_prec'], proposed.mean()['soft_rec'])

0.7735725287501736 0.7203105493872846
