# Create metadata

In [1]:
import pandas as pd
import numpy as np

In [2]:
import scipy.stats


def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean with a two-sided Student-t confidence interval.

    Parameters
    ----------
    data : array-like
        Numeric observations; converted with ``np.array`` and promoted by
        multiplying with 1.0.
    confidence : float, default 0.95
        Coverage of the interval.

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where the bounds are
        ``mean -/+ sem * t.ppf((1 + confidence) / 2, n - 1)``.
    """
    values = np.array(data) * 1.0
    dof = len(values) - 1
    center = np.mean(values)
    # Two-sided critical value of Student's t with n - 1 degrees of freedom.
    t_crit = scipy.stats.t.ppf((1 + confidence) / 2., dof)
    half_width = scipy.stats.sem(values) * t_crit
    return center, center - half_width, center + half_width

In [6]:
# Load the previously saved per-plot metadata table and preview its first rows.
# NOTE(review): the following cell rebinds `continents` from test_continents.csv, whose
# preview has no 'clouds' / 'slopes' columns — confirm which table later cells should use.
continents = pd.read_csv("../../data/metrics/metadata/metadata.csv")
continents.head(5)

Unnamed: 0.1,Unnamed: 0,lats,longs,id,CONTINENT,clouds,slopes
0,0,6.552393,37.070625,135505799,Africa,0.552239,0.223983
1,1,6.495981,35.55612,135505800,Africa,0.636364,0.086151
2,2,8.384319,38.972473,135505801,Africa,0.307692,0.053171
3,3,7.217695,33.701621,135505803,Africa,0.688525,0.041682
4,4,9.565499,40.256333,135505804,Africa,0.24,0.048161


In [2]:
# Load the per-plot coordinate / continent table and preview its first rows.
continents = pd.read_csv("../../data/latlongs/test_continents.csv")
continents.head(5)

Unnamed: 0,lats,longs,id,CONTINENT
0,6.552393,37.070625,135505799,Africa
1,6.495981,35.55612,135505800,Africa
2,8.384319,38.972473,135505801,Africa
3,7.217695,33.701621,135505803,Africa
4,9.565499,40.256333,135505804,Africa


In [3]:
# For every plot id, derive two metadata values from the saved arrays:
#   slopes - mean of channel index 10 in the first time step of the test-s2 array
#            (NOTE(review): presumably the slope band — confirm)
#   clouds - share of time steps in which more than 20% of pixels exceed 0.3 in the cloud array
clouds = []
slopes = []
ids = continents['id']
for plot_id in ids:
    cloud = np.load("../../data/raw/test-clouds/{}.npy".format(str(plot_id)))
    data = np.load("../../data/test-s2/{}.npy".format(str(plot_id)))
    slopes.append(np.mean(data[0, :, :, 10]))
    # Number of pixels above the 0.3 threshold in each time step.
    cloudy_pixel_counts = np.array([np.count_nonzero(step.flatten() > 0.3) for step in cloud])
    # Normalise by the pixel count of one step. The parentheses matter: the previous
    # `/ cloud.shape[1]*cloud.shape[2]` divided by the height and then multiplied by the width.
    # Assumes `cloud` is (time, height, width) — TODO confirm.
    cloud_percents = cloudy_pixel_counts / (cloud.shape[1] * cloud.shape[2])
    # Share of time steps whose cloudy-pixel fraction exceeds 0.2.
    cloud_percents = np.count_nonzero(cloud_percents > 0.2) / len(cloud_percents)
    clouds.append(cloud_percents)

FileNotFoundError: [Errno 2] No such file or directory: '../../data/raw/test-clouds/135505799.npy'

In [None]:
# Attach the per-plot cloud and slope values from the loop above and persist the table.
# Each list must have one entry per row of `continents` for the assignment to succeed.
# NOTE(review): to_csv also writes the row index, which is read back as an extra
# "Unnamed: 0"-style column — confirm that is intended.
continents['clouds'] = clouds
continents['slopes'] = slopes
continents.to_csv("../../data/metrics/metadata/metadata.csv")

# Load proposed model predictions

In [87]:
# Read the proposed model's sampled per-plot metrics and attach the plot metadata.
# The join aligns the two tables on their row index and keeps only indices present in both.
proposed = (
    pd.read_csv("../../data/metrics/proposed-sample.csv")
    .join(continents, how='inner')
)
proposed.head(5)

Unnamed: 0.1,Unnamed: 0,true,pred,f1_hard,error,tp,fp,fn,tp_soft,fp_soft,fn_soft,group,model,plot_id,lats,longs,id,CONTINENT
0,0,29.081633,27.55102,0.684685,1.530612,38.0,16,19,49.0,0.0,8.0,20,proposed,135505799,6.552393,37.070625,135505799,Africa
1,1,100.0,33.163265,0.498084,66.836735,65.0,0,131,95.0,0.0,101.0,90,proposed,135505800,6.495981,35.55612,135505800,Africa
2,2,11.22449,11.734694,0.533333,0.510204,12.0,11,10,16.0,1.0,6.0,10,proposed,135505801,8.384319,38.972473,135505801,Africa
3,3,3.061224,0.510204,0.285714,2.55102,1.0,0,5,1.0,0.0,5.0,0,proposed,135505803,7.217695,33.701621,135505803,Africa
4,4,0.0,10.204082,0.0,10.204082,0.0,20,0,0.0,20.0,0.0,0,proposed,135505804,9.565499,40.256333,135505804,Africa


In [88]:
# 95% confidence intervals of `error` for rows whose `group` is 30, 40, 50, 60 or 70,
# followed by the interval over all of those rows pooled together (ccgroup == 1).
means_l = []
means = []
means_u = []
proposed['ccgroup'] = 0
# One .loc assignment instead of chained indexing, which pandas cannot guarantee writes
# back to `proposed` (the source of the SettingWithCopyWarning).
proposed.loc[(proposed['group'] >= 30) & (proposed['group'] <= 70), 'ccgroup'] = 1
for i in range(0, 100, 10):
    group = proposed[proposed['group'] == i]
    ci = mean_confidence_interval(group['error'], 0.95)
    if 30 <= i <= 70:
        # mean_confidence_interval returns (mean, lower, upper); unpack by name so that
        # each list receives the value its name promises.
        mean, lower, upper = ci
        means.append(mean)
        means_l.append(lower)
        means_u.append(upper)
        print(i, ci)
# Printed as (mean, lower, upper), the same order as the per-group tuples above.
print(np.mean(means), np.mean(means_l), np.mean(means_u))

print(mean_confidence_interval(proposed[proposed['ccgroup'] == 1]['error']))

30 (14.655612244897961, 9.98529465984393, 19.325929829951992)
40 (17.540464461646728, 12.193202996070974, 22.887725927222483)
50 (16.49659863945578, 11.137335150797272, 21.85586212811429)
60 (20.21066491112574, 15.30508365065455, 25.116246171596927)
70 (14.755102040816327, 10.929367290737584, 18.58083679089507)
16.731688459588508 11.91005674962086 21.553320169556155
(16.678736010533246, 14.542344079689995, 18.815127941376495)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [53]:
# 95% confidence interval of `error` over all rows; the tuple is (mean, lower, upper).
mean_confidence_interval(proposed['error'], 0.95)

(10.674505522970748, 9.726780665897085, 11.62223038004441)

# Overall metrics

In [6]:
# Overall soft precision and recall from confusion counts summed over every row.
# Each count is summed from its own column instead of aggregating the whole frame
# (text columns included) three times.
tp = proposed['tp_soft'].sum()
fp = proposed['fp_soft'].sum()
fn = proposed['fn_soft'].sum()
precision = tp / (tp + fp)
recall = tp / (tp + fn)
print(float(precision), float(recall))

0.9538015224060691 0.9392966399514575


In [7]:
# Mean of the `error` column over all rows.
proposed['error'].mean()

7.130819659987407

## Regional metrics

In [8]:
# Soft precision and recall per continent, from confusion counts summed within each continent.
# numeric_only=True keeps text columns out of the aggregation, and scalars are read with .loc
# because calling float() on a one-element Series is deprecated in recent pandas.
sums = proposed.groupby("CONTINENT").sum(numeric_only=True)
for i in ['Africa', 'Asia', "Australia", "Europe", "North America", "South America"]:
    tp = sums.loc[i, 'tp_soft']
    fp = sums.loc[i, 'fp_soft']
    fn = sums.loc[i, 'fn_soft']
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(i, float(precision), float(recall))

Africa 0.9522558214747736 0.9107957621220323
Asia 0.9167635095874491 0.9236060295624177
Australia 0.9480096628505409 0.929557157569516
Europe 0.9563005478737282 0.9613165486493574
North America 0.9710346195813907 0.9739329419958214
South America 0.9450782700981692 0.9425774014289494


# Cloud metrics

In [14]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` below 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] < 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.75 cloud: ", precision, recall, error, count)

KeyError: 'clouds'

In [15]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` of at least 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] >= 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.75 cloud: ", precision, recall, error, count)

KeyError: 'clouds'

# Slope metrics

In [16]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` below 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] < 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.1 slope: ", precision, recall, error, count)

KeyError: 'slopes'

In [17]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` of at least 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] >= 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.1 slope: ", precision, recall, error, count)

KeyError: 'slopes'

# Canopy metrics

In [9]:
# Soft precision/recall, mean error and pixel count for rows with `true` below 20.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'] < 20]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 20%: ", precision, recall, error, count)

Below 20%:  0.5980007840062721 0.661391719054845 3.9048896008915515 114856


In [10]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [20, 40]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(20, 40)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 20 and 40%: ", precision, recall, error, count)

Between 20 and 40%:  0.8791262933158154 0.8632478632478633 14.681007538150398 21756


In [11]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [0, 40]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(0, 40)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 0 and 40%: ", precision, recall, error, count)

Between 0 and 40%:  0.7509605933339291 0.7771407434806732 5.6210288993646245 136612


In [12]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [40, 60]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(40, 60)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 40 and 60%: ", precision, recall, error, count)

Between 40 and 60%:  0.9301621236415464 0.9048526863084922 17.00968523002421 11564


In [13]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [60, 101]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(60, 101)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 60 and 100%: ", precision, recall, error, count)

Between 60 and 100%:  0.9931259104076989 0.9705223761236045 8.487672525657661 67816


In [14]:
# Mean of the soft_prec and soft_rec columns over the full proposed-model results table.
# Column-level means avoid averaging every column of the frame twice, which also fails on
# non-numeric columns in pandas >= 2.0.
# NOTE: this rebinds `proposed`, replacing the joined sample frame used by the cells above.
proposed = pd.read_csv("../../data/metrics/proposed.csv")
print(proposed['soft_prec'].mean(), proposed['soft_rec'].mean())

0.869002896348181 0.8620703300158101


# Load Random forest predictions

In [17]:
# Mean of the soft_prec and soft_rec columns over the full random-forest results table.
# Column-level means avoid averaging every column of the frame twice, which also fails on
# non-numeric columns in pandas >= 2.0.
# NOTE: `proposed` is reused as the variable name for the random-forest table.
proposed = pd.read_csv("../../data/metrics/rf.csv")
print(proposed['soft_prec'].mean(), proposed['soft_rec'].mean())

0.7993129320280367 0.7739658017311516


In [19]:
# Read the random-forest sampled per-plot metrics, discard the stray 'Unnamed: 0' column and
# attach the plot metadata. The join aligns on the row index and keeps indices present in both.
proposed = (
    pd.read_csv("../../data/metrics/rf-sample.csv")
    .drop(columns=['Unnamed: 0'])
    .join(continents, how='inner')
)
proposed.head(5)

Unnamed: 0.1,true,pred,f1_hard,error,tp,fp,fn,tp_soft,fp_soft,fn_soft,group,model,Unnamed: 0,lats,longs,id,CONTINENT,clouds,slopes
0,29.081633,37.755102,0.70229,8.673469,46.0,28,11,56.0,8.0,1.0,20,Random Forest,0,6.552393,37.070625,135505799,Africa,0.552239,0.223983
1,100.0,16.326531,0.280702,83.673469,32.0,0,164,110.0,0.0,86.0,90,Random Forest,1,6.495981,35.55612,135505800,Africa,0.636364,0.086151
2,11.22449,3.061224,0.071429,8.163265,1.0,5,21,3.0,4.0,19.0,10,Random Forest,2,8.384319,38.972473,135505801,Africa,0.307692,0.053171
3,3.061224,50.0,0.096154,46.938776,5.0,93,1,6.0,74.0,0.0,0,Random Forest,3,7.217695,33.701621,135505803,Africa,0.688525,0.041682
4,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,Random Forest,4,9.565499,40.256333,135505804,Africa,0.24,0.048161


In [20]:
# 95% confidence intervals of `error` for rows whose `group` is 30, 40, 50, 60 or 70,
# followed by the interval over all of those rows pooled together (ccgroup == 1).
means_l = []
means = []
means_u = []
proposed['ccgroup'] = 0
# One .loc assignment instead of chained indexing, which pandas cannot guarantee writes
# back to `proposed` (the source of the SettingWithCopyWarning).
proposed.loc[(proposed['group'] >= 30) & (proposed['group'] <= 70), 'ccgroup'] = 1
for i in range(0, 100, 10):
    group = proposed[proposed['group'] == i]
    ci = mean_confidence_interval(group['error'], 0.95)
    if 30 <= i <= 70:
        # mean_confidence_interval returns (mean, lower, upper); unpack by name so that
        # each list receives the value its name promises.
        mean, lower, upper = ci
        means.append(mean)
        means_l.append(lower)
        means_u.append(upper)
        print(i, ci)
# Printed as (mean, lower, upper), the same order as the per-group tuples above.
print(np.mean(means), np.mean(means_l), np.mean(means_u))

print(mean_confidence_interval(proposed[proposed['ccgroup'] == 1]['error']))

30 (19.209183673469386, 15.04886414595278, 23.369503200985992)
40 (22.871217452498243, 17.401675805580712, 28.340759099415774)
50 (25.799319727891156, 20.47065463292259, 31.127984822859723)
60 (25.16458196181698, 17.692327047060818, 32.63683687657314)
70 (20.69387755102041, 13.303011188824048, 28.084743913216773)
22.747636073339233 16.78330656406819 28.711965582610283
(22.60039499670836, 20.05907216565266, 25.14171782776406)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [21]:
# 95% confidence interval of `error` over all rows; the tuple is (mean, lower, upper).
mean_confidence_interval(proposed['error'], 0.95)

(10.674505522970748, 9.726780665897085, 11.62223038004441)

In [22]:
# Soft precision and recall per continent, from confusion counts summed within each continent.
# numeric_only=True keeps text columns out of the aggregation, and scalars are read with .loc
# because calling float() on a one-element Series is deprecated in recent pandas.
sums = proposed.groupby("CONTINENT").sum(numeric_only=True)
for i in ['Africa', 'Asia', "Australia", "Europe", "North America", "South America"]:
    tp = sums.loc[i, 'tp_soft']
    fp = sums.loc[i, 'fp_soft']
    fn = sums.loc[i, 'fn_soft']
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(i, float(precision), float(recall))

Africa 0.9101709649376992 0.8501662671100456
Asia 0.9038635677633565 0.8764817795990049
Australia 0.9214785423410253 0.9088568486096807
Europe 0.8456966238706609 0.9328612640965119
North America 0.9161523122210616 0.9599044871157099
South America 0.8334108887854816 0.947869806827203


In [23]:
# Soft precision/recall, mean error and pixel count for rows with `true` below 20.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'] < 20]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 20%: ", precision, recall, error, count)

Below 20%:  0.2740619902120718 0.47344461305007585 7.2393352571011365 115052


In [24]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [20, 40]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(20, 40)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
# Label corrected: the mask selects the 20-40 band, not everything below 40.
print("Between 20 and 40%: ", precision, recall, error, count)

Below 40%:  0.7921177266576455 0.7552007740686986 17.96745725317154 21756


In [25]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [40, 60]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(40, 60)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("40 - 60 %: ", precision, recall, error, count)

40 - 60 %:  0.8788853161843515 0.8526863084922011 24.36008301625735 11564


In [26]:
# Soft precision/recall, mean error and pixel count for rows with `true` of at least 60.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'] >= 60]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Greater than 60%: ", precision, recall, error, count)

Greater than 60%:  0.9921236390111526 0.9589904353667509 11.829066886870354 67816


In [29]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` below 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] < 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.75 cloud: ", precision, recall, error, count)

Below 0.75 cloud:  0.899151446849415 0.9016619577862722 10.545589203423303 170128


In [27]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` of at least 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] >= 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.75 cloud: ", precision, recall, error, count)

Above 0.75 cloud:  0.8935887611749681 0.9237431347697508 11.150673035171515 46060


In [30]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` below 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] < 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.1 slope: ", precision, recall, error, count)

Below 0.1 slope:  0.893619174523926 0.8911390701742582 10.68934240362812 176400


In [28]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` of at least 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] >= 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.1 slope: ", precision, recall, error, count)

Above 0.1 slope:  0.9097740715601668 0.9547674773582986 10.608726249120338 39788


In [44]:
# Mean of the soft_prec and soft_rec columns over the full random-forest results table.
# Column-level means avoid averaging every column of the frame twice, which also fails on
# non-numeric columns in pandas >= 2.0.
# NOTE: this rebinds `proposed`, replacing the joined sample frame used by the cells above.
proposed = pd.read_csv("../../data/metrics/rf.csv")
print(proposed['soft_prec'].mean(), proposed['soft_rec'].mean())

0.7993129320280367 0.7739658017311516


# Load U-Net predictions

In [46]:
# Read the U-Net sampled per-plot metrics, discard the stray 'Unnamed: 0' column and
# attach the plot metadata. The join aligns on the row index and keeps indices present in both.
proposed = (
    pd.read_csv("../../data/metrics/unet-sample.csv")
    .drop(columns=['Unnamed: 0'])
    .join(continents, how='inner')
)
proposed.head(5)

Unnamed: 0.1,true,pred,f1_hard,error,tp,fp,fn,tp_soft,fp_soft,fn_soft,group,model,Unnamed: 0,lats,longs,id,CONTINENT,clouds,slopes
0,29.081633,2.040816,0.065574,27.040816,2.0,2,55,9.0,0.0,48.0,20,U-net,0,6.552393,37.070625,135505799,Africa,0.552239,0.223983
1,100.0,0.0,0.0,100.0,0.0,0,196,0.0,0.0,196.0,90,U-net,1,6.495981,35.55612,135505800,Africa,0.636364,0.086151
2,11.22449,0.0,0.0,11.22449,0.0,0,22,0.0,0.0,22.0,10,U-net,2,8.384319,38.972473,135505801,Africa,0.307692,0.053171
3,3.061224,0.0,0.0,3.061224,0.0,0,6,0.0,0.0,6.0,0,U-net,3,7.217695,33.701621,135505803,Africa,0.688525,0.041682
4,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,U-net,4,9.565499,40.256333,135505804,Africa,0.24,0.048161


In [47]:
# 95% confidence intervals of `error` for rows whose `group` is 30, 40, 50, 60 or 70,
# followed by the interval over all of those rows pooled together (ccgroup == 1).
means_l = []
means = []
means_u = []
proposed['ccgroup'] = 0
# One .loc assignment instead of chained indexing, which pandas cannot guarantee writes
# back to `proposed` (the source of the SettingWithCopyWarning).
proposed.loc[(proposed['group'] >= 30) & (proposed['group'] <= 70), 'ccgroup'] = 1
for i in range(0, 100, 10):
    group = proposed[proposed['group'] == i]
    ci = mean_confidence_interval(group['error'], 0.95)
    if 30 <= i <= 70:
        # mean_confidence_interval returns (mean, lower, upper); unpack by name so that
        # each list receives the value its name promises.
        mean, lower, upper = ci
        means.append(mean)
        means_l.append(lower)
        means_u.append(upper)
        print(i, ci)
# Printed as (mean, lower, upper), the same order as the per-group tuples above.
print(np.mean(means), np.mean(means_l), np.mean(means_u))

print(mean_confidence_interval(proposed[proposed['ccgroup'] == 1]['error']))

30 (22.44897959183674, 17.379012066109325, 27.518947117564153)
40 (35.063335679099225, 28.84918042890349, 41.277490929294956)
50 (29.251700680272112, 23.098195550089848, 35.405205810454376)
60 (31.797235023041477, 25.74165908138921, 37.852810964693745)
70 (28.326530612244895, 20.757763018099073, 35.89529820639072)
29.37755631729889 23.16516202891819 35.58995060567959
(28.94338380513496, 26.24811525587619, 31.63865235439373)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [48]:
# 95% confidence interval of `error` over all rows; the tuple is (mean, lower, upper).
mean_confidence_interval(proposed['error'], 0.95)

(11.333191481488333, 10.169887283995882, 12.496495678980784)

In [49]:
# Soft precision and recall per continent, from confusion counts summed within each continent.
# numeric_only=True keeps text columns out of the aggregation, and scalars are read with .loc
# because calling float() on a one-element Series is deprecated in recent pandas.
sums = proposed.groupby("CONTINENT").sum(numeric_only=True)
for i in ['Africa', 'Asia', "Australia", "Europe", "North America", "South America"]:
    tp = sums.loc[i, 'tp_soft']
    fp = sums.loc[i, 'fp_soft']
    fn = sums.loc[i, 'fn_soft']
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(i, float(precision), float(recall))

Africa 0.9254333669632211 0.7802954141211044
Asia 0.901172280753821 0.888775062198156
Australia 0.9290932531155995 0.8906282183316169
Europe 0.8641059924638386 0.9322056123787045
North America 0.9104266328224153 0.9278181275494976
South America 0.9266632417159003 0.9546176237099762


In [50]:
# Soft precision/recall, mean error and pixel count for rows with `true` below 20.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'] < 20]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 20%: ", precision, recall, error, count)

Below 20%:  0.3298429319371728 0.3550834597875569 6.084205402774397 115052


In [51]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [20, 40]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(20, 40)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 20 and 40%: ", precision, recall, error, count)

Between 20 and 40%:  0.7590382902938557 0.6873084986292534 24.08990623276338 21756


In [55]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [40, 60]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(40, 60)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("40 - 60 %: ", precision, recall, error, count)

40 - 60 %:  0.8250167448091091 0.853899480069324 32.10826703562781 11564


In [56]:
# Soft precision/recall, mean error and pixel count for rows with `true` of at least 60.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'] >= 60]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Greater than 60%: ", precision, recall, error, count)

Greater than 60%:  0.9863413810568968 0.934407088704776 12.603220478943024 67816


In [52]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` of at least 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] >= 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.75 cloud: ", precision, recall, error, count)

Above 0.75 cloud:  0.90844327176781 0.90911491339248 10.514546244029527 46060


In [53]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` of at least 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] >= 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.1 slope: ", precision, recall, error, count)

Above 0.1 slope:  0.9299184373214193 0.9107560801872392 11.870413189906506 39788


In [54]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` below 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] < 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.1 slope: ", precision, recall, error, count)

Below 0.1 slope:  0.9082675911840662 0.863688353629819 11.212018140589569 176400


In [57]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` below 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] < 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.75 cloud: ", precision, recall, error, count)

Below 0.75 cloud:  0.9155420266394524 0.8647664949310288 11.554829304993886 170128


In [22]:
# Mean of the soft_prec and soft_rec columns over the full U-Net results table.
# Column-level means avoid averaging every column of the frame twice, which also fails on
# non-numeric columns in pandas >= 2.0.
# NOTE: this rebinds `proposed`, replacing the joined sample frame used by the cells above.
proposed = pd.read_csv("../../data/metrics/unet.csv")
print(proposed['soft_prec'].mean(), proposed['soft_rec'].mean())

0.772557401796037 0.7363174550266559


# Load SVM predictions

In [31]:
# Read the SVM sampled per-plot metrics, discard the stray 'Unnamed: 0' column and
# attach the plot metadata. The join aligns on the row index and keeps indices present in both.
proposed = (
    pd.read_csv("../../data/metrics/SVM-sample.csv")
    .drop(columns=['Unnamed: 0'])
    .join(continents, how='inner')
)
proposed.head(5)



Unnamed: 0.1,true,pred,f1_hard,error,tp,fp,fn,tp_soft,fp_soft,fn_soft,group,model,Unnamed: 0,lats,longs,id,CONTINENT,clouds,slopes
0,29.081633,41.836735,0.661871,12.755102,46.0,36,11,55.0,9.0,2.0,20,SVM,0,6.552393,37.070625,135505799,Africa,0.552239,0.223983
1,100.0,87.244898,0.93188,12.755102,171.0,0,25,195.0,0.0,1.0,90,SVM,1,6.495981,35.55612,135505800,Africa,0.636364,0.086151
2,11.22449,0.0,0.0,11.22449,0.0,0,22,0.0,0.0,22.0,10,SVM,2,8.384319,38.972473,135505801,Africa,0.307692,0.053171
3,3.061224,100.0,0.059406,96.938776,6.0,190,0,6.0,162.0,0.0,0,SVM,3,7.217695,33.701621,135505803,Africa,0.688525,0.041682
4,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0,SVM,4,9.565499,40.256333,135505804,Africa,0.24,0.048161


In [32]:
# Soft precision and recall per continent, from confusion counts summed within each continent.
# numeric_only=True keeps text columns out of the aggregation, and scalars are read with .loc
# because calling float() on a one-element Series is deprecated in recent pandas.
sums = proposed.groupby("CONTINENT").sum(numeric_only=True)
for i in ['Africa', 'Asia', "Australia", "Europe", "North America", "South America"]:
    tp = sums.loc[i, 'tp_soft']
    fp = sums.loc[i, 'fp_soft']
    fn = sums.loc[i, 'fn_soft']
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(i, float(precision), float(recall))

Africa 0.791926307233812 0.7892381870781099
Asia 0.8606987554355976 0.8400409776086638
Australia 0.9462399495506858 0.6181256436663234
Europe 0.721084107392798 0.740397177946172
North America 0.8756900726392252 0.920863675714213
South America 0.7171118810774562 0.8471085291787694


In [33]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` below 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] < 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.75 cloud: ", precision, recall, error, count)

Below 0.75 cloud:  0.8171858658940113 0.7938971984409874 17.674927113702623 170128


In [34]:
# Soft precision/recall, mean error and pixel count for rows with `clouds` of at least 0.75.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['clouds'] >= 0.75]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.75 cloud: ", precision, recall, error, count)

Above 0.75 cloud:  0.8247766711161311 0.8586317477284875 16.79114198871038 46060


In [35]:
# Soft precision/recall, mean error and pixel count for rows with `true` below 20.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'] < 20]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 20%: ", precision, recall, error, count)

Below 20%:  0.13721286370597244 0.3940193491644679 12.968192370326738 115444


In [36]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [20, 40]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(20, 40)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Between 20 and 40%: ", precision, recall, error, count)

Between 20 and 40%:  0.7213085764809902 0.6640078137717728 26.205936920222637 21560


In [37]:
# Soft precision/recall, mean error and pixel count for rows with `true` in [40, 60]
# (Series.between is inclusive at both ends).
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'].between(40, 60)]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
# Label corrected: this cell filters the 40-60 band but previously printed "Between 20 and 40%".
print("Between 40 and 60%: ", precision, recall, error, count)

Between 20 and 40%:  0.8595428794686463 0.7365249414127888 27.868852459016395 11956


In [38]:
# Soft precision/recall, mean error and pixel count for rows with `true` of at least 60.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['true'] >= 60]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Greater than 60%: ", precision, recall, error, count)

Greater than 60%:  0.9904948954067926 0.8610730795870045 20.603022550127925 67228


In [41]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` below 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] < 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Below 0.1 slope: ", precision, recall, error, count)

Below 0.1 slope:  0.8046831183398766 0.7727731771359322 17.636621315192745 176400


In [40]:
# Soft precision/recall, mean error and pixel count for rows with `slopes` of at least 0.1.
# The row mask is evaluated once and each statistic is read from its own column, so text
# columns such as 'model' are never summed or averaged.
subset = proposed[proposed['slopes'] >= 0.1]
tp = subset['tp_soft'].sum()
fp = subset['fp_soft'].sum()
fn = subset['fn_soft'].sum()
error = subset['error'].mean()
# NOTE(review): 196 looks like a per-plot pixel count (14 x 14) — confirm.
count = subset['error'].count() * 196

precision = tp / (tp + fp)
recall = tp / (tp + fn)
print("Above 0.1 slope: ", precision, recall, error, count)

Above 0.1 slope:  0.8586919851559112 0.9214418700868846 16.8216547702825 39788
