In [1]:
import pandas as pd
import os
import numpy as np
from numpy import dot
from numpy.linalg import norm
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
## NOTE(review): this silences every warning for the whole notebook, including the NumPy
## divide-by-zero / invalid-value RuntimeWarnings that the numeric cells may raise.
warnings.filterwarnings("ignore")

## Reading Grid Summary of All locations in GeoNames

In [2]:
## Path to the GeoNames grid summary CSV, located under ../data relative to the working directory.
## os.path.join picks the right separator for the current OS (the path is not Windows-only).
dir_geonames_grids = os.path.join(
    os.path.dirname(os.getcwd()),
    'data',
    'grids-100sqkm-admin0-natural-earth',
    'grids_summary_GeoNames_poi.csv',
)

In [3]:
## load the GeoNames grid summary CSV and preview its first five rows
df_geonames_grids = pd.read_csv(filepath_or_buffer=dir_geonames_grids)
df_geonames_grids.head(5)

Unnamed: 0,OID_,Join_Count,TARGET_FID,PageName,PageNumber,featurecla,scalerank,LABELRANK,SOVEREIGNT,SOV_A3,...,FCLASS_PL,FCLASS_GR,FCLASS_IT,FCLASS_NL,FCLASS_SE,FCLASS_BD,FCLASS_UA,Shape_Length,Shape_Area,Point_Count
0,1,0,1456,A1456,1,,,,,,...,,,,,,,,40000.0,100000000.0,1
1,2,0,1457,A1457,2,,,,,,...,,,,,,,,40000.0,100000000.0,0
2,3,0,1458,A1458,3,,,,,,...,,,,,,,,40000.0,100000000.0,0
3,4,0,1459,A1459,4,,,,,,...,,,,,,,,40000.0,100000000.0,0
4,5,0,1461,A1461,5,,,,,,...,,,,,,,,40000.0,100000000.0,1


In [4]:
## Keep only the columns needed for measuring representation biases.
## Point_Count is the number of locations that fall within the grid identified by TARGET_FID.
df_geonames_grids = df_geonames_grids.loc[:, ['OID_', 'TARGET_FID', 'SOVEREIGNT', 'Point_Count']]
df_geonames_grids.head(5)

Unnamed: 0,OID_,TARGET_FID,SOVEREIGNT,Point_Count
0,1,1456,,1
1,2,1457,,0
2,3,1458,,0
3,4,1459,,0
4,5,1461,,1


In [5]:
## The same preprocessing is carried out for all the other grid summary datasets below.

## Measuring Spatial (Diversity) Misalignment between GeoWiki and GeoNames

In [6]:
## All comments in this section also apply to the other spatial (diversity) misalignment calculations below.

In [7]:
## Path to the GeoWiki grid summary CSV, located under ../data relative to the working directory.
## os.path.join picks the right separator for the current OS (the path is not Windows-only).
dir_geowiki_grids = os.path.join(
    os.path.dirname(os.getcwd()),
    'data',
    'grids-100sqkm-admin0-natural-earth',
    'grids_summary_GeoWiki_poi.csv',
)

In [8]:
## load the GeoWiki grid summary CSV and preview its first five rows
df_geowiki_grids = pd.read_csv(filepath_or_buffer=dir_geowiki_grids)
df_geowiki_grids.head(5)

Unnamed: 0,OID_,Join_Count,TARGET_FID,PageName,PageNumber,featurecla,scalerank,LABELRANK,SOVEREIGNT,SOV_A3,...,FCLASS_PL,FCLASS_GR,FCLASS_IT,FCLASS_NL,FCLASS_SE,FCLASS_BD,FCLASS_UA,Shape_Length,Shape_Area,Point_Count
0,1,0,1456,A1456,1,,,,,,...,,,,,,,,40000.0,100000000.0,0
1,2,0,1457,A1457,2,,,,,,...,,,,,,,,40000.0,100000000.0,0
2,3,0,1458,A1458,3,,,,,,...,,,,,,,,40000.0,100000000.0,0
3,4,0,1459,A1459,4,,,,,,...,,,,,,,,40000.0,100000000.0,0
4,5,0,1461,A1461,5,,,,,,...,,,,,,,,40000.0,100000000.0,0


In [9]:
## keep only the grid identifiers, the region name and the per-grid point count
df_geowiki_grids = df_geowiki_grids.loc[:, ['OID_', 'TARGET_FID', 'SOVEREIGNT', 'Point_Count']]
df_geowiki_grids.head(5)

Unnamed: 0,OID_,TARGET_FID,SOVEREIGNT,Point_Count
0,1,1456,,0
1,2,1457,,0
2,3,1458,,0
3,4,1459,,0
4,5,1461,,0


In [10]:
## admin0_list holds every distinct SOVEREIGNT value (Natural Earth Admin 0 region) found in the
## GeoWiki grid summary; it also contains NaN when some grids have no SOVEREIGNT value.
admin0_list = df_geowiki_grids['SOVEREIGNT'].unique().tolist()

In [11]:
## Merge the two grid summaries for the spatial (diversity) misalignment calculation.
## Inner join on the grid keys; pandas' default suffixes apply, so Point_Count_x is the
## GeoNames count (left frame) and Point_Count_y is the GeoWiki count (right frame).
df_geowiki_geonames_grids_merged = pd.merge(
    df_geonames_grids,
    df_geowiki_grids,
    how='inner',
    on=['OID_', 'SOVEREIGNT', 'TARGET_FID'],
)
df_geowiki_geonames_grids_merged.head(5)

Unnamed: 0,OID_,TARGET_FID,SOVEREIGNT,Point_Count_x,Point_Count_y
0,1,1456,,1,0
1,2,1457,,0,0
2,3,1458,,0,0
3,4,1459,,0,0
4,5,1461,,1,0


In [12]:
## Calculate spatial misalignment: the cosine distance (1 - cosine similarity) between the
## per-grid GeoNames counts (Point_Count_x) and GeoWiki counts (Point_Count_y) of each region.
sm_geowiki = {}
## Group the merged frame once instead of re-filtering it for every region. groupby skips rows
## whose SOVEREIGNT is NaN, and every region in the merged frame also exists in the GeoWiki
## summary because the merge is an inner join on SOVEREIGNT.
for admin0, df_admin0 in df_geowiki_geonames_grids_merged.groupby('SOVEREIGNT'):
    gc = df_admin0['Point_Count_x'].to_numpy(dtype=float)
    gp = df_admin0['Point_Count_y'].to_numpy(dtype=float)
    norm_product = norm(gp) * norm(gc)
    ## The cosine distance is undefined when either dataset has no points in the region
    ## (zero norm) or a count is NaN; such regions are left out of the result.
    if not norm_product > 0:
        continue
    sm_geowiki[admin0] = 1 - dot(gp, gc) / norm_product

## list of (region, misalignment) pairs, ascending by misalignment, ties broken by region name
sm_geowiki = sorted(sm_geowiki.items(), key = lambda kv:(kv[1], kv[0]))

In [13]:
## display the (region, spatial misalignment) pairs, sorted ascending by misalignment
sm_geowiki

[('Barbados', 0.0),
 ('Federated States of Micronesia', 0.0),
 ('Saint Lucia', 0.0),
 ('Comoros', 0.009169831955701091),
 ('Luxembourg', 0.02008656540351006),
 ('Slovenia', 0.022833663200008014),
 ('Northern Cyprus', 0.028014686549853396),
 ('United Kingdom', 0.04773332815928455),
 ('Cyprus', 0.05395764881467724),
 ('Cabo Verde', 0.0618173443358514),
 ('Slovakia', 0.06615253967160784),
 ('Czechia', 0.07045340599556493),
 ('Estonia', 0.07732695663432188),
 ('São Tomé and Principe', 0.08051068557917795),
 ('Moldova', 0.09396088741919872),
 ('Poland', 0.10432917086463656),
 ('Mauritius', 0.1336966723668891),
 ('Nepal', 0.14045200096284582),
 ('Spain', 0.15661301694773155),
 ('Trinidad and Tobago', 0.15842393201886362),
 ('Italy', 0.16715368566494226),
 ('Costa Rica', 0.16783548792711478),
 ('Netherlands', 0.17349858130962958),
 ('Hungary', 0.1761168268096871),
 ('France', 0.19763911060240957),
 ('Lebanon', 0.20902390733990417),
 ('Azerbaijan', 0.2189315709170726),
 ('Jamaica', 0.220471175

In [14]:
## summary statistics of the spatial misalignment values (second item of each pair)
pd.Series([value for _, value in sm_geowiki]).describe()

count    181.000000
mean       0.471373
std        0.218703
min        0.000000
25%        0.309278
50%        0.494639
75%        0.628910
max        0.865160
dtype: float64

In [15]:
## Calculate spatial diversity misalignment: the relative difference between the Shannon
## diversity index (SDI) of the GeoWiki counts (Point_Count_y) and of the GeoNames counts
## (Point_Count_x) within each region.
def shannon_diversity_index(counts):
    """Return the Shannon diversity index -sum(p * ln(p)) of a 1-D array of counts.

    p is each count's share of the total. Returns NaN when the counts do not sum to a
    positive number (no points at all, or NaN counts), because the shares are undefined.
    """
    total = counts.sum()
    if not total > 0:
        return np.nan
    ## grids with a zero count are excluded: they contribute nothing to the index
    shares = counts[counts != 0] / total
    ## NOTE: NumPy's summation order may differ from a sequential Python loop in the
    ## last floating-point digits.
    return -np.sum(shares * np.log(shares))

sdm_geowiki = {}
## Group the merged frame once instead of re-filtering it for every region; groupby skips
## rows whose SOVEREIGNT is NaN.
for admin0, df_admin0 in df_geowiki_geonames_grids_merged.groupby('SOVEREIGNT'):
    sdi_c = shannon_diversity_index(df_admin0['Point_Count_x'].to_numpy(dtype=float))
    sdi_p = shannon_diversity_index(df_admin0['Point_Count_y'].to_numpy(dtype=float))
    ## skip regions where either dataset has no points (index undefined)
    if np.isnan(sdi_p) or np.isnan(sdi_c):
        continue
    sdi_max = max(sdi_p, sdi_c)
    ## skip regions where both indices are zero (the ratio would be 0/0)
    if sdi_max == 0:
        continue
    sdm_geowiki[admin0] = (sdi_p - sdi_c) / sdi_max

## list of (region, misalignment) pairs, ascending by misalignment, ties broken by region name
sdm_geowiki = sorted(sdm_geowiki.items(), key = lambda kv:(kv[1], kv[0]))

In [16]:
## display the (region, spatial diversity misalignment) pairs, sorted ascending by misalignment
sdm_geowiki

[('Kashmir', -1.0),
 ('Kuwait', -0.6231406268051903),
 ('Western Sahara', -0.6116617632683333),
 ('The Bahamas', -0.5949032926407826),
 ('Mauritania', -0.5031359167936578),
 ('Chad', -0.4512511428131118),
 ('Republic of the Congo', -0.4489557868799401),
 ('Sudan', -0.44075278027908316),
 ('Turkmenistan', -0.42870762749452035),
 ('South Sudan', -0.4094576999747741),
 ('Mozambique', -0.4034487063079176),
 ('Oman', -0.39845617180118315),
 ('Madagascar', -0.39488095457185984),
 ('Angola', -0.3920090306960699),
 ('Kazakhstan', -0.3900324407987438),
 ('Sierra Leone', -0.3882137965493693),
 ('Democratic Republic of the Congo', -0.387402485571611),
 ('Laos', -0.3866074927622442),
 ('Somaliland', -0.38512340485237495),
 ('Saudi Arabia', -0.3824381245374625),
 ('Zambia', -0.38052525991574276),
 ('Venezuela', -0.3797528551252116),
 ('Central African Republic', -0.378800378798432),
 ('Guyana', -0.37785963757059743),
 ('Guinea-Bissau', -0.3754470124012201),
 ('Namibia', -0.37475705118626856),
 ('Ga

In [17]:
## summary statistics of the spatial diversity misalignment values (second item of each pair)
pd.Series([value for _, value in sdm_geowiki]).describe()

count    178.000000
mean      -0.223448
std        0.146347
min       -1.000000
25%       -0.303085
50%       -0.216342
75%       -0.143549
max        0.428039
dtype: float64

## Measuring Spatial (Diversity) Misalignment between WikToR and GeoNames

In [18]:
## Path to the WikToR grid summary CSV, located under ../data relative to the working directory.
## os.path.join picks the right separator for the current OS (the path is not Windows-only).
dir_wiktor_grids = os.path.join(
    os.path.dirname(os.getcwd()),
    'data',
    'grids-100sqkm-admin0-natural-earth',
    'grids_summary_WikToR_poi.csv',
)

In [19]:
## load the WikToR grid summary CSV and preview its first five rows
df_wiktor_grids = pd.read_csv(filepath_or_buffer=dir_wiktor_grids)
df_wiktor_grids.head(5)

Unnamed: 0,OID_,Join_Count,TARGET_FID,PageName,PageNumber,featurecla,scalerank,LABELRANK,SOVEREIGNT,SOV_A3,...,FCLASS_PL,FCLASS_GR,FCLASS_IT,FCLASS_NL,FCLASS_SE,FCLASS_BD,FCLASS_UA,Shape_Length,Shape_Area,Point_Count
0,1,0,1456,A1456,1,,,,,,...,,,,,,,,40000.0,100000000.0,0
1,2,0,1457,A1457,2,,,,,,...,,,,,,,,40000.0,100000000.0,0
2,3,0,1458,A1458,3,,,,,,...,,,,,,,,40000.0,100000000.0,0
3,4,0,1459,A1459,4,,,,,,...,,,,,,,,40000.0,100000000.0,0
4,5,0,1461,A1461,5,,,,,,...,,,,,,,,40000.0,100000000.0,0


In [20]:
## keep only the grid identifiers, the region name and the per-grid point count
df_wiktor_grids = df_wiktor_grids.loc[:, ['OID_', 'TARGET_FID', 'SOVEREIGNT', 'Point_Count']]
df_wiktor_grids.head(5)

Unnamed: 0,OID_,TARGET_FID,SOVEREIGNT,Point_Count
0,1,1456,,0
1,2,1457,,0
2,3,1458,,0
3,4,1459,,0
4,5,1461,,0


In [21]:
## every distinct SOVEREIGNT value in the WikToR grid summary (contains NaN when some grids
## have no SOVEREIGNT value); note that this rebinds the admin0_list name used earlier
admin0_list = df_wiktor_grids['SOVEREIGNT'].unique().tolist()

In [22]:
## Inner join of the GeoNames and WikToR grid summaries on the grid keys; with pandas' default
## suffixes, Point_Count_x is the GeoNames count (left) and Point_Count_y the WikToR count (right).
df_wiktor_geonames_grids_merged = pd.merge(
    df_geonames_grids,
    df_wiktor_grids,
    how='inner',
    on=['OID_', 'SOVEREIGNT', 'TARGET_FID'],
)
df_wiktor_geonames_grids_merged.head(5)

Unnamed: 0,OID_,TARGET_FID,SOVEREIGNT,Point_Count_x,Point_Count_y
0,1,1456,,1,0
1,2,1457,,0,0
2,3,1458,,0,0
3,4,1459,,0,0
4,5,1461,,1,0


In [23]:
## Calculate spatial misalignment: the cosine distance (1 - cosine similarity) between the
## per-grid GeoNames counts (Point_Count_x) and WikToR counts (Point_Count_y) of each region.
sm_wiktor = {}
## Group the merged frame once instead of re-filtering it for every region. groupby skips rows
## whose SOVEREIGNT is NaN, and every region in the merged frame also exists in the WikToR
## summary because the merge is an inner join on SOVEREIGNT.
for admin0, df_admin0 in df_wiktor_geonames_grids_merged.groupby('SOVEREIGNT'):
    gc = df_admin0['Point_Count_x'].to_numpy(dtype=float)
    gp = df_admin0['Point_Count_y'].to_numpy(dtype=float)
    norm_product = norm(gp) * norm(gc)
    ## The cosine distance is undefined when either dataset has no points in the region
    ## (zero norm) or a count is NaN; such regions are left out of the result.
    if not norm_product > 0:
        continue
    sm_wiktor[admin0] = 1 - dot(gp, gc) / norm_product

## list of (region, misalignment) pairs, ascending by misalignment, ties broken by region name
sm_wiktor = sorted(sm_wiktor.items(), key = lambda kv:(kv[1], kv[0]))

In [24]:
## display the (region, spatial misalignment) pairs, sorted ascending by misalignment
sm_wiktor

[('Australia', 0.4925566700743349),
 ('El Salvador', 0.4982565185722486),
 ('United Kingdom', 0.5079330828352608),
 ('Hungary', 0.6209303547660514),
 ('Philippines', 0.6520331630934006),
 ('United States of America', 0.6739461461903019),
 ('Israel', 0.7030762400040729),
 ('France', 0.7359596275426319),
 ('Switzerland', 0.7744039092126801),
 ('Argentina', 0.783506566178089),
 ('Lebanon', 0.7844801001497417),
 ('Ireland', 0.8000126012256825),
 ('Cyprus', 0.801960034836821),
 ('Egypt', 0.8034375539249907),
 ('Chile', 0.8035242039563254),
 ('Luxembourg', 0.8035749420295191),
 ('Afghanistan', 0.8080521811211696),
 ('Bhutan', 0.8101149346889834),
 ('Taiwan', 0.8112574017379999),
 ('Nicaragua', 0.8129342562577387),
 ('Uganda', 0.8198756923360152),
 ('Slovenia', 0.825112558123186),
 ('Cuba', 0.8263644925810846),
 ('Costa Rica', 0.8326663729814856),
 ('Iraq', 0.8334747302527087),
 ('Sri Lanka', 0.8340918735169158),
 ('Spain', 0.8382227444695993),
 ('Panama', 0.8421377777851042),
 ('Canada', 0.8

In [25]:
## summary statistics of the spatial misalignment values (second item of each pair)
pd.Series([value for _, value in sm_wiktor]).describe()

count    124.000000
mean       0.892736
std        0.096244
min        0.492557
25%        0.866875
50%        0.916329
75%        0.956252
max        1.000000
dtype: float64

In [26]:
## Calculate spatial diversity misalignment: the relative difference between the Shannon
## diversity index (SDI) of the WikToR counts (Point_Count_y) and of the GeoNames counts
## (Point_Count_x) within each region.
def shannon_diversity_index(counts):
    """Return the Shannon diversity index -sum(p * ln(p)) of a 1-D array of counts.

    p is each count's share of the total. Returns NaN when the counts do not sum to a
    positive number (no points at all, or NaN counts), because the shares are undefined.
    """
    total = counts.sum()
    if not total > 0:
        return np.nan
    ## grids with a zero count are excluded: they contribute nothing to the index
    shares = counts[counts != 0] / total
    ## NOTE: NumPy's summation order may differ from a sequential Python loop in the
    ## last floating-point digits.
    return -np.sum(shares * np.log(shares))

sdm_wiktor = {}
## Group the merged frame once instead of re-filtering it for every region; groupby skips
## rows whose SOVEREIGNT is NaN.
for admin0, df_admin0 in df_wiktor_geonames_grids_merged.groupby('SOVEREIGNT'):
    sdi_c = shannon_diversity_index(df_admin0['Point_Count_x'].to_numpy(dtype=float))
    sdi_p = shannon_diversity_index(df_admin0['Point_Count_y'].to_numpy(dtype=float))
    ## skip regions where either dataset has no points (index undefined)
    if np.isnan(sdi_p) or np.isnan(sdi_c):
        continue
    sdi_max = max(sdi_p, sdi_c)
    ## skip regions where both indices are zero (the ratio would be 0/0)
    if sdi_max == 0:
        continue
    sdm_wiktor[admin0] = (sdi_p - sdi_c) / sdi_max

## list of (region, misalignment) pairs, ascending by misalignment, ties broken by region name
sdm_wiktor = sorted(sdm_wiktor.items(), key = lambda kv:(kv[1], kv[0]))

In [27]:
## display the (region, spatial diversity misalignment) pairs, sorted ascending by misalignment
sdm_wiktor

[('Albania', -1.0),
 ('Algeria', -1.0),
 ('Belarus', -1.0),
 ('Benin', -1.0),
 ('Bhutan', -1.0),
 ('Bosnia and Herzegovina', -1.0),
 ('Brunei', -1.0),
 ('Croatia', -1.0),
 ('Cyprus', -1.0),
 ('Djibouti', -1.0),
 ('East Timor', -1.0),
 ('Ghana', -1.0),
 ('Guinea', -1.0),
 ('Guyana', -1.0),
 ('Hungary', -1.0),
 ('Kosovo', -1.0),
 ('Lesotho', -1.0),
 ('Liberia', -1.0),
 ('Libya', -1.0),
 ('Luxembourg', -1.0),
 ('Malawi', -1.0),
 ('Montenegro', -1.0),
 ('Mozambique', -1.0),
 ('Namibia', -1.0),
 ('North Macedonia', -1.0),
 ('Qatar', -1.0),
 ('Republic of the Congo', -1.0),
 ('Romania', -1.0),
 ('Slovakia', -1.0),
 ('Somalia', -1.0),
 ('Togo', -1.0),
 ('Trinidad and Tobago', -1.0),
 ('Turkmenistan', -1.0),
 ('Vanuatu', -1.0),
 ('Iran', -0.9242115430174368),
 ('Saudi Arabia', -0.9214006080091494),
 ('United Republic of Tanzania', -0.9161335217942945),
 ('Sweden', -0.9132667455903506),
 ('Zimbabwe', -0.9111344036324205),
 ('Paraguay', -0.9014280389669703),
 ('Gabon', -0.9011757216231875),
 ('P

In [28]:
## summary statistics of the spatial diversity misalignment values (second item of each pair)
pd.Series([value for _, value in sdm_wiktor]).describe()

count    124.000000
mean      -0.812131
std        0.166701
min       -1.000000
25%       -1.000000
50%       -0.833592
75%       -0.702824
max       -0.261689
dtype: float64

## Measuring Spatial (Diversity) Misalignment between GeoCorpora and GeoNames

In [29]:
## Path to the GeoCorpora grid summary CSV, located under ../data relative to the working directory.
## os.path.join picks the right separator for the current OS (the path is not Windows-only).
dir_geocorpora_grids = os.path.join(
    os.path.dirname(os.getcwd()),
    'data',
    'grids-100sqkm-admin0-natural-earth',
    'grids_summary_GeoCorpora_poi.csv',
)

In [30]:
## load the GeoCorpora grid summary CSV and preview its first five rows
df_geocorpora_grids = pd.read_csv(filepath_or_buffer=dir_geocorpora_grids)
df_geocorpora_grids.head(5)

Unnamed: 0,OID_,Join_Count,TARGET_FID,PageName,PageNumber,featurecla,scalerank,LABELRANK,SOVEREIGNT,SOV_A3,...,FCLASS_PL,FCLASS_GR,FCLASS_IT,FCLASS_NL,FCLASS_SE,FCLASS_BD,FCLASS_UA,Shape_Length,Shape_Area,Point_Count
0,1,0,1456,A1456,1,,,,,,...,,,,,,,,40000.0,100000000.0,0
1,2,0,1457,A1457,2,,,,,,...,,,,,,,,40000.0,100000000.0,0
2,3,0,1458,A1458,3,,,,,,...,,,,,,,,40000.0,100000000.0,0
3,4,0,1459,A1459,4,,,,,,...,,,,,,,,40000.0,100000000.0,0
4,5,0,1461,A1461,5,,,,,,...,,,,,,,,40000.0,100000000.0,0


In [31]:
## keep only the grid identifiers, the region name and the per-grid point count
df_geocorpora_grids = df_geocorpora_grids.loc[:, ['OID_', 'TARGET_FID', 'SOVEREIGNT', 'Point_Count']]
df_geocorpora_grids.head(5)

Unnamed: 0,OID_,TARGET_FID,SOVEREIGNT,Point_Count
0,1,1456,,0
1,2,1457,,0
2,3,1458,,0
3,4,1459,,0
4,5,1461,,0


In [32]:
## every distinct SOVEREIGNT value in the GeoCorpora grid summary (contains NaN when some grids
## have no SOVEREIGNT value); note that this rebinds the admin0_list name used earlier
admin0_list = df_geocorpora_grids['SOVEREIGNT'].unique().tolist()

In [33]:
## Inner join of the GeoNames and GeoCorpora grid summaries on the grid keys; with pandas' default
## suffixes, Point_Count_x is the GeoNames count (left) and Point_Count_y the GeoCorpora count (right).
df_geocorpora_geonames_grids_merged = pd.merge(
    df_geonames_grids,
    df_geocorpora_grids,
    how='inner',
    on=['OID_', 'SOVEREIGNT', 'TARGET_FID'],
)
df_geocorpora_geonames_grids_merged.head(5)

Unnamed: 0,OID_,TARGET_FID,SOVEREIGNT,Point_Count_x,Point_Count_y
0,1,1456,,1,0
1,2,1457,,0,0
2,3,1458,,0,0
3,4,1459,,0,0
4,5,1461,,1,0


In [34]:
## Calculate spatial misalignment: the cosine distance (1 - cosine similarity) between the
## per-grid GeoNames counts (Point_Count_x) and GeoCorpora counts (Point_Count_y) of each region.
sm_geocorpora = {}
## Group the merged frame once instead of re-filtering it for every region. groupby skips rows
## whose SOVEREIGNT is NaN, and every region in the merged frame also exists in the GeoCorpora
## summary because the merge is an inner join on SOVEREIGNT.
for admin0, df_admin0 in df_geocorpora_geonames_grids_merged.groupby('SOVEREIGNT'):
    gc = df_admin0['Point_Count_x'].to_numpy(dtype=float)
    gp = df_admin0['Point_Count_y'].to_numpy(dtype=float)
    norm_product = norm(gp) * norm(gc)
    ## The cosine distance is undefined when either dataset has no points in the region
    ## (zero norm) or a count is NaN; such regions are left out of the result.
    if not norm_product > 0:
        continue
    sm_geocorpora[admin0] = 1 - dot(gp, gc) / norm_product

## list of (region, misalignment) pairs, ascending by misalignment, ties broken by region name
sm_geocorpora = sorted(sm_geocorpora.items(), key = lambda kv:(kv[1], kv[0]))

In [35]:
## display the (region, spatial misalignment) pairs, sorted ascending by misalignment
sm_geocorpora

[('Greece', 0.35083925393247495),
 ('United Kingdom', 0.38757852137981086),
 ('France', 0.39938106829190745),
 ('Australia', 0.5071440807727251),
 ('Syria', 0.5126939341159864),
 ('Italy', 0.6646059448149255),
 ('United States of America', 0.7045352378950709),
 ('Iraq', 0.7221713886667772),
 ('Jordan', 0.7338413690503784),
 ('Egypt', 0.7384502236613346),
 ('Spain', 0.7596415237187403),
 ('Republic of Serbia', 0.7775027653823088),
 ('Israel', 0.786623989516731),
 ('Kuwait', 0.8047200527442997),
 ('Malaysia', 0.8165033265875271),
 ('Philippines', 0.8248085943546257),
 ('Cyprus', 0.8302514584315609),
 ('Libya', 0.8540500750899789),
 ('Ireland', 0.8556545562413276),
 ('Belgium', 0.8596239775561308),
 ('Costa Rica', 0.8650672422911417),
 ('Lebanon', 0.8704073119265568),
 ('Ukraine', 0.8806574510030248),
 ('Pakistan', 0.8814053045822475),
 ('Qatar', 0.8828732651162372),
 ('Netherlands', 0.8851494069129785),
 ('Bangladesh', 0.8917012486222581),
 ('Liberia', 0.8954797982853817),
 ('Canada', 0.

In [36]:
## summary statistics of the spatial misalignment values (second item of each pair)
pd.Series([value for _, value in sm_geocorpora]).describe()

count    110.000000
mean       0.910579
std        0.128534
min        0.350839
25%        0.895627
50%        0.957071
75%        0.987779
max        0.998944
dtype: float64

In [37]:
## Calculate spatial diversity misalignment: the relative difference between the Shannon
## diversity index (SDI) of the GeoCorpora counts (Point_Count_y) and of the GeoNames counts
## (Point_Count_x) within each region.
def shannon_diversity_index(counts):
    """Return the Shannon diversity index -sum(p * ln(p)) of a 1-D array of counts.

    p is each count's share of the total. Returns NaN when the counts do not sum to a
    positive number (no points at all, or NaN counts), because the shares are undefined.
    """
    total = counts.sum()
    if not total > 0:
        return np.nan
    ## grids with a zero count are excluded: they contribute nothing to the index
    shares = counts[counts != 0] / total
    ## NOTE: NumPy's summation order may differ from a sequential Python loop in the
    ## last floating-point digits.
    return -np.sum(shares * np.log(shares))

sdm_geocorpora = {}
## Group the merged frame once instead of re-filtering it for every region; groupby skips
## rows whose SOVEREIGNT is NaN.
for admin0, df_admin0 in df_geocorpora_geonames_grids_merged.groupby('SOVEREIGNT'):
    sdi_c = shannon_diversity_index(df_admin0['Point_Count_x'].to_numpy(dtype=float))
    sdi_p = shannon_diversity_index(df_admin0['Point_Count_y'].to_numpy(dtype=float))
    ## skip regions where either dataset has no points (index undefined)
    if np.isnan(sdi_p) or np.isnan(sdi_c):
        continue
    sdi_max = max(sdi_p, sdi_c)
    ## skip regions where both indices are zero (the ratio would be 0/0)
    if sdi_max == 0:
        continue
    sdm_geocorpora[admin0] = (sdi_p - sdi_c) / sdi_max

## list of (region, misalignment) pairs, ascending by misalignment, ties broken by region name
sdm_geocorpora = sorted(sdm_geocorpora.items(), key = lambda kv:(kv[1], kv[0]))

In [38]:
## display the (region, spatial diversity misalignment) pairs, sorted ascending by misalignment
sdm_geocorpora

[('Albania', -1.0),
 ('Angola', -1.0),
 ('Argentina', -1.0),
 ('Armenia', -1.0),
 ('Azerbaijan', -1.0),
 ('Belarus', -1.0),
 ('Benin', -1.0),
 ('Botswana', -1.0),
 ('Burkina Faso', -1.0),
 ('Colombia', -1.0),
 ('Costa Rica', -1.0),
 ('Croatia', -1.0),
 ('Cuba', -1.0),
 ('Cyprus', -1.0),
 ('Dominican Republic', -1.0),
 ('El Salvador', -1.0),
 ('Ethiopia', -1.0),
 ('Fiji', -1.0),
 ('Ghana', -1.0),
 ('Greece', -1.0),
 ('Guinea', -1.0),
 ('Haiti', -1.0),
 ('Honduras', -1.0),
 ('Hungary', -1.0),
 ('Ivory Coast', -1.0),
 ('Jordan', -1.0),
 ('Kuwait', -1.0),
 ('Lebanon', -1.0),
 ('Malawi', -1.0),
 ('Mongolia', -1.0),
 ('Mozambique', -1.0),
 ('Myanmar', -1.0),
 ('Nicaragua', -1.0),
 ('Niger', -1.0),
 ('Norway', -1.0),
 ('Panama', -1.0),
 ('Peru', -1.0),
 ('Poland', -1.0),
 ('Qatar', -1.0),
 ('Republic of Serbia', -1.0),
 ('Romania', -1.0),
 ('Rwanda', -1.0),
 ('Saudi Arabia', -1.0),
 ('Senegal', -1.0),
 ('Slovenia', -1.0),
 ('Somalia', -1.0),
 ('Sri Lanka', -1.0),
 ('Sweden', -1.0),
 ('Switzer

In [39]:
## summary statistics of the spatial diversity misalignment values (second item of each pair)
pd.Series([value for _, value in sdm_geocorpora]).describe()

count    110.000000
mean      -0.906054
std        0.127746
min       -1.000000
25%       -1.000000
50%       -1.000000
75%       -0.860386
max       -0.431835
dtype: float64