In [1]:
import sqlite3
from contextlib import closing

import pandas as pd
import pycountry_convert as pc

In [2]:
# Read the results of every measurement that targeted a public resolver
# (measurements whose `resolver` field is non-empty), plus the resolver and
# probe metadata tables.
#
# `closing(...)` is used because sqlite3's own context manager only manages
# the transaction (commit/rollback); it does not close the connection.
# The empty-string literal is written with single quotes: in SQL, double
# quotes denote identifiers, and SQLite only accepts "" as a string as a
# compatibility quirk.
with closing(sqlite3.connect('../data/main.db')) as conn:
    df = pd.read_sql(
        """
        select *
        from results
        where msm_id in (select distinct msm_id
                         from measurements
                         where resolver != '')
        """,
        con=conn,
    )

    resolvers = pd.read_sql('select * from resolvers', con=conn)

    probes = pd.read_sql('select * from probes', con=conn)

In [3]:
df.head()

Unnamed: 0,msm_id,domain,resolver,prb_id,rt,size,src_addr,dst_addr,timestamp,is_tls,ttl,return_code,err,err_msg
0,22202833,google.com,185.228.168.168,10006,24.121,44.0,192.168.0.106,185.228.168.168,1562122818,0,66.0,NOERROR,0,
1,22202833,google.com,185.228.168.168,10007,84.187,44.0,192.168.7.24,185.228.168.168,1562122829,0,275.0,NOERROR,0,
2,22202833,google.com,185.228.168.168,10019,47.384,124.0,192.168.31.41,185.228.168.168,1562122816,0,252.0,NOERROR,0,
3,22202833,google.com,185.228.168.168,10023,16.192,44.0,172.31.253.253,185.228.168.168,1562122821,0,290.0,NOERROR,0,
4,22202833,google.com,185.228.168.168,10031,16.253,44.0,100.64.1.223,185.228.168.168,1562122818,0,293.0,NOERROR,0,


In [4]:
resolvers.head()

Unnamed: 0,id,name,ipv4,ipv4_2,tls
0,1,CleanBrowsing,185.228.168.168,185.228.169.168,True
1,2,Cloudflare 1.1.1.1,1.1.1.1,1.0.0.1,True
2,3,Comodo Secure DNS,8.26.56.26,8.20.247.20,False
3,4,CZ.NIC ODVR,217.31.204.130,193.29.206.206,False
4,5,Oracle + Dyn,216.146.35.35,216.146.36.36,False


In [5]:
# Reshape the resolver table for merging: stack the primary (`ipv4`) and the
# secondary (`ipv4_2`) address of every resolver into a single `resolver`
# column, each row carrying the resolver's name as `resolver_name`.
# ignore_index=True gives the stacked frame a fresh 0..n-1 index; without it
# the index labels of `resolvers` would simply be repeated for the second half.
pub_resolvers = pd.concat(
    [
        resolvers[['ipv4', 'name']].rename(
            columns={'ipv4': 'resolver', 'name': 'resolver_name'}
        ),
        resolvers[['ipv4_2', 'name']].rename(
            columns={'ipv4_2': 'resolver', 'name': 'resolver_name'}
        ),
    ],
    ignore_index=True,
)

In [6]:
pub_resolvers

Unnamed: 0,resolver,resolver_name
0,185.228.168.168,CleanBrowsing
1,1.1.1.1,Cloudflare 1.1.1.1
2,8.26.56.26,Comodo Secure DNS
3,217.31.204.130,CZ.NIC ODVR
4,216.146.35.35,Oracle + Dyn
5,84.200.69.80,DNS.WATCH
6,8.8.8.8,Google Public DNS
7,156.154.70.1,Neustar UltraRecursive
8,208.67.222.222,OpenDNS
9,185.121.177.177,OpenNIC


In [7]:
probes.head()

Unnamed: 0,id,probe_id,country_code
0,1,10003,NL
1,2,10006,GB
2,3,10007,US
3,4,10019,FR
4,5,10023,DE


### filtering and pre-processing probes for merging

In [8]:
# Show the probes whose country code is an empty string; a continent cannot be
# derived for them.
probes[probes['country_code'] == '']

Unnamed: 0,id,probe_id,country_code
5226,5227,35735,


In [9]:
# Keep only the probes that have a non-empty country code.
probes = probes.loc[probes['country_code'].ne('')]

In [10]:
# Only the probe identifier and its country are needed for the merge.
probes = probes.loc[:, ['probe_id', 'country_code']]

In [11]:
# Derive a continent code for every probe from its two-letter country code.
# `assign` builds a new frame instead of writing a column into `probes`, which
# at this point is the result of row/column selections on the original table;
# writing into it in place raises pandas' SettingWithCopyWarning.
# NOTE(review): pycountry_convert presumably returns the string 'NA' for North
# America -- take care that it is not parsed as a missing value if this column
# is ever round-tripped through CSV.
probes = probes.assign(
    continent_code=probes['country_code'].map(pc.country_alpha2_to_continent_code)
)

In [12]:
# Use the same key name as the results table (`prb_id`) so both can be merged.
probes = probes.rename({'probe_id': 'prb_id'}, axis='columns')

In [13]:
probes.head()

Unnamed: 0,prb_id,country_code,continent_code
0,10003,NL,EU
1,10006,GB,EU
2,10007,US,
3,10019,FR,EU
4,10023,DE,EU


In [14]:
# Left join: every result row is kept; rows whose probe is not present in
# `probes` get missing country/continent values.
df = pd.merge(df, probes, how='left', on='prb_id')

In [15]:
# Left join on the resolver address to attach the human-readable resolver name.
df = pd.merge(df, pub_resolvers, how='left', on='resolver')

In [16]:
df.head()

Unnamed: 0,msm_id,domain,resolver,prb_id,rt,size,src_addr,dst_addr,timestamp,is_tls,ttl,return_code,err,err_msg,country_code,continent_code,resolver_name
0,22202833,google.com,185.228.168.168,10006,24.121,44.0,192.168.0.106,185.228.168.168,1562122818,0,66.0,NOERROR,0,,GB,EU,CleanBrowsing
1,22202833,google.com,185.228.168.168,10007,84.187,44.0,192.168.7.24,185.228.168.168,1562122829,0,275.0,NOERROR,0,,US,,CleanBrowsing
2,22202833,google.com,185.228.168.168,10019,47.384,124.0,192.168.31.41,185.228.168.168,1562122816,0,252.0,NOERROR,0,,FR,EU,CleanBrowsing
3,22202833,google.com,185.228.168.168,10023,16.192,44.0,172.31.253.253,185.228.168.168,1562122821,0,290.0,NOERROR,0,,DE,EU,CleanBrowsing
4,22202833,google.com,185.228.168.168,10031,16.253,44.0,100.64.1.223,185.228.168.168,1562122818,0,293.0,NOERROR,0,,DE,EU,CleanBrowsing


In [17]:
df.columns

Index(['msm_id', 'domain', 'resolver', 'prb_id', 'rt', 'size', 'src_addr',
       'dst_addr', 'timestamp', 'is_tls', 'ttl', 'return_code', 'err',
       'err_msg', 'country_code', 'continent_code', 'resolver_name'],
      dtype='object')

In [18]:
len(df.columns)

17

In [19]:
# Column order used when saving, grouped by topic.
cols = [
    # measurement
    'msm_id', 'timestamp', 'domain',
    # probe
    'prb_id', 'country_code', 'continent_code', 'src_addr',
    # resolver
    'dst_addr', 'resolver', 'resolver_name', 'is_tls',
    # error information
    'err', 'err_msg',
    # response
    'size', 'return_code', 'rt', 'ttl',
]

In [20]:
len(cols)

17

In [21]:
# Apply the column order defined in `cols`.
df = df.loc[:, cols]

In [22]:
df.head()

Unnamed: 0,msm_id,timestamp,domain,prb_id,country_code,continent_code,src_addr,dst_addr,resolver,resolver_name,is_tls,err,err_msg,size,return_code,rt,ttl
0,22202833,1562122818,google.com,10006,GB,EU,192.168.0.106,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,24.121,66.0
1,22202833,1562122829,google.com,10007,US,,192.168.7.24,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,84.187,275.0
2,22202833,1562122816,google.com,10019,FR,EU,192.168.31.41,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,124.0,NOERROR,47.384,252.0
3,22202833,1562122821,google.com,10023,DE,EU,172.31.253.253,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,16.192,290.0
4,22202833,1562122818,google.com,10031,DE,EU,100.64.1.223,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,16.253,293.0


In [23]:
# Persist the merged results as table `public_resolvers`, replacing any
# existing table of that name.
# `closing(...)` closes the connection when the block ends; the inner `conn`
# context keeps sqlite3's commit-on-success / rollback-on-error behaviour.
with closing(sqlite3.connect('../data/pub-res.db')) as conn, conn:
    df.to_sql('public_resolvers', index=False, if_exists='replace', con=conn)

In [24]:
df

Unnamed: 0,msm_id,timestamp,domain,prb_id,country_code,continent_code,src_addr,dst_addr,resolver,resolver_name,is_tls,err,err_msg,size,return_code,rt,ttl
0,22202833,1562122818,google.com,10006,GB,EU,192.168.0.106,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,24.121,66.0
1,22202833,1562122829,google.com,10007,US,,192.168.7.24,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,84.187,275.0
2,22202833,1562122816,google.com,10019,FR,EU,192.168.31.41,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,124.0,NOERROR,47.384,252.0
3,22202833,1562122821,google.com,10023,DE,EU,172.31.253.253,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,16.192,290.0
4,22202833,1562122818,google.com,10031,DE,EU,100.64.1.223,185.228.168.168,185.228.168.168,CleanBrowsing,0,0,,44.0,NOERROR,16.253,293.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136978202,22299012,1563191807,sberbank.ru,34979,IE,EU,172.17.21.247,91.239.100.100,91.239.100.100,UncensoredDNS,1,0,,257.0,NOERROR,494.714,
136978203,22299012,1563192076,sberbank.ru,11223,GB,EU,192.168.0.197,91.239.100.100,91.239.100.100,UncensoredDNS,1,0,,257.0,NOERROR,414.241,
136978204,22299012,1563192398,sberbank.ru,33346,CN,AS,192.168.1.236,91.239.100.100,91.239.100.100,UncensoredDNS,1,0,,257.0,NOERROR,929.706,
136978205,22299012,1563193186,sberbank.ru,34961,DE,EU,192.168.188.30,91.239.100.100,91.239.100.100,UncensoredDNS,1,0,,257.0,NOERROR,201.561,


In [25]:
# Probe IDs (`prb_id` column) used as an allow-list when filtering the results.
# NOTE(review): judging by the file name these are probes deployed in home
# networks -- confirm against how the CSV was produced.
home_probes_only = pd.read_csv('../metadata/home-probes-merged.csv')

In [26]:
home_probes_only

Unnamed: 0,prb_id
0,10006
1,10007
2,10080
3,10087
4,10092
...,...
3226,35719
3227,35723
3228,35724
3229,35735


In [27]:
# Probe IDs (`prb_id` column) that are excluded from the analysis.
# NOTE(review): the criteria for "faulty" are not visible here -- document the
# source of this list.
faulty_probes = pd.read_csv('../metadata/faulty-probes.csv')

In [28]:
faulty_probes

Unnamed: 0,prb_id
0,10048
1,10058
2,11608
3,11743
4,12087
5,12321
6,12880
7,15355
8,15762
9,16759


In [29]:
# Restrict the results to the listed home probes, then drop every probe that is
# listed as faulty.
df = (
    df
    .loc[lambda frame: frame['prb_id'].isin(home_probes_only['prb_id'].unique())]
    .loc[lambda frame: ~frame['prb_id'].isin(faulty_probes['prb_id'].unique())]
)

1) group by resolvers  
--> 5th percentile for each probe  
--> group probes by continent: heatmap

2) failure rate/availability analysis  
--> count number of requests in total, count number of failures, percentage

In [30]:
len(df)

84887798

In [101]:
# Number of distinct probes left after filtering.
df['prb_id'].nunique(dropna=False)

3151

# Do53 vs DoT response times (5th percentiles vs medians)

In [31]:
# Rows with is_tls == 0, i.e. the non-TLS (Do53) measurements.
do53 = df.loc[df['is_tls'].eq(0)]

In [32]:
# Rows with is_tls == 1, i.e. the DNS-over-TLS (DoT) measurements.
dot = df.loc[df['is_tls'].eq(1)]

In [33]:
# 5th percentile of the response time (`rt`) of error-free Do53 measurements,
# one value per probe and resolver. Country and continent are part of the group
# key only so that they are carried along into the result.
do53_5pct = (
    do53.loc[do53['err'] == 0]
    .groupby(['prb_id', 'country_code', 'continent_code', 'resolver_name'])['rt']
    .quantile(0.05)
    .reset_index()
)

In [34]:
# 5th percentile of the response time (`rt`) of error-free DoT measurements,
# one value per probe and resolver. Country and continent are part of the group
# key only so that they are carried along into the result.
dot_5pct = (
    dot.loc[dot['err'] == 0]
    .groupby(['prb_id', 'country_code', 'continent_code', 'resolver_name'])['rt']
    .quantile(0.05)
    .reset_index()
)

In [35]:
import numpy as np

In [36]:
# Continent x resolver table: each cell is the median, over probes, of the
# per-probe 5th-percentile Do53 response time (rounded to 3 decimals).
# The aggregation is named by string: passing the NumPy callable is deprecated
# in recent pandas releases, and the string form computes the same median.
pd.crosstab(
    index=do53_5pct['continent_code'],
    columns=do53_5pct['resolver_name'],
    values=do53_5pct['rt'],
    aggfunc='median',
).round(3)

resolver_name,CZ.NIC ODVR,CleanBrowsing,Cloudflare 1.1.1.1,Comodo Secure DNS,DNS.WATCH,Google Public DNS,Neustar UltraRecursive,OpenDNS,OpenNIC,Oracle + Dyn,Quad9,SafeDNS,UncensoredDNS,VeriSign Public DNS,Yandex.DNS
continent_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AF,176.236,162.91,23.424,148.193,160.66,24.655,2.834,46.058,146.363,176.013,7.697,54.791,174.243,152.953,182.147
AS,174.117,22.839,10.27,27.661,174.785,13.894,21.35,17.783,41.072,23.845,35.896,65.225,200.946,95.231,203.302
EU,30.415,22.133,10.234,23.654,21.402,11.965,1.522,16.283,19.979,28.177,18.058,23.46,34.187,23.529,43.409
,139.972,24.797,11.675,18.512,123.583,12.758,1.525,13.244,22.635,29.253,27.726,20.412,140.084,24.714,146.882
OC,314.858,20.155,11.41,26.659,309.804,27.014,17.047,27.597,19.056,32.696,23.747,26.301,325.253,171.814,339.206
SA,246.965,71.002,14.885,129.085,218.701,18.392,1.169,50.609,130.05,131.808,134.04,133.284,235.217,145.783,248.534


In [37]:
# Continent x resolver table: each cell is the median, over probes, of the
# per-probe 5th-percentile DoT response time (rounded to 3 decimals).
# The aggregation is named by string: passing the NumPy callable is deprecated
# in recent pandas releases, and the string form computes the same median.
pd.crosstab(
    index=dot_5pct['continent_code'],
    columns=dot_5pct['resolver_name'],
    values=dot_5pct['rt'],
    aggfunc='median',
).round(3)

resolver_name,CleanBrowsing,Cloudflare 1.1.1.1,Google Public DNS,Quad9,UncensoredDNS
continent_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AF,1171.424,147.615,315.091,114.299,1057.639
AS,240.62,128.102,167.358,294.983,1266.293
EU,220.229,128.456,122.924,161.317,679.282
,244.005,136.307,133.937,201.38,1596.602
OC,175.358,131.633,266.292,177.029,1561.195
SA,367.315,146.344,160.625,622.634,1135.852


In [38]:
# Store the per-probe 5th-percentile tables, replacing existing ones.
# `closing(...)` closes the connection when the block ends; the inner `conn`
# context keeps sqlite3's commit-on-success / rollback-on-error behaviour.
with closing(sqlite3.connect('../data/pub-res.db')) as conn, conn:
    do53_5pct.to_sql('do53_5pct', index=False, if_exists='replace', con=conn)
    dot_5pct.to_sql('dot_5pct', index=False, if_exists='replace', con=conn)

In [39]:
# Median response time (`rt`) of error-free Do53 measurements, one value per
# probe and resolver (country and continent are carried along in the key).
do53_median = (
    do53.loc[do53['err'] == 0]
    .groupby(['prb_id', 'country_code', 'continent_code', 'resolver_name'])['rt']
    .median()
    .reset_index()
)

In [40]:
# Median response time (`rt`) of error-free DoT measurements, one value per
# probe and resolver (country and continent are carried along in the key).
dot_median = (
    dot.loc[dot['err'] == 0]
    .groupby(['prb_id', 'country_code', 'continent_code', 'resolver_name'])['rt']
    .median()
    .reset_index()
)

In [41]:
# Continent x resolver table: each cell is the median, over probes, of the
# per-probe median Do53 response time (rounded to 3 decimals).
# The aggregation is named by string: passing the NumPy callable is deprecated
# in recent pandas releases, and the string form computes the same median.
pd.crosstab(
    index=do53_median['continent_code'],
    columns=do53_median['resolver_name'],
    values=do53_median['rt'],
    aggfunc='median',
).round(3)

resolver_name,CZ.NIC ODVR,CleanBrowsing,Cloudflare 1.1.1.1,Comodo Secure DNS,DNS.WATCH,Google Public DNS,Neustar UltraRecursive,OpenDNS,OpenNIC,Oracle + Dyn,Quad9,SafeDNS,UncensoredDNS,VeriSign Public DNS,Yandex.DNS
continent_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AF,181.157,186.135,25.455,151.214,171.206,173.49,3.073,47.962,148.599,182.393,31.761,58.511,182.553,160.104,188.166
AS,186.582,35.758,12.798,37.75,185.95,24.134,25.148,23.082,72.086,27.318,42.96,84.182,215.791,103.9,212.188
EU,34.561,25.43,11.665,25.523,23.167,17.027,1.666,17.814,22.991,30.61,20.408,25.171,37.682,25.796,45.78
,142.24,27.91,13.166,20.116,129.29,20.712,1.637,15.175,29.092,31.816,29.21,22.272,146.993,27.18,150.692
OC,321.573,21.215,13.003,28.756,315.805,28.597,17.246,29.115,20.45,34.086,26.175,27.948,336.21,174.706,343.544
SA,254.333,76.978,17.1,133.173,228.507,33.654,1.614,65.287,134.642,140.89,140.53,139.63,241.418,162.634,254.414


In [42]:
# Continent x resolver table: each cell is the median, over probes, of the
# per-probe median DoT response time (rounded to 3 decimals).
# The aggregation is named by string: passing the NumPy callable is deprecated
# in recent pandas releases, and the string form computes the same median.
pd.crosstab(
    index=dot_median['continent_code'],
    columns=dot_median['resolver_name'],
    values=dot_median['rt'],
    aggfunc='median',
).round(3)

resolver_name,CleanBrowsing,Cloudflare 1.1.1.1,Google Public DNS,Quad9,UncensoredDNS
continent_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AF,1873.595,163.894,465.44,163.75,1470.474
AS,303.432,134.736,178.486,326.569,2137.064
EU,450.952,134.892,132.142,169.934,1670.944
,266.958,142.558,144.391,208.318,2946.969
OC,238.927,137.269,272.527,185.361,1949.118
SA,594.688,154.807,184.282,646.138,1301.647


In [43]:
# Store the per-probe median tables, replacing existing ones.
# `closing(...)` closes the connection when the block ends; the inner `conn`
# context keeps sqlite3's commit-on-success / rollback-on-error behaviour.
with closing(sqlite3.connect('../data/pub-res.db')) as conn, conn:
    do53_median.to_sql('do53_median', index=False, if_exists='replace', con=conn)
    dot_median.to_sql('dot_median', index=False, if_exists='replace', con=conn)

# Delta

### between do53 and dot (5th percentiles aggregated, see db)

In [90]:
# Per resolver: median over all probes of the per-probe 5th-percentile Do53
# response time, largest value first.
(
    do53_5pct
    .groupby('resolver_name')['rt']
    .median()
    .sort_values(ascending=False)
)

resolver_name
Yandex.DNS                51.875550
UncensoredDNS             44.934200
CZ.NIC ODVR               41.185550
DNS.WATCH                 31.302000
Oracle + Dyn              29.556525
VeriSign Public DNS       26.199700
SafeDNS                   24.199800
Comodo Secure DNS         23.426150
CleanBrowsing             23.069050
OpenNIC                   21.913700
Quad9                     20.263675
OpenDNS                   16.035000
Google Public DNS         12.615675
Cloudflare 1.1.1.1        10.854850
Neustar UltraRecursive     2.399050
Name: rt, dtype: float64

In [91]:
# Per resolver: median over all probes of the per-probe 5th-percentile DoT
# response time, largest value first.
(
    dot_5pct
    .groupby('resolver_name')['rt']
    .median()
    .sort_values(ascending=False)
)

resolver_name
UncensoredDNS         1059.014400
CleanBrowsing          226.995425
Quad9                  170.444400
Cloudflare 1.1.1.1     131.866875
Google Public DNS      129.263400
Name: rt, dtype: float64

In [44]:
do53_5pct

Unnamed: 0,prb_id,country_code,continent_code,resolver_name,rt
0,10006,GB,EU,CZ.NIC ODVR,50.22660
1,10006,GB,EU,CleanBrowsing,22.13310
2,10006,GB,EU,Cloudflare 1.1.1.1,19.16870
3,10006,GB,EU,Comodo Secure DNS,26.33950
4,10006,GB,EU,DNS.WATCH,31.75100
...,...,...,...,...,...
43795,35742,ZA,AF,Quad9,2.29200
43796,35742,ZA,AF,SafeDNS,12.11000
43797,35742,ZA,AF,UncensoredDNS,196.55605
43798,35742,ZA,AF,VeriSign Public DNS,190.77650


In [45]:
dot_5pct

Unnamed: 0,prb_id,country_code,continent_code,resolver_name,rt
0,10006,GB,EU,CleanBrowsing,297.26800
1,10006,GB,EU,Cloudflare 1.1.1.1,164.38140
2,10006,GB,EU,Google Public DNS,155.04670
3,10006,GB,EU,Quad9,177.32480
4,10006,GB,EU,UncensoredDNS,958.34600
...,...,...,...,...,...
14666,35724,KR,AS,UncensoredDNS,1343.76425
14667,35742,ZA,AF,CleanBrowsing,1258.35970
14668,35742,ZA,AF,Cloudflare 1.1.1.1,96.99155
14669,35742,ZA,AF,Google Public DNS,279.46590


In [46]:
# Put the Do53 and DoT 5th percentiles of each (probe, resolver) pair side by
# side as `rt_do53` / `rt_dot`. Country and continent are taken from the Do53
# frame only, so they are removed from the DoT frame before joining. The outer
# join keeps pairs that exist on only one side, with a missing value for the
# other protocol.
delta_5pct = pd.merge(
    do53_5pct,
    dot_5pct.drop(columns=['country_code', 'continent_code']),
    on=['prb_id', 'resolver_name'],
    how='outer',
    suffixes=('_do53', '_dot'),
)

In [47]:
# Resolvers that still appear once rows with any missing value are dropped,
# i.e. resolvers for which some probe has both a Do53 and a DoT value.
delta_5pct.dropna()['resolver_name'].unique()

array(['CleanBrowsing', 'Cloudflare 1.1.1.1', 'Google Public DNS',
       'Quad9', 'UncensoredDNS'], dtype=object)

In [48]:
# Keep only complete rows (no missing value in any column).
delta_5pct = delta_5pct.dropna(axis=0, how='any')

In [49]:
delta_5pct

Unnamed: 0,prb_id,country_code,continent_code,resolver_name,rt_do53,rt_dot
1,10006,GB,EU,CleanBrowsing,22.13310,297.26800
2,10006,GB,EU,Cloudflare 1.1.1.1,19.16870,164.38140
5,10006,GB,EU,Google Public DNS,21.56290,155.04670
9,10006,GB,EU,Quad9,22.00785,177.32480
11,10006,GB,EU,UncensoredDNS,42.83000,958.34600
...,...,...,...,...,...,...
43783,35724,KR,AS,UncensoredDNS,19.22300,1343.76425
43787,35742,ZA,AF,CleanBrowsing,180.02050,1258.35970
43788,35742,ZA,AF,Cloudflare 1.1.1.1,2.10350,96.99155
43791,35742,ZA,AF,Google Public DNS,12.23660,279.46590


In [50]:
# Difference Do53 minus DoT of the per-probe 5th-percentile response times;
# negative values mean DoT was slower than Do53.
# `assign` returns a new frame rather than writing a column into the result of
# the preceding `dropna()`, which can raise pandas' SettingWithCopyWarning.
delta_5pct = delta_5pct.assign(
    rt_delta=delta_5pct['rt_do53'] - delta_5pct['rt_dot']
)

In [51]:
delta_5pct

Unnamed: 0,prb_id,country_code,continent_code,resolver_name,rt_do53,rt_dot,rt_delta
1,10006,GB,EU,CleanBrowsing,22.13310,297.26800,-275.13490
2,10006,GB,EU,Cloudflare 1.1.1.1,19.16870,164.38140,-145.21270
5,10006,GB,EU,Google Public DNS,21.56290,155.04670,-133.48380
9,10006,GB,EU,Quad9,22.00785,177.32480,-155.31695
11,10006,GB,EU,UncensoredDNS,42.83000,958.34600,-915.51600
...,...,...,...,...,...,...,...
43783,35724,KR,AS,UncensoredDNS,19.22300,1343.76425,-1324.54125
43787,35742,ZA,AF,CleanBrowsing,180.02050,1258.35970,-1078.33920
43788,35742,ZA,AF,Cloudflare 1.1.1.1,2.10350,96.99155,-94.88805
43791,35742,ZA,AF,Google Public DNS,12.23660,279.46590,-267.22930


In [52]:
# Store the Do53-vs-DoT delta table, replacing an existing one.
# `closing(...)` closes the connection when the block ends; the inner `conn`
# context keeps sqlite3's commit-on-success / rollback-on-error behaviour.
with closing(sqlite3.connect('../data/pub-res.db')) as conn, conn:
    delta_5pct.to_sql('delta_5pct', index=False, if_exists='replace', con=conn)

# Failures

In [53]:
do53['err_msg'].value_counts()

{u'timeout': 5000}                                       4914786
{u'socket': u'connect failed Network is unreachable'}       1586
{u'senderror': u'AF Network is unreachable, AF_INET'}          1
Name: err_msg, dtype: int64

In [54]:
dot['err_msg'].value_counts()

{u'timeout': 5000}                                                  2885827
{u'TUCONNECT': u'Connection reset by peer'}                         1806596
{u'TUCONNECT': u'Connection refused'}                                 49895
{u'TUCONNECT': u'No route to host'}                                    7036
{u'TUCONNECT': u'Success'}                                             4097
{u'TUCONNECT': u'Network is unreachable'}                              3977
{u'TUCONNECT': u'error:1408F10B:lib(20):func(143):reason(267)'}        1142
{u'idmismatch': u'mismatch id from tcp fd 2'}                            28
{u'TUCONNECT': u'error:1407741A:lib(20):func(119):reason(1050)'}         17
{u'idmismatch': u'mismatch id from tcp fd 256'}                           9
{u'idmismatch': u'mismatch id from tcp fd 99'}                            6
{u'idmismatch': u'mismatch id from tcp fd 103'}                           6
{u'idmismatch': u'mismatch id from tcp fd 105'}                           5
{u'idmismatc

In [55]:
# Fraction of all Do53 measurements whose `err` flag is non-zero.
len(do53[do53['err'] != 0]) / len(do53)

0.07671200169477596

--> 7.7% FAILURE rate for Do53 in general (for all public resolvers)

In [56]:
# Fraction of all DoT measurements whose `err` flag is non-zero.
len(dot[dot['err'] != 0]) / len(dot)

0.22879341217363777

--> 22.9% FAILURE rate for DoT in general (for all public resolvers)

In [57]:
# Number of Do53 measurements per continent, resolver and error flag; the
# count ends up in a column named `size`.
do53_failure_counts = (
    do53
    .groupby(by=['continent_code', 'resolver_name', 'err'], as_index=False)
    .size()
)

In [58]:
# Number of DoT measurements per continent, resolver and error flag; the
# count ends up in a column named `size`.
dot_failure_counts = (
    dot
    .groupby(by=['continent_code', 'resolver_name', 'err'], as_index=False)
    .size()
)

In [59]:
# Call the count column `num` in both tables.
do53_failure_counts = do53_failure_counts.rename({'size': 'num'}, axis='columns')
dot_failure_counts = dot_failure_counts.rename({'size': 'num'}, axis='columns')

In [60]:
do53_failure_counts.head()

Unnamed: 0,continent_code,resolver_name,err,num
0,AF,CZ.NIC ODVR,0,65102
1,AF,CZ.NIC ODVR,1,989
2,AF,CleanBrowsing,0,65931
3,AF,CleanBrowsing,1,210
4,AF,Cloudflare 1.1.1.1,0,61593


In [61]:
dot_failure_counts.head()

Unnamed: 0,continent_code,resolver_name,err,num
0,AF,CleanBrowsing,0,39417
1,AF,CleanBrowsing,1,17822
2,AF,Cloudflare 1.1.1.1,0,51540
3,AF,Cloudflare 1.1.1.1,1,5614
4,AF,Google Public DNS,0,54294


In [62]:
# Add `total`: the number of Do53 measurements of each (continent, resolver)
# pair, repeated on every row of that pair.
# A group-wise `transform('sum')` yields the same column as merging the grouped
# sums back in, but stays correct when the cell is re-run (a second self-merge
# would produce `total_x` / `total_y` instead).
do53_failure_counts = do53_failure_counts.assign(
    total=do53_failure_counts
    .groupby(['continent_code', 'resolver_name'])['num']
    .transform('sum')
)

In [63]:
# Add `total`: the number of DoT measurements of each (continent, resolver)
# pair, repeated on every row of that pair.
# A group-wise `transform('sum')` yields the same column as merging the grouped
# sums back in, but stays correct when the cell is re-run (a second self-merge
# would produce `total_x` / `total_y` instead).
dot_failure_counts = dot_failure_counts.assign(
    total=dot_failure_counts
    .groupby(['continent_code', 'resolver_name'])['num']
    .transform('sum')
)

In [64]:
# Share of each error flag within its (continent, resolver) pair.
# Despite the column name this is a fraction between 0 and 1, not a percent.
do53_failure_counts = do53_failure_counts.assign(
    percentage=do53_failure_counts['num'].div(do53_failure_counts['total'])
)
dot_failure_counts = dot_failure_counts.assign(
    percentage=dot_failure_counts['num'].div(dot_failure_counts['total'])
)

In [65]:
# Split each table by error flag: non-zero `err` rows are the failure rates,
# zero `err` rows the success rates.
do53_failure_rates = do53_failure_counts.loc[do53_failure_counts['err'].ne(0)]
do53_success_rates = do53_failure_counts.loc[do53_failure_counts['err'].eq(0)]

dot_failure_rates = dot_failure_counts.loc[dot_failure_counts['err'].ne(0)]
dot_success_rates = dot_failure_counts.loc[dot_failure_counts['err'].eq(0)]

In [66]:
# Store the failure/success tables for both protocols, replacing existing ones.
# `closing(...)` closes the connection when the block ends; the inner `conn`
# context keeps sqlite3's commit-on-success / rollback-on-error behaviour.
with closing(sqlite3.connect('../data/pub-res.db')) as conn, conn:
    do53_failure_counts.to_sql('do53_failure_counts', index=False, if_exists='replace', con=conn)
    dot_failure_counts.to_sql('dot_failure_counts', index=False, if_exists='replace', con=conn)

    do53_failure_rates.to_sql('do53_failure_rates', index=False, if_exists='replace', con=conn)
    do53_success_rates.to_sql('do53_success_rates', index=False, if_exists='replace', con=conn)

    dot_failure_rates.to_sql('dot_failure_rates', index=False, if_exists='replace', con=conn)
    dot_success_rates.to_sql('dot_success_rates', index=False, if_exists='replace', con=conn)
    

In [67]:
# Number of distinct probes with Do53 measurements.
do53['prb_id'].nunique(dropna=False)

3151

In [68]:
dot[(dot['resolver_name'] == 'UncensoredDNS') & (dot['err'] != 0)]['err_msg'].value_counts()

{u'timeout': 5000}                             2231766
{u'TUCONNECT': u'Connection reset by peer'}    1805560
{u'TUCONNECT': u'Success'}                        2344
{u'TUCONNECT': u'Network is unreachable'}          324
{u'TUCONNECT': u'No route to host'}                270
Name: err_msg, dtype: int64

In [69]:
do53[(do53['resolver_name'] == 'Neustar UltraRecursive') & (do53['err'] != 0)]['err_msg'].value_counts()

{u'timeout': 5000}                                       4191770
{u'socket': u'connect failed Network is unreachable'}        103
Name: err_msg, dtype: int64

In [70]:
# Number of distinct countries with Do53 measurements.
# `nunique()` ignores missing values, whereas `len(unique())` counts NaN as one
# more "country". Missing country codes can occur here because the probe
# metadata was left-joined after probes with an empty country code had been
# removed from it.
do53['country_code'].nunique()

127