*This notebook reads the one-off measurement data from the `ra_one_off_parameters` table, flags which probes are NATed and which are residential, and then inserts the resulting dataframe (columns: probeid, timestamp, if_nat, if_residential, reverse_dns) into the `ra_one_off_extensions` table. Deriving the corresponding access types from reverse_dns should be done in a separate notebook.*
- - - 

In [2]:
from IPython.core.display import HTML
# Concatenate the two stylesheets and inject them into the notebook page.
# Each file is read inside a ``with`` block so its handle is closed right away
# instead of being left for the garbage collector.
css_chunks = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_chunks.append(css_file.read())
css = ''.join(css_chunks)
HTML('<style>{}</style>'.format(css))

In [3]:
import sqlite3
import pandas as pd
import requests
import ipaddress
import time

In [4]:
# SQLite database file and the table names used by this notebook.
DB_LOCATION = 'lastmile.db'
# RA_PROBE_API and RA_ASN are not referenced by any cell visible in this notebook.
RA_PROBE_API = 'ra_probe_api'
RA_ASN = 'ra_asn'
# Source table that the hop query reads from.
RA_ONE_OFF_PARAMETERS = 'ra_one_off_parameters'
# Destination table that the derived flags are appended to.
RA_ONE_OFF_EXTENSIONS = 'ra_one_off_extensions'

In [5]:
# Open the SQLite database; this one connection is reused by the later cells.
con = sqlite3.connect(DB_LOCATION)

In [6]:
# One wall-clock timestamp (epoch seconds) is stamped on every selected row.
timestamp = int(time.time())
# Collapse the hop-1 and hop-2 rows of each probe into a single row:
#   * probe_ip        -> comma-joined probe addresses
#   * hop_no          -> ';'-joined hop numbers
#   * result_hop_blob -> ';'-joined raw hop results
# hop1/hop2 start as empty strings and both flags start at 0; later cells fill
# them in.
# NOTE(review): SQLite documents the order of GROUP_CONCAT elements as
# arbitrary, so nothing here guarantees that the hop-1 blob precedes the hop-2
# blob -- confirm against the data (the hop_no column shows the actual order).
query = '''SELECT    probeid
                   , %d as timestamp 
                   , GROUP_CONCAT(probe_ip) as probe_ip
                   , GROUP_CONCAT(result_hop_no, ';') as hop_no
                   , GROUP_CONCAT(result_hop_blob, ';') as result_hop_blob
                   , '' as hop1
                   , '' as hop2
                   , 0  as if_residential
                   , 0  as if_nat
           FROM      %s
           WHERE     result_hop_no == 1 OR result_hop_no == 2
           GROUP BY  probeid
        '''%(timestamp, RA_ONE_OFF_PARAMETERS)

# Materialise the grouped result as a DataFrame.
df = pd.read_sql(query, con)

In [7]:
def return_all_from_field_values(json_string):
    """Extract the 'from' value of every reply in one hop's JSON result.

    Parameters
    ----------
    json_string : str
        JSON text expected to decode to a list of reply objects.

    Returns
    -------
    str or None
        The 'from' values joined with ', ' (an empty string for an empty
        list), or None when the text cannot be decoded or any reply has no
        usable 'from' field.
    """
    import json
    try:
        json_data = json.loads(json_string)
    except (TypeError, ValueError) as e:
        # Report the undecodable input and emit the same '*' marker used for
        # every other skipped hop.  Returning here keeps the extraction step
        # below from running against an unbound ``json_data``.
        print(e, type(e), json_string)
        print('*', end="")
        return None
    try:
        return ', '.join([item['from'] for item in json_data])
    except (KeyError, TypeError):
        # A single reply without a 'from' field (or a payload that is not a
        # list of objects) invalidates the whole hop.
        print('*', end="")
        return None

In [8]:
def return_concat_from_field_values(concat_result_blob):
    """Turn a ';'-joined pair of hop blobs into a 'froms; froms' string.

    The input must split into exactly two parts on ';'.  Each part has its
    single quotes swapped for double quotes and is then handed to
    ``return_all_from_field_values``.  The two results (each possibly None)
    are formatted as '<first>; <second>'.  Any input that does not split into
    exactly two parts yields 'None; None'.
    """
    try:
        first_blob, second_blob = concat_result_blob.split(';')
    except Exception:
        return '%s; %s' % (None, None)

    extracted = [
        return_all_from_field_values(blob.replace("'", '"'))
        for blob in (first_blob, second_blob)
    ]
    return '%s; %s' % tuple(extracted)

In [9]:
# Preview the grouped rows; hop1/hop2 and both flags still hold their placeholders.
df.head()

Unnamed: 0,probeid,timestamp,probe_ip,hop_no,result_hop_blob,hop1,hop2,if_residential,if_nat
0,3,1446062329,"83.160.101.12,83.160.101.12",1;2,"[{'ttl': 64, 'size': 68, 'rtt': 1.942, 'from':...",,,0,0
1,4,1446062329,"83.163.50.165,83.163.50.165",1;2,"[{'ttl': 64, 'size': 68, 'rtt': 1.925, 'from':...",,,0,0
2,5,1446062329,"83.163.239.181,83.163.239.181",1;2,"[{'ttl': 64, 'size': 68, 'rtt': 3.094, 'from':...",,,0,0
3,7,1446062329,"84.28.33.3,84.28.33.3",1;2,"[{'ttl': 64, 'size': 68, 'rtt': 15.795, 'from'...",,,0,0
4,8,1446062329,"83.68.21.139,83.68.21.139",1;2,"[{'ttl': 64, 'size': 68, 'rtt': 16.722, 'from'...",,,0,0


In [10]:
# Parse each probe's concatenated hop blobs into the hop1/hop2 address strings.
# ``.at`` is used because the ``DataFrame.ix`` indexer no longer exists in
# current pandas releases.
for i in df.index:
    # Quote bare True tokens so the blob can be decoded as JSON downstream.
    result_hop_blob = df.at[i, 'result_hop_blob'].replace('True', '"True"')
    froms = return_concat_from_field_values(result_hop_blob)
    # ``froms`` is formatted as 'hop1; hop2'.  Strip both halves so hop2 does
    # not keep the leading space, which would make a missing hop show up as
    # ' None' and slip past the later ``== 'None'`` checks.
    h1, h2 = (part.strip() for part in froms.split(';'))
    df.at[i, 'hop1'] = h1
    df.at[i, 'hop2'] = h2

****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************

In [11]:
# The raw hop columns have been parsed into hop1/hop2 and are no longer needed.
del df['hop_no']
del df['result_hop_blob']

In [12]:
def shrink_probe_ip(probe_ip_str):
    """Reduce a comma-joined address string to its single distinct address.

    Returns the address when every comma-separated entry is identical, and
    None when the entries disagree.
    """
    distinct_ips = set(probe_ip_str.split(','))
    if len(distinct_ips) != 1:
        return None
    (only_ip,) = distinct_ips
    return only_ip
# Replace each comma-joined probe_ip string with its single address (None when the entries differ).
df['probe_ip'] = df['probe_ip'].apply(shrink_probe_ip)

In [13]:
# Non-null count per column.
df.count()

probeid           6819
timestamp         6819
probe_ip          6819
hop1              6819
hop2              6819
if_residential    6819
if_nat            6819
dtype: int64

In [14]:
def get_rdns_from_ip(ip):
    """Resolve an address to a host name via ``socket.getnameinfo``.

    Prints a '.' progress marker and returns the host part of the result on
    success.  On any exception the error, its type and the address are
    printed and None is returned.

    NOTE(review): flags are 0, so per the socket documentation an address
    without a resolvable name may come back in numeric form rather than
    raising -- confirm whether downstream code expects that.
    """
    import socket
    try:
        hostname, _service = socket.getnameinfo((str(ip), 0), 0)
    except Exception as err:
        print(err, type(err), ip)
        return None
    print('.', end='')
    return hostname
# Look up a host name for every probe address, one lookup per row; a failed lookup leaves None.
df['reverse_dns'] = df['probe_ip'].apply(get_rdns_from_ip)

.................................................................................................................................................................................................................[Errno -3] Temporary failure in name resolution <class 'socket.gaierror'> 91.193.68.45
.................................................................[Errno -3] Temporary failure in name resolution <class 'socket.gaierror'> 194.190.158.21
..........................................................................................................[Errno -3] Temporary failure in name resolution <class 'socket.gaierror'> 38.66.26.182
................................................................................................................................................................................................................................................................................................................................[Errno -3] Temporary failure in name 

In [15]:
# Preview the frame with the extracted hop addresses and the new reverse_dns column.
df.head()

Unnamed: 0,probeid,timestamp,probe_ip,hop1,hop2,if_residential,if_nat,reverse_dns
0,3,1446062329,83.160.101.12,"192.168.178.1, 192.168.178.1, 192.168.178.1","194.109.5.218, 194.109.5.218, 194.109.5.218",0,0,idante.xs4all.nl
1,4,1446062329,83.163.50.165,"192.168.178.1, 192.168.178.1, 192.168.178.1","194.109.5.218, 194.109.5.218, 194.109.5.218",0,0,soenda.xs4all.nl
2,5,1446062329,83.163.239.181,"192.168.178.1, 192.168.178.1, 192.168.178.1","194.109.5.218, 194.109.5.218, 194.109.5.218",0,0,a83-163-239-181.adsl.xs4all.nl
3,7,1446062329,84.28.33.3,"10.71.1.11, 10.71.1.11, 10.71.1.11","10.252.4.1, 10.252.4.1, 10.252.4.1",0,0,541C2103.cm-5-5a.dynamic.ziggo.nl
4,8,1446062329,83.68.21.139,"10.71.6.11, 10.71.6.11, 10.71.6.11","194.109.5.175, 194.109.5.175, 194.109.5.175",0,0,dfk.xs4all.nl


In [16]:
def check_if_nated(hop1):
    """Classify every first-hop address as private (1) or not private (0).

    Parameters
    ----------
    hop1 : str or None
        Comma-separated first-hop addresses, or None / 'None' when the hop
        could not be extracted.

    Returns
    -------
    set of int or None
        The distinct flags seen across the addresses ({1}, {0} or {0, 1}).
        None when hop1 is missing or contains an entry that is not a valid
        IP address (for example an empty string).
    """
    if hop1 is None:
        return None
    # Tolerate surrounding whitespace around the 'None' placeholder.
    hop1 = hop1.strip()
    if hop1 == 'None':
        return None

    if_nat = set()
    for query in hop1.split(','):
        try:
            ip = ipaddress.ip_address(query.strip())
        except ValueError:
            # A malformed entry makes the hop undecidable; report "unknown"
            # rather than aborting the whole notebook run.
            return None
        if_nat.add(1 if ip.is_private else 0)
    return if_nat

In [17]:
# Record the NAT flag only for probes whose first-hop replies all agree;
# mixed or undecidable results keep the default of 0.
for index, hop1 in df['hop1'].items():
    if_nat = check_if_nated(hop1)
    if if_nat is not None and len(if_nat) == 1:
        # ``.at`` replaces the ``DataFrame.ix`` indexer, which no longer
        # exists in current pandas releases.
        df.at[index, 'if_nat'] = if_nat.pop()

In [18]:
def check_if_residential(hop1, hop2):
    """Flag reply pairs whose first hop is private and second hop is not.

    Parameters
    ----------
    hop1, hop2 : str or None
        Comma-separated addresses of the first and second hop, or None /
        'None' when the hop could not be extracted.

    Returns
    -------
    set of int or None
        The distinct per-pair flags (1 = hop1 private and hop2 not private,
        0 = otherwise).  None when the result is undecidable; a one-character
        marker is printed for the reason:
        '.' hop1 missing, '-' hop2 missing, '^' the hops have a different
        number of addresses, '!' an entry is not a valid IP address.
    """
    # Compare against the stripped text so a placeholder carrying stray
    # whitespace (e.g. ' None') is still recognised as missing.
    if hop1 is None or hop1.strip() == 'None':
        print('.', end="")
        return None
    if hop2 is None or hop2.strip() == 'None':
        print('-', end="")
        return None

    h1_queries = hop1.split(',')
    h2_queries = hop2.split(',')
    if len(h1_queries) != len(h2_queries):
        print('^', end="")
        return None

    if_residential = set()
    for h1_query, h2_query in zip(h1_queries, h2_queries):
        try:
            h1_ip = ipaddress.ip_address(h1_query.strip())
            h2_ip = ipaddress.ip_address(h2_query.strip())
        except ValueError:
            # A malformed entry makes the pair undecidable; report "unknown"
            # rather than aborting the whole notebook run.
            print('!', end="")
            return None
        if_residential.add(1 if h1_ip.is_private and not h2_ip.is_private else 0)

    return if_residential

In [19]:
# Record the residential flag only for probes whose reply pairs all agree;
# mixed or undecidable results keep the default of 0.
for index, hop1, hop2 in zip(df.index, df['hop1'], df['hop2']):
    if_residential = check_if_residential(hop1, hop2)
    if if_residential is not None and len(if_residential) == 1:
        # ``.at`` replaces the ``DataFrame.ix`` indexer, which no longer
        # exists in current pandas releases.
        df.at[index, 'if_residential'] = if_residential.pop()

^^^.^^.^.^^^.^^^^^^^^..^.^^^^.^^^.^^^^.^^^.^^^.^.^..^^^^^^^^^^^^^^^^^^^^.^^^^^^..^^^^^^^^^^^^^.^^^^^^^^.^^^^..^^^.^^^^^.^^^^^^.^^^.^^.^^^^^^..^^^^^^.^^^^^^.^^^^^.^..^^^.^.^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^..^.^^.^.^^^.^^^.^^.^^^.^^^.^^.^^^.^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^.^.^^^^^^^^^^^^.^^.^^^^.^^..^.^^^.^^^^^^^^..^^^^^^^^^^^^^^^^^^^^^^^^^^.^^^^^^^^^^.^^^^^^^^^.^^^.^^^^.^^.^..^^^^.^^^^^^^^.^^^..^^^.^^^^^^^^^^^^^^^^^^^^^^^.^^^..^^^^^^..^..^^^^^^^^^^^^^^^^^^^^^^^.^.^^^^^^^^^^^^^^^^^^^^^^^^^^^^^.^^^^.^^^^^^^^^^^^^^^^^^^.^^^.^^^^.^^.^.^^^^^.^^^^^^.^^^^.^^^^.^^^^^.^^..^^^^^^^^^^^.^^^^^.^..^.^^^.^.^.^^^^.^.^.^^^^.^^.^^^^^.^^..^^.^^^.^^^^^.^^^^^^^^^^^^..^^^^^..^^^^.^^^^..^.^^^^^^^.^^^^^^^^^^...^^^^^^.^..^^^^..^^^^^^.^^^.^^^^^^^^..^^^..^^^^^^^...^^^^^..^^.^^^^^^^^^.^.^^^.^.^^.^^^^^^^^.^^^^^^..^^^.^^^^^^.^^^^^.^^^^^^^.^^.^^^^^^^^^^^^^^^^^^^^^^^^^^.^^^^^^^^^^.^^^^^.^^^.^^^^^.^^^^^^^^^^^^^^^^^^^^^^^^^^^^^.^^^^^^^..^.^^^^^.^^.^.

In [20]:
# Both flags have been derived, so the hop address columns can be dropped.
del df['hop1']
del df['hop2']

In [21]:
# Preview the frame after the flags were filled in and the hop columns removed.
df.head()

Unnamed: 0,probeid,timestamp,probe_ip,if_residential,if_nat,reverse_dns
0,3,1446062329,83.160.101.12,1,1,idante.xs4all.nl
1,4,1446062329,83.163.50.165,1,1,soenda.xs4all.nl
2,5,1446062329,83.163.239.181,1,1,a83-163-239-181.adsl.xs4all.nl
3,7,1446062329,84.28.33.3,0,1,541C2103.cm-5-5a.dynamic.ziggo.nl
4,8,1446062329,83.68.21.139,1,1,dfk.xs4all.nl


In [22]:
# Non-null count per column; reverse_dns is lower wherever the lookup returned None.
df.count()

probeid           6819
timestamp         6819
probe_ip          6819
if_residential    6819
if_nat            6819
reverse_dns       6744
dtype: int64

In [23]:
# The flag is 0/1, so the sum is the number of probes marked residential.
df['if_residential'].sum()

3079

In [24]:
# The flag is 0/1, so the sum is the number of probes marked as NATed.
df['if_nat'].sum()

4789

In [25]:
# Turn on SQLite foreign-key enforcement for this connection before inserting.
cur = con.execute('pragma foreign_keys=ON')

In [26]:
# Key the frame by probeid so it is written out as the index column on insert.
df = df.set_index('probeid')

In [27]:
# probe_ip was only used for the reverse-DNS lookup; drop it before the insert.
del df['probe_ip']

In [28]:
# Final look at the rows that will be inserted.
df.head()

Unnamed: 0_level_0,timestamp,if_residential,if_nat,reverse_dns
probeid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,1446062329,1,1,idante.xs4all.nl
4,1446062329,1,1,soenda.xs4all.nl
5,1446062329,1,1,a83-163-239-181.adsl.xs4all.nl
7,1446062329,0,1,541C2103.cm-5-5a.dynamic.ziggo.nl
8,1446062329,1,1,dfk.xs4all.nl


In [29]:
# Append the derived rows to the extensions table, writing the index as the
# 'probeid' column.  The former ``flavor='sqlite'`` argument is omitted:
# current pandas releases reject that keyword, and a sqlite3 connection is
# handled as SQLite without it.
df.to_sql(RA_ONE_OFF_EXTENSIONS,
          con,
          if_exists='append',
          index_label='probeid')

In [30]:
# Persist the inserted rows and release the database connection.
con.commit()
con.close()