*This notebook reads the raw Atlas traceroute measurement data from the `ra_traceroute_parameters` table, reduces it to (probeid, timestamp, h1_latencies, h2_latencies), and then appends the resulting dataframe to the `ra_latencies` table.*
- - - 

In [126]:
import ipaddress
import json
import sqlite3
import time

import pandas as pd
import requests

In [127]:
# SQLite database file opened by this notebook.
DB_LOCATION = 'lastmile.db'
# Source table: the raw traceroute rows are read from here.
RA_TRACEROUTE_PARAMETERS = 'ra_traceroute_parameters'
# Destination table: the per-measurement latency rows are appended here.
RA_LATENCIES = 'ra_latencies'

In [128]:
# Open the SQLite database; this connection is committed and closed in the final cell.
con = sqlite3.connect(DB_LOCATION)

In [129]:
# Collapse the hop-1 and hop-2 rows of each (probeid, starttime) measurement
# into a single row: the hop numbers and the hop blobs are each joined with ';'.
# h1_latencies / h2_latencies are selected as empty-string placeholders and are
# filled in by later cells.
# NOTE(review): no ordering is specified for GROUP_CONCAT, so hop 1 is not
# guaranteed to precede hop 2 inside result_hop_blob -- confirm, because later
# cells treat the first ';'-separated part as hop 1 and the second as hop 2.
query = '''SELECT    probeid
                   , starttime as timestamp
                   , GROUP_CONCAT(result_hop_no, ';') as hop_no
                   , GROUP_CONCAT(result_hop_blob, ';') as result_hop_blob
                   , '' as h1_latencies
                   , '' as h2_latencies
           FROM      %s
           WHERE     result_hop_no IN (1, 2)
           GROUP BY  probeid, starttime
        '''%(RA_TRACEROUTE_PARAMETERS)

# Run the query and load the grouped rows into a DataFrame.
df = pd.read_sql(query, con)

In [130]:
def return_all_rtt_field_values(json_string):
    """Extract every ``rtt`` value from a JSON-encoded list of hop replies.

    Parameters
    ----------
    json_string : str
        JSON text expected to decode to a list of objects that each carry
        an ``rtt`` key.

    Returns
    -------
    str or None
        The ``rtt`` values converted to text and joined with ``', '``
        (an empty string for an empty list). ``None`` when the text cannot
        be decoded, or when any entry has no ``rtt`` key or is not a mapping.
    """
    try:
        replies = json.loads(json_string)
    except (TypeError, ValueError) as e:
        # Report the undecodable input and stop here, instead of falling
        # through to code that would reference a value that was never bound.
        print(e, type(e), json_string)
        return None

    try:
        return ', '.join([str(reply['rtt']) for reply in replies])
    except (KeyError, TypeError):
        # One '*' is printed per hop whose replies lack a usable rtt; the
        # whole hop is then reported as None.
        print('*', end="")
        return None

In [131]:
def return_concat_from_field_values(concat_result_blob):
    """Turn a ``'<hop blob>;<hop blob>'`` string into ``'<rtts>; <rtts>'``.

    The input is split on ``';'``. When that does not yield exactly two
    parts (or the input cannot be split at all) the result is
    ``'None; None'``. Otherwise single quotes in each part are replaced by
    double quotes and each part is handed to ``return_all_rtt_field_values``;
    the two results are joined with ``'; '``.
    """
    try:
        first_blob, second_blob = concat_result_blob.split(';')
    except Exception:
        return '%s; %s' % (None, None)

    # The blobs use single quotes; convert them to double quotes before
    # they are parsed as JSON by the helper.
    rtt_texts = [
        return_all_rtt_field_values(blob.replace("'", '"'))
        for blob in (first_blob, second_blob)
    ]
    return '%s; %s' % (rtt_texts[0], rtt_texts[1])

In [132]:
# Preview the grouped rows before the RTT values are extracted.
df.head()

Unnamed: 0,probeid,timestamp,hop_no,result_hop_blob,h1_latencies,h2_latencies
0,10006,1406548034,1;2,"[{'size': 76, 'rtt': 0.559, 'ttl': 64, 'from':...",,
1,10006,1406562432,1;2,"[{'size': 76, 'rtt': 0.579, 'ttl': 64, 'from':...",,
2,10006,1406576835,1;2,"[{'size': 76, 'rtt': 0.58, 'ttl': 64, 'from': ...",,
3,10006,1406591230,1;2,"[{'size': 76, 'rtt': 0.769, 'ttl': 64, 'from':...",,
4,10006,1406605631,1;2,"[{'size': 76, 'rtt': 0.534, 'ttl': 64, 'from':...",,


In [133]:
def get_rtts(result_hop_blob):
    """Return the ``'<h1 rtts>; <h2 rtts>'`` string for one concatenated hop blob.

    Every occurrence of ``True`` in the blob is wrapped in double quotes
    before the blob is passed on to ``return_concat_from_field_values``.
    """
    quoted_blob = result_hop_blob.replace('True', '"True"')
    return return_concat_from_field_values(quoted_blob)
# Build the intermediate '<h1 rtts>; <h2 rtts>' column from each row's concatenated hop blobs.
df['rtts_h1_h2'] = df['result_hop_blob'].apply(get_rtts)

****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************

In [134]:
def get_h1_rtt(rtts):
    """Return the part of ``rtts`` that precedes its single ``';'``.

    Raises ValueError when ``rtts`` does not contain exactly one ``';'``.
    """
    first_hop, _second_hop = rtts.split(';')
    return first_hop
def get_h2_rtt(rtts):
    """Return the part of ``rtts`` that follows its single ``';'``.

    The combined string is built with ``'; '`` as separator but split on
    ``';'`` alone, so the second part starts with a space; surrounding
    whitespace is stripped so the value does not carry that leading blank.

    Raises ValueError when ``rtts`` does not contain exactly one ``';'``.
    """
    _first_hop, second_hop = rtts.split(';')
    return second_hop.strip()

# Split the intermediate 'h1; h2' string into the two latency columns,
# replacing the empty-string placeholders selected by the SQL query.
df['h1_latencies'] = df['rtts_h1_h2'].apply(get_h1_rtt)
df['h2_latencies'] = df['rtts_h1_h2'].apply(get_h2_rtt)

In [135]:
# Remove the helper columns, leaving probeid, timestamp and the two latency columns.
for obsolete_column in ('hop_no', 'result_hop_blob', 'rtts_h1_h2'):
    del df[obsolete_column]

In [136]:
# Preview the frame after the helper columns were dropped.
df.head()

Unnamed: 0,probeid,timestamp,h1_latencies,h2_latencies
0,10006,1406548034,"0.559, 0.429, 0.412","20.49, 27.571, 17.716"
1,10006,1406562432,"0.579, 0.431, 0.423","17.7, 17.281, 51.434"
2,10006,1406576835,"0.58, 0.438, 0.42","34.678, 17.776, 17.473"
3,10006,1406591230,"0.769, 0.453, 0.478","17.577, 18.21, 17.533"
4,10006,1406605631,"0.534, 0.422, 0.416","17.442, 37.561, 17.526"


In [137]:
# Non-null entries per column. Hops whose RTTs could not be extracted are
# formatted into text containing 'None', so they are still counted here.
df.count()

probeid         137017
timestamp       137017
h1_latencies    137017
h2_latencies    137017
dtype: int64

In [138]:
# Switch on SQLite foreign-key enforcement on this connection before the insert.
cur = con.execute('pragma foreign_keys=ON')

In [139]:
# Index the frame by (probeid, timestamp); the same names are passed to
# to_sql as index_label when the frame is written.
# NOTE(review): this cell rebinds df, so re-running it on its own fails once
# probeid/timestamp are no longer columns.
index_label = ['probeid', 'timestamp']
df = df.set_index(index_label)

In [140]:
# Preview the frame with (probeid, timestamp) as its index.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,h1_latencies,h2_latencies
probeid,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
10006,1406548034,"0.559, 0.429, 0.412","20.49, 27.571, 17.716"
10006,1406562432,"0.579, 0.431, 0.423","17.7, 17.281, 51.434"
10006,1406576835,"0.58, 0.438, 0.42","34.678, 17.776, 17.473"
10006,1406591230,"0.769, 0.453, 0.478","17.577, 18.21, 17.533"
10006,1406605631,"0.534, 0.422, 0.416","17.442, 37.561, 17.526"


In [141]:
# Append the latency rows to the destination table, writing the
# (probeid, timestamp) index as columns of those names.
# `flavor` is deliberately not passed: newer pandas releases reject the
# keyword, and older ones already defaulted to SQLite for a plain DBAPI
# connection such as `con`.
df.to_sql(
    RA_LATENCIES,
    con,
    if_exists='append',
    index_label=index_label,
)

In [142]:
# Persist the inserted rows, then release the database connection.
con.commit()
con.close()