*This notebook reads the probe API data from the `lastmile.db` SQLite database, fetches the holder name of each ASN from the RIPEstat API and the network type of each ASN from PeeringDB, and stores all of this information in the `ra_asn` table.*
- - - 

In [2]:
from IPython.core.display import HTML

# Read both stylesheets inside a `with` block so the file handles are closed
# as soon as their contents have been loaded (the bare open().read() form
# leaves them open until garbage collection).
with open('style-table.css') as table_css, open('style-notebook.css') as notebook_css:
    css = table_css.read() + notebook_css.read()

# Last expression of the cell: renders the combined CSS into the notebook page.
HTML('<style>{}</style>'.format(css))

In [3]:
import sqlite3
import pandas as pd

In [1]:
# Path of the SQLite database file that is read from and written back to.
DB_LOCATION = 'lastmile.db'
# Source table: its asn_v4 / asn_v6 columns supply the list of ASNs.
PROBE_API = 'ra_probe_api'
# Destination table that the per-ASN rows are appended to.
RA_ASN = 'ra_asn'

In [5]:
# Open the SQLite database that holds the probe table.
con = sqlite3.connect(DB_LOCATION)

In [6]:
# Collect every non-empty ASN seen in either the IPv4 or the IPv6 column of the
# probe table; UNION merges the two lists and drops duplicates.
query = '''SELECT distinct(asn_v4) as asn FROM %s
           WHERE asn_v4 <> ''
           
           UNION
           
           SELECT distinct(asn_v6) as asn FROM %s
           WHERE asn_v6 <> ''
           
        '''%(PROBE_API, PROBE_API)

In [7]:
# Load the distinct ASNs into a one-column DataFrame.
df = pd.read_sql(query, con)
# DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20.
# Order the rows explicitly by ASN, which is what the rest of the notebook displays.
df = df.sort_values('asn')

  from ipykernel import kernelapp as app


In [8]:
# Sanity check: number of distinct ASNs returned by the query.
df.count()

asn    3795
dtype: int64

In [9]:
import time
# Stamp every row with the same collection time (Unix epoch, truncated to whole seconds).
timestamp = int(time.time())
df['timestamp'] = timestamp

In [10]:
# Preview the first rows after adding the timestamp column.
df.head()

Unnamed: 0,asn,timestamp
0,2,1446038087
1,3,1446038087
2,4,1446038087
3,17,1446038087
4,25,1446038087


In [11]:
import requests
import sys
import numpy as np

In [12]:
def get_json_resource_from_absolute_uri(url, query_params, timeout=30):
    """Fetch `url` with an HTTP GET and return its decoded JSON body.

    The call is best-effort: any failure while requesting or decoding is
    printed to stderr and None is returned instead of raising.

    Args:
        url: absolute URI of the resource.
        query_params: mapping passed to `requests.get` as the query string.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the caller
            indefinitely.

    Returns:
        The decoded JSON payload, or None if the request or the decoding failed.
    """
    try:
        res = requests.get(url, params=query_params, timeout=timeout)
    except Exception as e:
        print(e, file=sys.stderr)
        return None

    try:
        return res.json()
    except Exception as e:
        print(e, file=sys.stderr)
        return None

In [13]:
def get_holder_from_asn(asn):
    """Return the holder name of an AS from the RIPEstat `as-overview` data call.

    Prints a '.' to stdout for every successful lookup as a progress marker;
    failures are reported on stderr.

    Args:
        asn: AS number as an int or a numeric string. None, the empty string
            and anything that cannot be converted to an int yield None without
            issuing a request.

    Returns:
        The value of `data.holder` in the response, or None if the ASN is
        invalid, the request failed, or the response lacks that field.
    """
    # Compare by value: `asn is ''` tests object identity against a literal.
    if asn is None or (isinstance(asn, str) and asn == ''):
        return None
    try:
        asn = int(asn)
    except (TypeError, ValueError):
        return None

    base_uri = 'https://stat.ripe.net'
    url = '%s/data/as-overview/data.json'%base_uri
    params = {'resource' : asn}

    try:
        res = get_json_resource_from_absolute_uri(url, params)
    except Exception as e:
        print(e, file=sys.stderr)
        # Keep `res` bound so the lookup below fails in a controlled way.
        res = None

    try:
        holder = res['data']['holder']
    except Exception as e:
        print('%s, %s'%(asn,e), file=sys.stderr, end='')
        holder = None
    else:
        print('.', end='')
    return holder

In [14]:
# Resolve the holder name of every ASN. get_holder_from_asn performs an HTTP lookup
# per valid ASN and prints a '.' for each success, so this cell is slow and network-bound.
df['asn_holder_name'] = df['asn'].apply(get_holder_from_asn)

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [15]:
# Preview the frame with the holder names attached.
df.head()

Unnamed: 0,asn,timestamp,asn_holder_name
0,2,1446038087,"UDEL-DCN - University of Delaware,US"
1,3,1446038087,MIT-GATEWAYS - Massachusetts Institute of Tech...
2,4,1446038087,"ISI-AS - University of Southern California,US"
3,17,1446038087,"PURDUE - Purdue University,US"
4,25,1446038087,"UCB - University of California at Berkeley,US"


In [16]:
# Non-null counts per column; shows how many ASNs got a holder name.
df.count()

asn                3795
timestamp          3795
asn_holder_name    3795
dtype: int64

In [17]:
import pymysql

In [18]:
# Build an in-memory lookup {asn: (name, info_type)} from the PeeringDB
# `peerParticipants` table so the per-ASN lookups need no further queries.
# NOTE(review): the credentials are hardcoded; presumably this is PeeringDB's
# public read-only account -- confirm, otherwise load them from the environment.
d = {}
con = pymysql.connect(
    host='peeringdb.com',
    user='peeringdb',
    passwd='peeringdb',
    db='Peering',
)
try:
    cur = con.cursor()
    query = 'select asn, name, info_type from peerParticipants where asn is not null;'
    num_records = cur.execute(query)
    for asn, name, info_type in cur.fetchall():
        d[asn] = (name, info_type)
finally:
    # Release the remote connection even if the query fails.
    con.close()

In [19]:
def get_network_type_from_asn(asn):
    """Return the PeeringDB network type (`info_type`) recorded for an ASN.

    Reads the module-level dict `d`, which maps an int ASN to a
    `(name, info_type)` tuple.

    Args:
        asn: AS number as an int or a numeric string; None and the string
            'None' are treated as missing.

    Returns:
        The `info_type` of the matching entry, or None if the ASN is missing,
        not convertible to an int, or absent from `d`.
    """
    if asn is None or asn == 'None':
        return None
    try:
        key = int(asn)
    except (TypeError, ValueError, OverflowError):
        # Not a usable AS number (e.g. NaN, inf or free text).
        return None

    entry = d.get(key)
    if entry is None:
        return None
    _name, info_type = entry
    return info_type

In [20]:
# Attach the PeeringDB network type to every ASN. The step stays best-effort,
# but a failure is reported instead of being discarded, so a missing
# `network_type` column does not go unnoticed.
try:
    df['network_type'] = df['asn'].apply(get_network_type_from_asn)
except Exception as e:
    print('network_type lookup failed: %s' % e, file=sys.stderr)

In [21]:
# Non-null counts per column; network_type is set only for ASNs found in the PeeringDB lookup.
df.count()

asn                3795
timestamp          3795
asn_holder_name    3795
network_type       1537
dtype: int64

In [22]:
def rename_cable_dsl_isp_to_nsp(type_name):
    """Map the network type "Cable/DSL/ISP" to "NSP"; return any other value unchanged."""
    return "NSP" if type_name == "Cable/DSL/ISP" else type_name

In [23]:
# Fold the "Cable/DSL/ISP" label into "NSP". The step stays best-effort, but a
# failure is reported instead of being discarded.
try:
    df['network_type'] = df['network_type'].apply(rename_cable_dsl_isp_to_nsp)
except Exception as e:
    print('network_type rename failed: %s' % e, file=sys.stderr)

In [24]:
# Placeholder column: this notebook fills in no access-type data, so every value is None.
df['asn_access_type'] = None

In [25]:
# Make the ASN the index so to_sql writes it out as the 'asn' column.
# NOTE: not idempotent -- re-running this cell fails because 'asn' is no longer a column.
df = df.set_index('asn')

In [26]:
# Preview the final frame, now indexed by ASN.
df.head()

Unnamed: 0_level_0,timestamp,asn_holder_name,network_type,asn_access_type
asn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1446038087,"UDEL-DCN - University of Delaware,US",,
3,1446038087,MIT-GATEWAYS - Massachusetts Institute of Tech...,Educational/Research,
4,1446038087,"ISI-AS - University of Southern California,US",,
17,1446038087,"PURDUE - Purdue University,US",,
25,1446038087,"UCB - University of California at Berkeley,US",,


In [27]:
# Final non-null counts per column; asn_access_type is 0 because it holds only None.
df.count()

timestamp          3795
asn_holder_name    3795
network_type       1537
asn_access_type       0
dtype: int64

In [28]:
import sqlite3
# `con` was rebound to the PeeringDB MySQL connection above, so reconnect to the SQLite file.
con = sqlite3.connect(DB_LOCATION)
# Ask SQLite to enforce foreign-key constraints on this connection.
cur = con.execute('pragma foreign_keys=ON')

In [29]:
# Append the per-ASN rows to the destination table, writing the index as the
# 'asn' column. The `flavor` argument was deprecated in pandas 0.19 and removed
# in 0.23; a sqlite3 connection is accepted directly.
# NOTE: if_exists='append' means each run adds a fresh set of rows, told apart
# by their `timestamp` value.
df.to_sql(
    RA_ASN,
    con,
    if_exists='append',
    index_label='asn',
)

In [30]:
# Persist the appended rows and release the SQLite connection.
con.commit()
con.close()