*This notebook reads the distinct ASNs from the `metadata_snapshot_extensions` table of the `lastmile.db` sqlite3 database, fetches the holder name of each ASN from the RIPEstat API and the network type of each ASN from PeeringDB, and stores all of this information in the `sk_asn` table.*
- - - 

In [1]:
import sqlite3
import pandas as pd

In [2]:
# Path of the sqlite3 database file, followed by the name of the table this
# notebook reads from and the name of the table it appends to.
DB_LOCATION = 'lastmile.db'
METADATA_SNAPSHOT_EXTENSIONS = 'metadata_snapshot_extensions'
SK_ASN = 'sk_asn'

In [29]:
# Open the sqlite3 database that holds the source table.
con = sqlite3.connect(DB_LOCATION)

In [4]:
# One row per distinct ASN in the source table. The table name is a notebook
# constant (not external input), so it is interpolated into the SQL text.
query = '''SELECT distinct(asn) as asn FROM %s
        '''%(METADATA_SNAPSHOT_EXTENSIONS)

In [5]:
# Load the distinct ASNs into a DataFrame.
df = pd.read_sql(query, con)
# DataFrame.sort() has been removed from pandas. Called without arguments it
# ordered the rows by index label, which is what sort_index() does.
df = df.sort_index()

In [6]:
# Non-null count per column; here that is the number of distinct non-null ASNs loaded.
df.count()

asn    43
dtype: int64

In [7]:
import time
# Take the current Unix time once (whole seconds) and stamp every row with it,
# so all rows written by this run share the same value.
timestamp = int(time.time())
df['timestamp'] = timestamp

In [8]:
# Preview the first rows to confirm the timestamp column was added.
df.head()

Unnamed: 0,asn,timestamp
0,5089,1429899546
1,2856,1429899546
2,12390,1429899546
3,12576,1429899546
4,5607,1429899546


In [9]:
import requests
import sys
import numpy as np

In [10]:
def get_json_resource_from_absolute_uri(url, query_params, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    This is a best-effort helper: any failure (connection error, timeout,
    body that is not valid JSON) is reported on stderr and signalled to the
    caller by a ``None`` return value rather than by an exception.

    Parameters
    ----------
    url : str
        Absolute URI of the resource.
    query_params : dict
        Query-string parameters, forwarded to ``requests.get`` as ``params``.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without it a stalled server would block the notebook indefinitely.

    Returns
    -------
    object or None
        The parsed JSON document, or ``None`` if the request or the JSON
        decoding failed.
    """
    try:
        res = requests.get(url, params=query_params, timeout=timeout)
    except Exception as e:
        print(e, file=sys.stderr)
        return None

    try:
        return res.json()
    except Exception as e:
        print(e, file=sys.stderr)
        return None

In [11]:
def get_holder_from_asn(asn):
    """Return the holder name that RIPEstat reports for ``asn``.

    Queries the ``as-overview`` data call on ``https://stat.ripe.net`` and
    returns the ``data.holder`` field of the response.

    Progress is written as a side effect: a ``.`` on stdout for every
    successful lookup, and ``"<asn>, <error>"`` on stderr for every lookup
    whose response lacks the expected field.

    Parameters
    ----------
    asn : int or str
        AS number; anything that ``int()`` accepts.

    Returns
    -------
    str or None
        The holder name, or ``None`` when ``asn`` is missing, empty, not
        convertible to an integer, or when the lookup fails.
    """
    # Use `==` for the empty-string test: an identity check against a literal
    # depends on string interning and is not a reliable comparison.
    if asn is None or asn == '':
        return None
    try:
        asn = int(asn)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text, unsupported types, and infinities are all "no ASN".
        return None

    base_uri = 'https://stat.ripe.net'
    url = '%s/data/as-overview/data.json' % base_uri
    params = {'resource': asn}

    # Bind the result up front so that a failed fetch reaches the extraction
    # step with a defined value instead of an unbound name.
    res = None
    try:
        res = get_json_resource_from_absolute_uri(url, params)
    except Exception as e:
        print(e, file=sys.stderr)

    try:
        holder = res['data']['holder']
    except Exception as e:
        print('%s, %s' % (asn, e), file=sys.stderr, end='')
        holder = None
    else:
        print('.', end='')
    return holder

In [12]:
# Resolve the holder name for every ASN, one lookup per row; the helper prints
# a '.' for each lookup that succeeds.
df['asn_holder_name'] = df['asn'].apply(get_holder_from_asn)

...........................................

In [13]:
# Preview the frame with the new asn_holder_name column.
df.head()

Unnamed: 0,asn,timestamp,asn_holder_name
0,5089,1429899546,"NTL Virgin Media Limited,GB"
1,2856,1429899546,"BT-UK-AS BT Public Internet Service,GB"
2,12390,1429899546,KINGSTON-UK-AS KCOM Group Public Limited Compa...
3,12576,1429899546,ORANGE-PCS Orange Personal Communications Serv...
4,5607,1429899546,BSKYB-BROADBAND-AS British Sky Broadcasting Li...


In [14]:
# Non-null counts per column: asn_holder_name equals asn only if every lookup returned a holder.
df.count()

asn                43
timestamp          43
asn_holder_name    43
dtype: int64

In [15]:
import pymysql

In [16]:
# Build the lookup table `d` = {asn: (name, info_type)} from the PeeringDB
# `peerParticipants` table; get_network_type_from_asn() reads it by that name.
d = {}

# The MySQL handles get their own names so this cell does not rebind the
# `con` / `query` variables of the sqlite3 session, and both handles are closed
# as soon as the rows have been copied instead of being left open.
# NOTE(review): host and guest credentials are hard-coded here; confirm this
# endpoint is still the intended PeeringDB source.
peeringdb_con = pymysql.connect(host='peeringdb.com',
                                user='peeringdb',
                                passwd='peeringdb',
                                db='Peering')
try:
    peeringdb_cur = peeringdb_con.cursor()
    try:
        peeringdb_query = 'select asn, name, info_type from peerParticipants where asn is not null;'
        # Kept under its original name: the value execute() returned for the query.
        num_records = peeringdb_cur.execute(peeringdb_query)
        for asn, name, info_type in peeringdb_cur.fetchall():
            d[asn] = (name, info_type)
    finally:
        peeringdb_cur.close()
finally:
    peeringdb_con.close()

In [17]:
def get_network_type_from_asn(asn):
    """Look up the network type recorded for ``asn`` in the mapping ``d``.

    ``d`` is the module-level dictionary that maps an integer ASN to a
    ``(name, info_type)`` pair; the ``info_type`` element is returned.

    Returns ``None`` when ``asn`` is ``None`` or the string ``'None'``, and
    also when the conversion to ``int`` or the dictionary lookup fails for
    any reason.
    """
    is_missing = (asn == None) or (asn == 'None')
    if is_missing:
        return None

    try:
        _, network_type = d[int(asn)]
    except Exception:
        return None
    return network_type

In [18]:
# Attach the PeeringDB network type of every ASN.
# get_network_type_from_asn() already turns each failed lookup into None, so
# an exception here points at a structural problem (for example a missing
# 'asn' column). Report it instead of discarding it, and still create the
# column so the following cells see the same set of columns either way.
try:
    df['network_type'] = df['asn'].apply(get_network_type_from_asn)
except Exception as e:
    print('network_type lookup failed: %s' % e, file=sys.stderr)
    df['network_type'] = None

In [19]:
# network_type can have fewer non-null entries than asn: ASNs absent from the lookup table map to None.
df.count()

asn                43
timestamp          43
asn_holder_name    43
network_type       31
dtype: int64

In [20]:
def rename_cable_dsl_isp_to_nsp(type_name):
    """Map the label ``"Cable/DSL/ISP"`` to ``"NSP"``.

    Any other value, including ``None``, is returned unchanged.
    """
    return "NSP" if type_name == "Cable/DSL/ISP" else type_name

In [21]:
# Relabel "Cable/DSL/ISP" as "NSP". The helper only performs an equality test,
# so a failure here most likely means the 'network_type' column is absent.
# Report it on stderr rather than continuing silently.
try:
    df['network_type'] = df['network_type'].apply(rename_cable_dsl_isp_to_nsp)
except Exception as e:
    print('network_type relabelling failed: %s' % e, file=sys.stderr)

In [22]:
# Placeholder column: this notebook leaves asn_access_type empty (None) for every row.
df['asn_access_type'] = None

In [23]:
# Make the ASN the index so that to_sql() can write it out as the 'asn' column.
# Not re-runnable as is: after this cell 'asn' is no longer a regular column.
df = df.set_index('asn')

In [24]:
# Preview the re-indexed frame.
df.head()

Unnamed: 0_level_0,timestamp,asn_holder_name,network_type,asn_access_type
asn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5089,1429899546,"NTL Virgin Media Limited,GB",NSP,
2856,1429899546,"BT-UK-AS BT Public Internet Service,GB",NSP,
12390,1429899546,KINGSTON-UK-AS KCOM Group Public Limited Compa...,NSP,
12576,1429899546,ORANGE-PCS Orange Personal Communications Serv...,NSP,
5607,1429899546,BSKYB-BROADBAND-AS British Sky Broadcasting Li...,NSP,


In [25]:
# Final non-null counts before writing; asn is no longer listed because it is now the index.
df.count()

timestamp          43
asn_holder_name    43
network_type       31
asn_access_type     0
dtype: int64

In [26]:
import sqlite3
# Open a sqlite3 connection for the write and switch on foreign-key
# enforcement for it (in SQLite this pragma is a per-connection setting).
con = sqlite3.connect(DB_LOCATION)
cur = con.execute('pragma foreign_keys=ON')

In [30]:
# Append the rows to the sk_asn table; the DataFrame index is written as the
# 'asn' column. The `flavor` argument is left out: pandas deprecated and then
# removed it (passing it now raises TypeError), and 'sqlite' was already the
# default in the versions that accepted it.
df.to_sql(
    SK_ASN,
    con,
    if_exists='append',
    index_label='asn',
)

In [31]:
# Persist the appended rows and release the database handle.
con.commit()
con.close()