In [1]:
%matplotlib inline
from ggplot import *
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime
import csv
from pandas import DataFrame
from pymongo import MongoClient, ASCENDING, DESCENDING
from bson.code import Code

# Connect to MongoDB; MongoClient() without arguments uses PyMongo's defaults (localhost:27017).
client = MongoClient()
# Database holding the monitoring data; one "<letter>_root" collection per root server is read from it.
anycast = client.anycast_monitoring

# Letters of the root servers to analyse; the string is iterated one character at a time.
root_list = 'acdfijklm'

# Roots that are treated as having a single upstream AS in front of the root's own AS
# (membership is tested per character with `in`).
# - D-root is NOT listed: it uses commercial providers (PCH and Maxgigapop) as upstreams and
#   publishes no peering-policy information, which is taken to mean a closed policy.
# - F-root is listed: its penultimate AS is a location identifier.
# - C-root: operated by a commercial organization and does not implement an open peering policy.
#   NOTE(review): the original note says C-root "should be excluded", yet 'c' is part of the
#   string below -- confirm whether "excluded" refers to this list or to the analysis as a whole.
# - I-root is listed: peers reach it through upstream AS 8674 (Netnod).
root_with_single_upstream = 'cfi'

# Server-side JavaScript predicate for a MongoDB `$where` filter: it selects documents whose
# path4 is longer than path6, i.e. measurements where the IPv6 path is the shorter one.
where_query = """
function() {
    if(this.path4.length > this.path6.length ) {
        return true;
    } else {
        return false;
    }
}
"""

# Introduction

What causes a shorter IPv4 path (or, in general, a shorter IPv6 path)?
From what I have seen so far, there are several possible causes:
1. Direct peering that is used only for IPv6 traffic
2. (?) Caused by the network, including the peer itself --> the root server's ASN and its penultimate ASN are identical, but the other ASes along the path are different (hypothetical)
3. (?) Caused by the root server itself

For causes 2 and 3, it is impossible to tell which one applies, because the only information available is the AS path. We would need the routing-policy information of each intermediate AS to distinguish between them.

## 1. Direct peering

In [3]:
# For every root server, report how many measurements have a shorter IPv6 path than
# IPv4 path, and how many of those are attributed to direct peering.
for root in root_list:
    coll = anycast['{}_root'.format(root)]
    # NOTE: Collection.count() is deprecated in PyMongo 3.7 and removed in 4.0;
    # on a newer driver use coll.count_documents({}) instead.
    total = coll.count()

    # Length of path6 that is counted as "direct peering".
    # Presumably path6 is the AS path from the peer to the root, so a direct peering is
    # [peer, root] (2 entries), or [peer, upstream, root] (3 entries) for roots that sit
    # behind a single upstream AS -- TODO confirm against the collection schema.
    direct_path_len = 3 if root in root_with_single_upstream else 2

    # `$where` evaluates JavaScript for every document, so run it only once and derive
    # both the number of matches and the direct-peering count from the same cursor.
    # Only path6 is read, hence the projection.
    shorter_v6 = 0
    counter = 0
    for item in coll.find({'$where': where_query}, {'path6': 1}):
        shorter_v6 += 1
        if len(item['path6']) == direct_path_len:
            counter += 1

    # Guard the ratios: an empty collection or a root without any match would
    # otherwise raise ZeroDivisionError and abort the whole report.
    shorter_pct = shorter_v6 / total * 100 if total else 0.0
    direct_pct = counter / shorter_v6 * 100 if shorter_v6 else 0.0

    print('\n{}-Root'.format(root))
    print('\tTotal data:\t{}'.format(total))
    print('\tshorter IPv6:\t{}'.format(shorter_v6))
    print('\tpercentage:\t{:.2f}%'.format(shorter_pct))
    print('\tshorter IPv6 due to direct peering: {} ({:.2f}%)'.format(counter, direct_pct))
        


a-Root
	Total data:	4236
	shorter IPv6:	1157
	percentage:	27.31%
	shorter IPv6 due to direct peering: 98 (8.47%)

c-Root
	Total data:	1741
	shorter IPv6:	47
	percentage:	2.70%
	shorter IPv6 due to direct peering: 15 (31.91%)

d-Root
	Total data:	3593
	shorter IPv6:	376
	percentage:	10.46%
	shorter IPv6 due to direct peering: 0 (0.00%)

f-Root
	Total data:	2408
	shorter IPv6:	299
	percentage:	12.42%
	shorter IPv6 due to direct peering: 101 (33.78%)

i-Root
	Total data:	2667
	shorter IPv6:	472
	percentage:	17.70%
	shorter IPv6 due to direct peering: 74 (15.68%)

j-Root
	Total data:	4362
	shorter IPv6:	596
	percentage:	13.66%
	shorter IPv6 due to direct peering: 100 (16.78%)

k-Root
	Total data:	3576
	shorter IPv6:	174
	percentage:	4.87%
	shorter IPv6 due to direct peering: 137 (78.74%)

l-Root
	Total data:	4545
	shorter IPv6:	213
	percentage:	4.69%
	shorter IPv6 due to direct peering: 58 (27.23%)

m-Root
	Total data:	4003
	shorter IPv6:	1795
	percentage:	44.84%
	shorter IPv6 due to dire