In [3]:
%matplotlib inline
from ggplot import *
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime
import csv
from pandas import DataFrame
from pymongo import MongoClient, ASCENDING, DESCENDING
from bson.code import Code

# Connect to MongoDB; no host/port is given, so pymongo's default connection settings apply.
client = MongoClient()
# Database handle used by the analysis cells below.
anycast = client.anycast_monitoring

# One letter per root server analysed; letter X maps to the collection '<X>_root'.
root_list = 'acdfijklm'

# Roots treated as having a single upstream, i.e. the AS directly in front of the
# root server's own AS is always the same kind of AS:
# - F-Root: the penultimate AS is the operator's location identifier.
# - C-Root: run by a commercial organization that uses Cogent (the operator itself)
#   as its upstream, so it is a similar case to I-Root.
# - I-Root: uses upstream AS 8674 (Netnod), which is the AS that peers on behalf of I-Root.
root_with_single_upstream = 'cfi'

# Server-side JavaScript predicate for a MongoDB `$where` filter: it matches
# documents whose IPv4 path (path4) is strictly shorter than their IPv6 path (path6).
where_query = """
function() {
    if(this.path4.length < this.path6.length ) {
        return true;
    } else {
        return false;
    }
}
"""

# Introduction

What causes a shorter IPv4 path (or, in general, a shorter IPv6 path)?
From what I have seen so far, there are several possible causes:
1. direct peering only for IPv6 traffic
2. caused by the network, including the peer itself --> the root server's ASN and its penultimate ASN are identical in both paths, but other ASes along the path are different (hypothetical)
3. caused by root server itself

For causes 2 and 3, it is impossible to tell which one applies, because the only information available is the AS path. We would need the routing-policy information of each intermediate AS to distinguish between them.

## 1. Direct peering. How common is it?

In [4]:
# For every root server: how many peer records have a shorter IPv4 path, and
# how many of those are explained by direct peering between the peer and the root.
for root in root_list:
    coll = anycast['{}_root'.format(root)]
    # NOTE(review): Collection.count() is deprecated since PyMongo 3.7 and removed
    # in 4.0; switch to coll.count_documents({}) when upgrading the driver.
    total = coll.count()

    # A direct-peering IPv4 path contains only the peer and the root server's AS
    # (2 entries). Roots in `root_with_single_upstream` always have their upstream /
    # location AS in front of the root AS, so for them direct peering means 3 entries.
    direct_len = 3 if root in root_with_single_upstream else 2

    # `$where` evaluates JavaScript for every document, so run the query once and
    # derive both counters from the same cursor instead of querying twice.
    shorter_v4 = 0
    counter = 0
    for item in coll.find({'$where': where_query}):
        shorter_v4 += 1
        if len(item['path4']) == direct_len:
            counter += 1

    # Guard the ratios: an empty collection or a root without any shorter-IPv4
    # record must not abort the whole loop with ZeroDivisionError.
    pct_shorter = shorter_v4 / total * 100 if total else 0.0
    pct_direct = counter / shorter_v4 * 100 if shorter_v4 else 0.0

    print('\n{}-Root'.format(root))
    print('\tTotal data:\t{}'.format(total))
    print('\tshorter IPv4:\t{}'.format(shorter_v4))
    print('\tpercentage:\t{:.2f}%'.format(pct_shorter))
    print('\tshorter IPv4 due to direct peering: {} ({:.2f}%)'.format(counter, pct_direct))
        


a-Root
	Total data:	4213
	shorter IPv4:	714
	percentage:	16.95%
	shorter IPv4 due to direct peering: 7 (0.98%)

c-Root
	Total data:	1741
	shorter IPv4:	147
	percentage:	8.44%
	shorter IPv4 due to direct peering: 57 (38.78%)

d-Root
	Total data:	3593
	shorter IPv4:	266
	percentage:	7.40%
	shorter IPv4 due to direct peering: 0 (0.00%)

f-Root
	Total data:	2408
	shorter IPv4:	315
	percentage:	13.08%
	shorter IPv4 due to direct peering: 169 (53.65%)

i-Root
	Total data:	2667
	shorter IPv4:	588
	percentage:	22.05%
	shorter IPv4 due to direct peering: 383 (65.14%)

j-Root
	Total data:	4362
	shorter IPv4:	1859
	percentage:	42.62%
	shorter IPv4 due to direct peering: 634 (34.10%)

k-Root
	Total data:	3576
	shorter IPv4:	484
	percentage:	13.53%
	shorter IPv4 due to direct peering: 336 (69.42%)

l-Root
	Total data:	4545
	shorter IPv4:	556
	percentage:	12.23%
	shorter IPv4 due to direct peering: 286 (51.44%)

m-Root
	Total data:	4003
	shorter IPv4:	238
	percentage:	5.95%
	shorter IPv4 due to dir

## 2. The root server's ASN and its penultimate ASN are identical, but other ASes along the path are different (hypothetical). How common is it?

In [25]:
# NOTE: despite its name, `j` holds the C-Root collection in this cell.
j = anycast['c_root']

# List shorter-IPv4 records whose last two hops (penultimate AS and root AS)
# are the same in the IPv4 and the IPv6 path.
for record in j.find({'$where': where_query}):
    v4_path, v6_path = record['path4'], record['path6']
    if v4_path[-2:] != v6_path[-2:]:
        continue
    print('{}:\t{} {}'.format(record['peer'], v4_path, v6_path))

52888:	[52888, 1916, 3356, 174, 2149] [52888, 1251, 20080, 2914, 174, 2149]
1916:	[1916, 3356, 174, 2149] [1916, 20080, 11537, 3549, 174, 2149]
12637:	[12637, 174, 2149] [12637, 3549, 174, 2149]
52888:	[52888, 1916, 3356, 174, 2149] [52888, 1251, 20080, 2914, 174, 2149]
1916:	[1916, 3356, 174, 2149] [1916, 20080, 11537, 3549, 174, 2149]
12637:	[12637, 174, 2149] [12637, 3549, 174, 2149]
15469:	[15469, 6762, 174, 2149] [15469, 13030, 2828, 174, 2149]
52888:	[52888, 1916, 3356, 174, 2149] [52888, 1251, 20080, 2914, 174, 2149]
1916:	[1916, 3356, 174, 2149] [1916, 20080, 11537, 3549, 174, 2149]
12637:	[12637, 174, 2149] [12637, 3549, 174, 2149]
15469:	[15469, 174, 2149] [15469, 13030, 2828, 174, 2149]
52888:	[52888, 1916, 3356, 174, 2149] [52888, 1251, 20080, 2914, 174, 2149]
1916:	[1916, 3356, 174, 2149] [1916, 20080, 11537, 3549, 174, 2149]
12637:	[12637, 174, 2149] [12637, 3549, 174, 2149]
28917:	[28917, 174, 2149] [28917, 3356, 174, 2149]
52888:	[52888, 1916, 3356, 174, 2149] [52888, 1

In [None]:
# NOTE(review): this cell repeats the direct-peering summary from section 1 and
# has never been executed (no execution count) -- confirm whether it is still needed.
for root in root_list:
    coll = anycast['{}_root'.format(root)]
    # NOTE(review): Collection.count() is deprecated since PyMongo 3.7 and removed
    # in 4.0; switch to coll.count_documents({}) when upgrading the driver.
    total = coll.count()

    # Direct peering: the IPv4 path only contains the peer and the root server's AS
    # (2 entries). Roots in `root_with_single_upstream` always have their upstream /
    # location AS in front of the root AS, so for them direct peering means 3 entries.
    direct_len = 3 if root in root_with_single_upstream else 2

    # `$where` evaluates JavaScript for every document, so run the query once and
    # derive both counters from the same cursor instead of querying twice.
    shorter_v4 = 0
    counter = 0
    for item in coll.find({'$where': where_query}):
        shorter_v4 += 1
        if len(item['path4']) == direct_len:
            counter += 1

    # Guard the ratios: an empty collection or a root without any shorter-IPv4
    # record must not abort the whole loop with ZeroDivisionError.
    pct_shorter = shorter_v4 / total * 100 if total else 0.0
    pct_direct = counter / shorter_v4 * 100 if shorter_v4 else 0.0

    print('\n{}-Root'.format(root))
    print('\tTotal data:\t{}'.format(total))
    print('\tshorter IPv4:\t{}'.format(shorter_v4))
    print('\tpercentage:\t{:.2f}%'.format(pct_shorter))
    print('\tshorter IPv4 due to direct peering: {} ({:.2f}%)'.format(counter, pct_direct))
        

## 3. For peers with a path-length discrepancy > 1, why?

In [33]:
# NOTE: in this cell `m` holds the F-Root collection.
m = anycast['f_root']

# Print every shorter-IPv4 record whose IPv6 path is more than one hop longer
# than its IPv4 path.
# (A stricter variant additionally required item['path4'][-2:] == item['path6'][-2:].)
for record in m.find({'$where': where_query}):
    hop_gap = len(record['path6']) - len(record['path4'])
    if hop_gap <= 1:
        continue
    print('{}:\t{} {}'.format(record['peer'], record['path4'], record['path6']))

6762:	[6762, 174, 1280, 3557] [6762, 6175, 2497, 3257, 27319, 3557]
12779:	[12779, 174, 1280, 3557] [12779, 6175, 2497, 3257, 27319, 3557]
34695:	[34695, 30134, 3557] [34695, 3549, 6939, 33071, 3557]
559:	[559, 30132] [559, 6939, 33071, 3557]
8447:	[8447, 30132] [8447, 6939, 33071, 3557]
559:	[559, 30132] [559, 6939, 33071, 3557]
22548:	[22548, 30122, 3557] [22548, 16735, 12956, 6762, 1280]
8447:	[8447, 30132] [8447, 6939, 33071, 3557]
8447:	[8447, 30132] [8447, 6939, 33071, 3557]
1930:	[1930, 30129, 3557] [1930, 20965, 3549, 6939, 33071, 3557]
22548:	[22548, 30122, 3557] [22548, 16735, 12956, 6762, 1280]
1916:	[1916, 22548, 30122, 3557] [1916, 20080, 23148, 6939, 33071, 3557]
1916:	[1916, 22548, 30122, 3557] [1916, 20080, 23148, 6939, 33071, 3557]
7575:	[7575, 23708, 3557] [7575, 24490, 24489, 9270, 7660, 2500, 24047, 3557]
7575:	[7575, 23708, 3557] [7575, 24490, 24489, 9270, 7660, 2500, 24047, 3557]
22548:	[22548, 30122, 3557] [22548, 3549, 6939, 1280, 3557]
1916:	[1916, 22548, 30122

## 4. Special case: an 'intermediate' AS (e.g., 6939 for J-Root). How many are there?

In [45]:
# For every root server: among shorter-IPv4 records whose IPv6 path is exactly one
# hop longer, count those where the IPv6 path only adds one 'intermediate' AS.
for root in root_list:
    coll = anycast['{}_root'.format(root)]

    count_total = 0
    count = 0
    for item in coll.find({'$where': where_query}):
        path4, path6 = item['path4'], item['path6']
        if len(path6) - len(path4) != 1:
            continue
        count_total += 1
        # 'Intermediate' AS: all but one entry of the IPv6 path also appear in the
        # IPv4 path. NOTE(review): set() collapses repeated ASNs, so an IPv6 path
        # that merely prepends an AS already present in path4 passes this test too.
        if len(set(path6).intersection(path4)) == len(path6) - 1:
            count += 1

    # A root without any 1-hop-difference record must not raise ZeroDivisionError.
    pct_found = count / count_total * 100 if count_total else 0.0
    print('{}-Root, total of peers with 1 different hop: {}\t\tfound: {} ({:.2f}%)'.format(root, count_total, count, pct_found))

a-Root, total of peers with 1 different hop: 662		found: 196 (29.61%)
c-Root, total of peers with 1 different hop: 84		found: 39 (46.43%)
d-Root, total of peers with 1 different hop: 239		found: 188 (78.66%)
f-Root, total of peers with 1 different hop: 243		found: 82 (33.74%)
i-Root, total of peers with 1 different hop: 469		found: 390 (83.16%)
j-Root, total of peers with 1 different hop: 1226		found: 149 (12.15%)
k-Root, total of peers with 1 different hop: 451		found: 333 (73.84%)
l-Root, total of peers with 1 different hop: 452		found: 286 (63.27%)
m-Root, total of peers with 1 different hop: 232		found: 123 (53.02%)


# M-Root

![alt text](img/shorter-ipv4-m.png)

**Notable feature:**
- Consistently 1 hop difference

In [2]:
m = anycast['m_root']

# One line per shorter-IPv4 record: timestamp, peer, hop difference and both paths.
row_fmt = '[{}] {}: ({})\t{} {}'
for record in m.find({'$where': where_query}):
    v4_path, v6_path = record['path4'], record['path6']
    # Negative value: the IPv4 path is shorter by that many entries.
    hop_delta = len(v4_path) - len(v6_path)
    print(row_fmt.format(record['timestamp'], record['peer'], hop_delta, v4_path, v6_path))

[1204329600] 286: (-1)	[286, 7500] [286, 3257, 7500]
[1204329600] 34225: (-1)	[34225, 1299, 7500] [34225, 41692, 3257, 7500]
[1204329600] 8447: (-1)	[8447, 1299, 7500] [8447, 6175, 3257, 7500]
[1209600000] 34225: (-1)	[34225, 1299, 7500] [34225, 41692, 3257, 7500]
[1251763200] 286: (-1)	[286, 7500] [286, 3257, 7500]
[1254355200] 286: (-1)	[286, 7500] [286, 3257, 7500]
[1257033600] 286: (-1)	[286, 7500] [286, 3257, 7500]
[1259625600] 286: (-1)	[286, 7500] [286, 3257, 7500]
[1262304000] 1103: (-1)	[1103, 3257, 7500] [1103, 20965, 2200, 7500]
[1262304000] 286: (-1)	[286, 7500] [286, 3257, 7500]
[1264982400] 513: (-2)	[513, 6730, 7500] [513, 559, 20965, 2200, 7500]
[1264982400] 196613: (-1)	[196613, 1125, 1103, 3257, 7500] [196613, 1125, 1103, 20965, 2200, 7500]
[1264982400] 286: (-1)	[286, 7500] [286, 3257, 7500]
[1267401600] 513: (-2)	[513, 6730, 7500] [513, 559, 20965, 2200, 7500]
[1267401600] 196613: (-1)	[196613, 1125, 1103, 3257, 7500] [196613, 1125, 1103, 20965, 2200, 7500]
[1267401

It seems that the difference is mostly due to a direct peering session between M-Root and the peer.

Now, let's try to quantify this.

In [18]:
# Quantify, for M-Root, how many shorter-IPv4 records are explained by direct peering.

# Bind the collection explicitly: `m` is also assigned to another root's collection
# elsewhere in the notebook, so relying on the leftover value makes the result
# depend on cell execution order.
m = anycast['m_root']

# get total peer data for M-root
print('General introduction...\n')
# NOTE(review): Collection.count() is deprecated since PyMongo 3.7 and removed
# in 4.0; switch to m.count_documents({}) when upgrading the driver.
total = m.count()

# `$where` evaluates JavaScript for every document, so run the query once and
# derive both counters from the same cursor instead of querying twice.
shorter_v4 = 0
counter = 0
for item in m.find({'$where': where_query}):
    shorter_v4 += 1
    # direct peering: the IPv4 path only contains the peer and M-Root's AS
    if len(item['path4']) == 2:
        counter += 1

# Guard the ratios against an empty collection / no shorter-IPv4 record.
pct_shorter = shorter_v4 / total * 100 if total else 0.0
pct_direct = counter / shorter_v4 * 100 if shorter_v4 else 0.0

print('Total data:\t{}'.format(total))

print('shorter IPv4:\t{}'.format(shorter_v4))
print('percentage:\t{:.2f}%'.format(pct_shorter))

# find out how many peer with shorter IPv4, that it happens due to direct peering
print('\nFind out how many peer with shorter IPv4, that it happens due to direct peering\n')

print('total peer with shorter IPv4 due to direct peering: {} ({:.2f}%)'.format(counter, pct_direct))
print('')

General introduction...

Total data:	4003
shorter IPv4:	238
percentage:	5.95%

Find out how many peer with shorter IPv4, that it happens due to direct peering

total peer with shorter IPv4 due to direct peering: 113 (47.48%)

