In [51]:
import pandas as pd
import json
import numpy as np
from pandas.io.json import json_normalize
from scipy.spatial.distance import cosine
import csv
import ast

In [3]:
# loading the raw data
df = pd.read_csv('../../data/CDR/hash/sample.csv') 
df.columns = ['index','time','source','dest','call']

In [11]:
df.index = df.source

In [5]:

# loading the region-cell data
table = pd.read_csv('../../data/CDR/hash/intersect.csv', header = None) 
table.columns = ['region', 'proportions']
table.index = table.region
table.sort_values(['region'], inplace=True)

# loading the cell-proportion data
prop_table = pd.read_csv('../../data/CDR/hash/cell_intersect.csv', header = None) 
prop_table.columns = ['cell', 'proportions']
prop_table.index = prop_table.cell
prop_table.sort_values(['cell'], inplace=True)


In [44]:
prop_table.head()

Unnamed: 0_level_0,cell,proportions
cell,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,{}
2,2,{}
3,3,{}
4,4,{}
5,5,{}


In [96]:


def get_cells_per_region(table, region_id):
    ids = table.iloc[region_id].proportions
    ids = ast.literal_eval(table.get_value(region_id, "proportions"))
    return ids.keys()

def get_call_data(source, dest):        
    source_dict = get_cells_per_region(table, source)
    dest_dict = get_cells_per_region(table, dest)

    subset = df[df.index.isin(source_dict)]
    subset.index = subset.dest
    subset = subset[subset.index.isin(dest_dict)]
            
    return subset

def calculate_actual_call(s_cell, d_cell, call, s_region, d_region):
    """
        Create another column on the subset DataFrame that is proportional to the regions.
    """
    source_prop = ast.literal_eval(prop_table.get_value(s_cell, "proportions"))
    dest_prop = ast.literal_eval(prop_table.get_value(d_cell, "proportions"))

    try:
        final = source_prop[str(s_region)] * dest_prop[str(d_region)] * call
    except:
        final = 0
    
    return final

In [98]:
region_network = pd.DataFrame(columns=['time','source_region','dest_region','adjusted_call'])

for s in range(52,81):
    for d in range(52,81):
        # get a subset of records for the source and dest
        subdf = get_call_data(s, d)        
        subdf["source_region"] = s
        subdf["dest_region"] = d
        print (s, d)
        # create a column with adjusted call values
        try:
            subdf["adjusted_call"] = np.vectorize(calculate_actual_call)(subdf["source"], subdf["dest"], subdf["call"], subdf["source_region"], subdf["dest_region"])
        except:
            print ("error in making a column...")
            continue
        
        # do aggregation for 
        subdf = subdf.groupby("time").agg({
                    "source_region": "first",
                    "dest_region": "first",               
                    "adjusted_call": "sum"
                })
        region_network = region_network.append(subdf)

        
region_network.time = region_network.index        
output_filename = '../../data/CDR/generated/region_network.csv'
region_network.to_csv(output_filename, encoding='utf-8', index=False)

52 52
52 53
52 54
52 55
52 56
52 57
52 58
52 59
52 60
52 61
52 62
52 63
52 64
52 65
52 66
52 67
52 68
52 69
52 70
52 71
52 72
52 73
52 74
52 75
52 76
52 77
52 78
52 79
52 80
53 52
53 53
53 54
53 55
53 56
error in making a column...
53 57
53 58
53 59
53 60
53 61
53 62
53 63
53 64
53 65
53 66
53 67
53 68
53 69
53 70
53 71
53 72
53 73
53 74
53 75
53 76
53 77
53 78
53 79
53 80
54 52
error in making a column...
54 53
error in making a column...
54 54
error in making a column...
54 55
error in making a column...
54 56
error in making a column...
54 57
error in making a column...
54 58
error in making a column...
54 59
error in making a column...
54 60
error in making a column...
54 61
error in making a column...
54 62
error in making a column...
54 63
error in making a column...
54 64
error in making a column...
54 65
error in making a column...
54 66
error in making a column...
54 67
error in making a column...
54 68
error in making a column...
54 69
error in making a column...
54 70
error 