## Tower Data
The table below highlights lat/lon entries in the cellular data that may be erroneous.
Namely, these are overly large lat and lon values whose decimal point appears to be in the wrong place.
These have been cleaned up in the following way:
* Lat values in the -150_000 range have been transformed into -1.5... by shifting the decimal point
* Lon values in the 3_000_000 range have been turned into 30... by shifting the decimal point

In [2]:
import pandas as pd
from giga.schemas.cellular import CellTowerTable

# Load the original (not yet cleaned) tower CSV and flatten it into a
# DataFrame, one row per tower record, so suspicious entries can be inspected.
towers = CellTowerTable.from_csv('parameter_workspace/IHS-RWD.csv')
tower_rows = [dict(tower) for tower in towers.towers]
dft = pd.DataFrame(tower_rows)
dft

Unnamed: 0,tower_id,operator,outdoor,lat,lon,height,technologies
0,IHS_STH_069M,IHS,True,-2.61522,29.46072,36.0,"[2G, 3G, 4G]"
1,IHS_STH_042M,IHS,False,-2.59830,29.74199,6.0,"[2G, 3G]"
2,IHS_STH_025M,IHS,False,-2.48961,29.77220,36.0,"[2G, 3G, 4G]"
3,IHS_STH_036M,IHS,True,-2.48449,29.85669,36.0,"[2G, 3G]"
4,IHS_STH_092M,IHS,True,-2.78618,29.67667,54.0,"[2G, 3G, 4G]"
...,...,...,...,...,...,...,...
1202,IHS_KGL_385M,IHS,True,-1.92117,30.05715,18.0,"[2G, 3G]"
1203,IHS_KGL_395M,IHS,True,-1.94995,30.08169,30.0,"[2G, 3G]"
1204,IHS_KGL_478M,IHS,True,-1.91317,30.07819,7.0,"[2G, 3G]"
1205,IHS_KGL_480M,IHS,True,-1.93399,30.16299,30.0,"[2G, 3G]"


In [2]:
# Rows whose latitude is below -3; the regular entries shown in this notebook
# sit between roughly -1 and -3, so anything lower is treated as suspicious.
dft[dft['lat'] < -3]

Unnamed: 0,tower_id,operator,outdoor,lat,lon,height,technologies
853,IHS_NTH_114M,IHS,True,-152506.0,30.09266,36.0,"[2G, 3G]"
854,IHS_NTH_115M,IHS,True,-149783.0,30.0012,36.0,"[2G, 3G]"
898,IHS_NTH_125M,IHS,True,-139734.0,3007836.0,36.0,"[2G, 3G]"


In [3]:
# Rows whose longitude is above 35; the regular entries shown in this notebook
# sit around 29-30, so anything higher is treated as suspicious.
dft[dft['lon'] > 35]

Unnamed: 0,tower_id,operator,outdoor,lat,lon,height,technologies
898,IHS_NTH_125M,IHS,True,-139734.0,3007836.0,36.0,"[2G, 3G]"
916,IHS_STH_155M,IHS,True,-2.28374,29657623.0,36.0,"[2G, 3G]"


## Cleaned Up Data
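The cleaned-up tower data is loaded below, followed by a histogram of each school's distance to its closest cell tower and a map of the towers and schools.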

In [4]:
import pandas as pd
from giga.schemas.cellular import CellTowerTable

# Reload the towers from the cleaned CSV. This deliberately rebinds `towers`
# and `dft`, so every cell below works on the corrected coordinates.
towers = CellTowerTable.from_csv('parameter_workspace/IHS-RWD-clean.csv')
clean_tower_rows = [dict(tower) for tower in towers.towers]
dft = pd.DataFrame(clean_tower_rows)
dft

Unnamed: 0,tower_id,operator,outdoor,lat,lon,height,technologies
0,IHS_STH_069M,IHS,True,-2.61522,29.46072,36.0,"[2G, 3G, 4G]"
1,IHS_STH_042M,IHS,False,-2.59830,29.74199,6.0,"[2G, 3G]"
2,IHS_STH_025M,IHS,False,-2.48961,29.77220,36.0,"[2G, 3G, 4G]"
3,IHS_STH_036M,IHS,True,-2.48449,29.85669,36.0,"[2G, 3G]"
4,IHS_STH_092M,IHS,True,-2.78618,29.67667,54.0,"[2G, 3G, 4G]"
...,...,...,...,...,...,...,...
1202,IHS_KGL_385M,IHS,True,-1.92117,30.05715,18.0,"[2G, 3G]"
1203,IHS_KGL_395M,IHS,True,-1.94995,30.08169,30.0,"[2G, 3G]"
1204,IHS_KGL_478M,IHS,True,-1.91317,30.07819,7.0,"[2G, 3G]"
1205,IHS_KGL_480M,IHS,True,-1.93399,30.16299,30.0,"[2G, 3G]"


In [None]:
from giga.schemas.school import GigaSchoolTable

# Load the school table and preview it as a DataFrame, one row per school record.
schools = GigaSchoolTable.from_csv('sample_workspace/rwanda/schools.csv')
pd.DataFrame([dict(school) for school in schools.schools])

In [None]:
from giga.models.nodes.graph.pairwise_distance_model import PairwiseDistanceModel

# Set up the distance model, then hand it the school and tower coordinates.
m = PairwiseDistanceModel()

school_coords = schools.to_coordinates()
tower_coords = towers.to_coordinates()

# Schools are passed first, towers second.
# NOTE(review): presumably this yields one record per school/tower pair — confirm
# against PairwiseDistanceModel.run.
distances = m.run((school_coords, tower_coords))

In [None]:
# Group all computed distances by the id of the first coordinate of each pair
# (presumably the school, since schools were passed first to the model — TODO confirm).
# Despite its name, `closest` holds every distance per id; the minimum is taken later.
closest = {}
for pair in distances:
    closest.setdefault(pair.coordinate1.coordinate_id, []).append(pair.distance)

In [None]:
# Smallest distance per id, divided by 1000 so it can be plotted in km
# (assumes the model reports distances in metres — TODO confirm).
cl = []
for id_distances in closest.values():
    cl.append(min(id_distances) / 1000.0)

In [None]:
import matplotlib.pyplot as plt

# Histogram of the per-school distance to the nearest tower, drawn through the
# explicit figure/axes interface.
fig, ax = plt.subplots()
ax.hist(cl, bins=20)
ax.grid()
ax.set_xlabel('Distance to Closest Cell Tower (km)')
ax.set_ylabel('School Counts')
plt.show()

In [None]:
from giga.viz.notebooks.fiber import plot_data_map, default_rwanda_map

# Draw towers and schools on the default Rwanda base map.
# NOTE(review): the empty list is the first positional argument of plot_data_map —
# presumably the fiber coordinates, of which there are none here; confirm.
rwanda_map = default_rwanda_map()
plot_data_map([], tower_coords, school_coords, m=rwanda_map)