In [30]:
import sys, os, importlib
import rasterio, pycountry
import reverse_geocode

import geopandas as gpd
import pandas as pd

from shapely.geometry import Point
from shapely import wkt

# Import GOST libraries; sys.path.append will be unnecessary if libraries are already installed
sys.path.append("../../../../gostrocks/src")
sys.path.append("../../../../GOST_Urban")

import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint
import src.UrbanRaster as urban
%matplotlib inline  

In [2]:
wbes_coords = "/home/wb411133/data/Projects/ICT_Distance/Data/Masked ES GPS April 15-2021.dta"
inD = pd.read_stata(wbes_coords)
print(inD.shape)
# filter data with broken coordinates
inD = inD.loc[(inD['lat_mask'] > -90) & (inD['lat_mask'] < 90) & (inD['lon_mask'] > -180) & (inD['lon_mask'] < 180)]
print(inD.shape)

(121335, 6)
(121330, 6)


In [3]:
geoms = [Point(row['lon_mask'], row['lat_mask']) for idx, row in inD.iterrows()]
inD = gpd.GeoDataFrame(inD, geometry=geoms, crs = 'epsg:4326')

In [4]:
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
inG = gpd.read_file(global_bounds)
inG = inG.to_crs('epsg:4326')

# Open and load infrastructure data

In [5]:
openCellID = "/home/public/Data/GLOBAL/INFRA/OPENCELLID/cell_towers_2020-04-15-T000000.csv"    
ixp_file = '/home/public/Data/GLOBAL/INFRA/IXPS/ixps_global_geocoded.csv'
colocation_file = '/home/public/Data/GLOBAL/INFRA/DATA_CENTERS/Data_Centers.geojson'
cell_coverage_folder = '/home/public/Data/GLOBAL/INFRA/GSMA/2019/MCE/Data_MCE/Global'
cell_files = ['MCE_Global2G_2020.tif', 'MCE_Global3G_2020.tif', 'MCE_Global4G_2020.tif']
submarine_cable_file = '/home/public/Data/GLOBAL/INFRA/SUBMARINE_CABLES/landing-point-geo.json'

In [6]:
inCell = pd.read_csv(openCellID)
inD_geom = [Point(x) for x in zip(inCell['lon'], inCell['lat'])]
inCell = gpd.GeoDataFrame(inCell, geometry = inD_geom, crs = {'init':'epsg:4326'})

  return _prepare_from_string(" ".join(pjargs))


In [7]:
cell_sindex = inCell.sindex

In [8]:
inCol = gpd.read_file(colocation_file)

In [31]:
inCables = gpd.read_file(submarine_cable_file)
def try_country(x):
    try:
        return(pycountry.countries.search_fuzzy(x.split(",")[-1])[0].alpha_3)
    except:
        return('')

inCables['ISO3'] = inCables['name'].apply(lambda x: try_country(x))

In [32]:
inCables

Unnamed: 0,slug,name,id,geometry,ISO3
0,aasiaat-greenland,"Aasiaat, Greenland",16718,POINT (-52.85912 68.70697),GRL
1,aberdeen-united-kingdom,"Aberdeen, United Kingdom",3537,POINT (-2.10681 57.15379),GBR
2,abidjan-cte-divoire,"Abidjan, Côte d'Ivoire",3316,POINT (-4.02624 5.32352),CIV
3,abidjan-cte-divoire,"Abidjan, Côte d'Ivoire",3316,POINT (-4.02625 5.32349),CIV
4,abu-dhabi-united-arab-emirates,"Abu Dhabi, United Arab Emirates",3858,POINT (54.41888 24.44379),ARE
5,abu-talat-egypt,"Abu Talat, Egypt",9485,POINT (29.70249 31.07185),EGY
6,accra-ghana,"Accra, Ghana",4181,POINT (-0.20091 5.55850),GHA
7,accra-ghana,"Accra, Ghana",4181,POINT (-0.20110 5.55828),GHA
8,achnaba-united-kingdom,"Achnaba, United Kingdom",15840,POINT (-5.36579 56.01942),GBR
9,aden-yemen,"Aden, Yemen",5969,POINT (45.03354 12.80068),YEM


In [10]:
inIXP = pd.read_csv(ixp_file, index_col=0)
inD_geom = [Point(x) for x in zip(inIXP['Lon'], inIXP['Lat'])]
inIXP = gpd.GeoDataFrame(inIXP, geometry = inD_geom, crs = {'init':'epsg:4326'})

In [11]:
gsma2g_R = rasterio.open(os.path.join(cell_coverage_folder, cell_files[0]))
gsma3g_R = rasterio.open(os.path.join(cell_coverage_folder, cell_files[1]))
gsma4g_R = rasterio.open(os.path.join(cell_coverage_folder, cell_files[2]))

# Calculate distances

In [None]:
inD['WBCode'].unique()

array(['YEM', 'BOL', 'ECU', 'PER', 'URY', 'ARG', 'CHL', 'COL', 'PRY',
       'VEN', 'PAN', 'NIC', 'HND', 'SLV', 'GTM', 'CRI', 'MEX', 'CAF',
       'IRQ', 'ZWE', 'RUS', 'RWA', 'ETH', 'CHN', 'BLR', 'NPL', 'PSE',
       'GEO', 'AZE', 'KGZ', 'KAZ', 'BGD', 'ARM', 'AFG', 'UGA', 'UKR',
       'UZB', 'MNG', 'MDA', 'TZA', 'KEN', 'BGR', 'SRB', 'ZMB', 'BIH',
       'ALB', 'XKX', 'HRV', 'MKD', 'SVN', 'MNE', 'DJI', 'LBN', 'ISR',
       'JOR', 'ROU', 'LVA', 'MMR', 'COD', 'TJK', 'EST', 'HUN', 'LTU',
       'POL', 'CZE', 'SVK', 'TUR', 'SWE', 'GHA', 'MDG', 'IND', 'TUN',
       'EGY', 'SEN', 'MRT', 'BDI', 'SSD', 'NAM', 'SDN', 'PAK', 'MAR',
       'MWI', 'NGA', 'BTN', 'IDN', 'VNM', 'SLB', 'PHL', 'KHM', 'TLS',
       'LAO', 'MYS', 'PNG', 'THA', 'LSO', 'BEN', 'MLI', 'CMR', 'GIN',
       'SWZ', 'CIV', 'TGO', 'DOM', 'NER', 'LBR', 'SLE', 'TCD', 'GMB',
       'MOZ', 'SUR', 'GRC', 'CYP', 'MLT', 'ITA', 'HTI', 'PRT', 'SOM',
       'LUX', 'BEL', 'ZAF'], dtype=object)

In [62]:
broken_countries = ['CHL','RUS','CHN','IDN']

epsg = 'epsg:6933'
out_folder =  "/home/wb411133/temp/ICT_distance"
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
    
for cur_country in inD['WBCode'].unique():
    out_file = os.path.join(out_folder, "%s_ICT_distance.csv" % cur_country)
    tPrint(f"***** Processing {cur_country}")
    if not cur_country in broken_countries: # not os.path.exists(out_file) and 
        curB = inG.loc[inG['ISO3'] == cur_country]
        if os.path.exists(out_file):
            curD = pd.read_csv(out_file, index_col=0)
            curD_geom = curD['geometry'].apply(wkt.loads)
            distD = gpd.GeoDataFrame(curD, geometry=curD_geom, crs=epsg)
        else:
            curD = inD.loc[inD['WBCode'] == cur_country]
            distD = curD.to_crs(epsg)
        total_bound = curB.unary_union
        if curB.shape[0] > 0:
            curN = inG.loc[inG.intersects(curB.buffer(0.01).unary_union)]
            curN = curN.loc[curN['ISO3'] != cur_country]        
            if not 'col_dist' in distD.columns:
                selCol = inCol.loc[inCol['geometry'].apply(lambda x: x.intersects(total_bound))]
                if selCol.shape[0] > 0:
                    selCol = selCol.to_crs(epsg)
                    distD['col_dist'] = distD.distance(selCol.unary_union)
                else:
                    distD['col_dist'] = -1

            if not "ixp_dist" in distD.columns:
                selIXP = inIXP.loc[inIXP['geometry'].apply(lambda x: x.intersects(total_bound))]
                if selIXP.shape[0] > 0:
                    selIXP = selIXP.to_crs(epsg)
                    distD['ixp_dist'] = distD.distance(selIXP.unary_union)
                else:
                    distD['ixp_dist'] = -1

            if not 'cables_dist' in distD.columns:
                selCables = inCables.loc[inCables['ISO3'] == cur_country]
                if selCables.shape[0] > 0:
                    selCables = selCables.to_crs(epsg)
                    distD['cables_dist'] = distD.distance(selCables.unary_union)
                else:
                    distD['cables_dist'] = -1

            # Calculate distance to nearest neighbouring country
            if not "ngh1_dist" in distD.columns:
                cnt = 1
                for idx, row in curN.iterrows():
                    distD['ngh%s' % cnt] = row['ISO3']
                    distD['ngh%s_dist' % cnt] = tempD.distance(row['geometry'])    
                    #Calculate distance to submarine cables
                    selCables = inCables.loc[inCables['ISO3'] == row['ISO3']]
                    if selCables.shape[0] > 0:
                        distD['ngh%s_cbl_dist' % cnt] = tempD.distance(selCables.unary_union)
                    else:
                        distD['ngh%s_cbl_dist' % cnt] = -1
                    cnt = cnt +1
            
            tPrint(f"Completed distances to easy measures")
            
            if not 'cell_dist' in distD.columns:
                potential_matches = inCell.loc[list(cell_sindex.intersection(total_bound.bounds))]
                selCell = potential_matches.loc[potential_matches.intersects(total_bound)]
                selCell = selCell.to_crs(epsg)
                distD['cell_dist'] = distD.distance(selCell.unary_union)

            if not "gsma2g" in distD.columns:
                coords = [[x.x,x.y] for x in curD['geometry']]
                distD['gsma2g'] = [x[0] for x in list(gsma2g_R.sample(coords))]
                distD['gsma3g'] = [x[0] for x in list(gsma3g_R.sample(coords))]
                distD['gsma4g'] = [x[0] for x in list(gsma4g_R.sample(coords))]

            pd.DataFrame(distD).to_csv(out_file)

16:15:16	***** Processing YEM





16:15:17	Completed distances to easy measures
16:15:17	***** Processing BOL
16:15:23	Completed distances to easy measures
16:15:23	***** Processing ECU
16:15:24	Completed distances to easy measures
16:15:24	***** Processing PER
16:15:30	Completed distances to easy measures
16:15:30	***** Processing URY
16:15:32	Completed distances to easy measures
16:15:32	***** Processing ARG
16:15:40	Completed distances to easy measures
16:15:40	***** Processing CHL
16:15:40	***** Processing COL
16:15:44	Completed distances to easy measures
16:15:44	***** Processing PRY
16:15:46	Completed distances to easy measures
16:15:46	***** Processing VEN
16:15:49	Completed distances to easy measures
16:15:49	***** Processing PAN
16:15:51	Completed distances to easy measures
16:15:51	***** Processing NIC
16:15:51	Completed distances to easy measures
16:15:51	***** Processing HND
16:15:52	Completed distances to easy measures
16:15:52	***** Processing SLV
16:15:52	Completed distances to easy measures
16:15:52	***

16:20:28	Completed distances to easy measures
16:20:28	***** Processing MLT
16:20:28	Completed distances to easy measures
16:20:29	***** Processing ITA
16:20:31	Completed distances to easy measures
16:20:31	***** Processing HTI
16:20:32	Completed distances to easy measures
16:20:32	***** Processing PRT
16:20:33	Completed distances to easy measures
16:20:33	***** Processing SOM
16:20:33	Completed distances to easy measures
16:20:33	***** Processing LUX
16:20:35	Completed distances to easy measures
16:20:35	***** Processing BEL
16:20:36	Completed distances to easy measures
16:20:36	***** Processing ZAF
16:20:39	Completed distances to easy measures


In [60]:
distD.head()

Unnamed: 0,idstd,lat_mask,lon_mask,survey,WBCode,type,geometry,col_dist,ixp_dist,cables_dist,...,gsma4g,ngh1,ngh1_dist,ngh1_cbl_dist,ngh2,ngh2_dist,ngh2_cbl_dist,ngh3,ngh3_dist,ngh3_cbl_dist
40962,557272.0,5.535672,-0.180664,Ghana2013,GHA,Enterprise Survey,POINT (-17431.550 705134.955),3496.35767,3021.862031,-1,...,3,BFA,705335.289167,-1,CIV,705339.495464,705344.962033,TGO,705339.244329,705344.289625
40963,557273.0,5.65502,-0.249684,Ghana2013,GHA,Enterprise Survey,POINT (-24091.059 720290.020),11293.173948,13563.46713,-1,...,3,BFA,720677.689149,-1,CIV,720681.845936,720687.330568,TGO,720681.648187,720686.70415
40964,557274.0,5.559282,-0.200195,Ghana2013,GHA,Enterprise Survey,POINT (-19316.117 708133.303),120.885942,522.235936,-1,...,3,BFA,708381.607284,-1,CIV,708385.799105,708391.27101,TGO,708385.563613,708390.612075
40965,557275.0,5.665509,-0.015049,Ghana2013,GHA,Enterprise Survey,POINT (-1451.999 721621.723),12322.893732,22386.432865,-1,...,3,BFA,721608.099923,-1,CIV,721612.434232,721617.851814,TGO,721612.044169,721617.059889
40966,557276.0,5.555793,-0.232442,Ghana2013,GHA,Enterprise Survey,POINT (-22427.509 707690.235),2963.224312,3429.481417,-1,...,3,BFA,708030.42594,-1,CIV,708034.592711,708040.073743,TGO,708034.384224,708039.438072


In [58]:
tempD.head()

Unnamed: 0,idstd,lat_mask,lon_mask,survey,WBCode,type,geometry,col_dist,ixp_dist,cables_dist,...,ngh_dist,ngh1,ngh1_dist,ngh1_cbl_dist,ngh2,ngh2_dist,ngh2_cbl_dist,ngh3,ngh3_dist,ngh3_cbl_dist
40962,557272.0,5.535672,-0.180664,Ghana2013,GHA,Enterprise Survey,POINT (-17431.550 705134.955),3496.35767,3021.862031,-1,...,144893.290359,BFA,548793.877043,-1,CIV,246153.29794,705344.962033,TGO,144893.290359,705344.289625
40963,557273.0,5.65502,-0.249684,Ghana2013,GHA,Enterprise Survey,POINT (-24091.059 720290.020),11293.173948,13563.46713,-1,...,138489.539924,BFA,532268.163592,-1,CIV,241171.181429,720687.330568,TGO,138489.539924,720686.70415
40964,557274.0,5.559282,-0.200195,Ghana2013,GHA,Enterprise Survey,POINT (-19316.117 708133.303),120.885942,522.235936,-1,...,143943.181644,BFA,545266.826355,-1,CIV,244522.157618,708391.27101,TGO,143943.181644,708390.612075
40965,557275.0,5.665509,-0.015049,Ghana2013,GHA,Enterprise Survey,POINT (-1451.999 721621.723),12322.893732,22386.432865,-1,...,121943.746236,BFA,541795.634032,-1,CIV,263779.495087,721617.851814,TGO,121943.746236,721617.059889
40966,557276.0,5.555793,-0.232442,Ghana2013,GHA,Enterprise Survey,POINT (-22427.509 707690.235),2963.224312,3429.481417,-1,...,146350.518748,BFA,544258.735298,-1,CIV,241383.772402,708040.073743,TGO,146350.518748,708039.438072


In [43]:
distD

Unnamed: 0,idstd,lat_mask,lon_mask,survey,WBCode,type,geometry,col_dist,ixp_dist,cables_dist,cell_dist,gsma2g,gsma3g,gsma4g,ngh_dist
40962,557272.0,5.535672,-0.180664,Ghana2013,GHA,Enterprise Survey,POINT (-17431.550 705134.955),3496.357670,3021.862031,-1,1512.672453,3,3,3,144893.290359
40963,557273.0,5.655020,-0.249684,Ghana2013,GHA,Enterprise Survey,POINT (-24091.059 720290.020),11293.173948,13563.467130,-1,110.452581,3,3,3,138489.539924
40964,557274.0,5.559282,-0.200195,Ghana2013,GHA,Enterprise Survey,POINT (-19316.117 708133.303),120.885942,522.235936,-1,44.741239,3,3,3,143943.181644
40965,557275.0,5.665509,-0.015049,Ghana2013,GHA,Enterprise Survey,POINT (-1451.999 721621.723),12322.893732,22386.432865,-1,60.710794,3,3,3,121943.746236
40966,557276.0,5.555793,-0.232442,Ghana2013,GHA,Enterprise Survey,POINT (-22427.509 707690.235),2963.224312,3429.481417,-1,38.970416,3,3,3,146350.518748
40967,557277.0,5.585322,-0.096141,Ghana2013,GHA,Enterprise Survey,POINT (-9276.324 711440.041),721.038938,10409.602428,-1,94.505224,3,3,3,134658.552916
40968,557278.0,5.616839,-0.237031,Ghana2013,GHA,Enterprise Survey,POINT (-22870.238 715442.025),6340.540837,8639.317164,-1,60.946591,3,3,3,141022.813886
40969,557279.0,5.569413,-0.194975,Ghana2013,GHA,Enterprise Survey,POINT (-18812.456 709419.805),1440.302848,1710.887247,-1,69.442328,3,3,3,142646.621890
40970,557280.0,5.642561,-0.272832,Ghana2013,GHA,Enterprise Survey,POINT (-26324.508 718708.062),10894.903068,13207.364224,-1,395.040476,3,3,3,141197.812960
40971,557281.0,5.529931,-0.227839,Ghana2013,GHA,Enterprise Survey,POINT (-21983.338 704405.863),4462.555439,4459.641579,-1,71.919320,3,3,3,148503.053919


In [20]:
curB.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich