In [1]:
import sys, os, importlib
import rasterio
import reverse_geocode

import geopandas as gpd
import pandas as pd

from shapely.geometry import Point

# Import GOST libraries; sys.path.append will be unnecessary if libraries are already installed
sys.path.append("../../../../gostrocks/src")
sys.path.append("../../../../GOST_Urban")

import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint
import src.UrbanRaster as urban
%matplotlib inline  

  shapely_geos_version, geos_capi_version_string


In [2]:
# Survey points with masked coordinates, stored as a Stata file
wbes_coords = "/home/wb411133/data/Projects/ICT_Distance/Data/Masked ES GPS April 15-2021.dta"
inD = pd.read_stata(wbes_coords)
print(inD.shape)

# Drop rows with broken coordinates: keep only latitudes strictly inside
# (-90, 90) and longitudes strictly inside (-180, 180)
lat_ok = inD['lat_mask'].gt(-90) & inD['lat_mask'].lt(90)
lon_ok = inD['lon_mask'].gt(-180) & inD['lon_mask'].lt(180)
inD = inD.loc[lat_ok & lon_ok]
print(inD.shape)

(121335, 6)
(121330, 6)


In [3]:
# Build the point geometries in one vectorised call instead of looping over
# rows with iterrows(), which is very slow for a table of this size.
# The geometry is assigned positionally, so the filtered (non-contiguous)
# index of inD is left untouched.
geoms = gpd.points_from_xy(inD['lon_mask'], inD['lat_mask'])
inD = gpd.GeoDataFrame(inD, geometry=geoms, crs='epsg:4326')

In [4]:
# Country (admin-0) boundary polygons, reprojected to EPSG:4326 so they share
# a CRS with the survey points
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
inG = gpd.read_file(global_bounds).to_crs('epsg:4326')

# Open and load infrastructure data

In [5]:
# --- Point / vector infrastructure layers ---------------------------------
openCellID = '/home/public/Data/GLOBAL/INFRA/OPENCELLID/cell_towers_2020-04-15-T000000.csv'
ixp_file = "/home/public/Data/GLOBAL/INFRA/IXPS/ixps_global_geocoded.csv"
colocation_file = "/home/public/Data/GLOBAL/INFRA/DATA_CENTERS/Data_Centers.geojson"
submarine_cable_file = "/home/public/Data/GLOBAL/INFRA/SUBMARINE_CABLES/landing-point-geo.json"

# --- GSMA coverage rasters: one GeoTIFF per network generation ------------
cell_coverage_folder = "/home/public/Data/GLOBAL/INFRA/GSMA/2019/MCE/Data_MCE/Global"
cell_files = [
    "MCE_Global2G_2020.tif",
    "MCE_Global3G_2020.tif",
    "MCE_Global4G_2020.tif",
]

In [6]:
# Load the cell tower table and turn it into a GeoDataFrame of EPSG:4326 points.
inCell = pd.read_csv(openCellID)
# Vectorised geometry construction; avoids creating one Point per row in a
# Python-level loop.
inD_geom = gpd.points_from_xy(inCell['lon'], inCell['lat'])
# Use the plain authority string for the CRS: the {'init': ...} dict form is
# deprecated and emits a warning, and this matches how the survey points are
# tagged.
inCell = gpd.GeoDataFrame(inCell, geometry=inD_geom, crs='epsg:4326')

  return _prepare_from_string(" ".join(pjargs))


In [7]:
# Keep a handle on the cell tower spatial index; the per-country loop queries
# it with each country's bounding box to pre-select candidate towers.
cell_sindex = inCell.sindex

In [8]:
# Read the layer at colocation_file (a data-centers GeoJSON) into a GeoDataFrame
inCol = gpd.read_file(colocation_file)

In [None]:
# Read the submarine cable landing points; an ISO3 column is derived from
# their 'name' field below.
inCables = gpd.read_file(submarine_cable_file)
def try_country(x):
    """Resolve the country part of a place name to an ISO3 code.

    The text after the last comma of ``x`` is passed to
    ``pycountry.countries.search_fuzzy`` and the ``alpha_3`` attribute of the
    first match is returned.

    Parameters
    ----------
    x : str
        Name whose last comma-separated token is the country text,
        e.g. ``"City, Country"``.

    Returns
    -------
    str
        The matched alpha-3 code, or ``''`` when ``x`` is not a string, has no
        country text after the last comma, or no country matches.

    Raises
    ------
    ImportError
        If ``pycountry`` is not installed. This is raised on purpose rather
        than being masked as "no match".
    """
    # Imported locally: the notebook's import cell does not import pycountry,
    # and keeping it outside the try block makes a missing package fail loudly.
    import pycountry

    # Missing values (None / NaN) have no .split(); treat them as "no match".
    if not isinstance(x, str):
        return ''

    country_text = x.split(",")[-1].strip()
    if not country_text:
        return ''

    try:
        return pycountry.countries.search_fuzzy(country_text)[0].alpha_3
    except LookupError:
        # search_fuzzy signals "nothing matched" with LookupError; any other
        # exception is a real problem and is allowed to propagate.
        return ''

# Derive an ISO3 code for every landing point from its 'name' field
inCables['ISO3'] = inCables['name'].apply(try_country)

In [None]:
# Load the geocoded IXP table (first CSV column is the index) and turn it into
# a GeoDataFrame of EPSG:4326 points.
inIXP = pd.read_csv(ixp_file, index_col=0)
inD_geom = gpd.points_from_xy(inIXP['Lon'], inIXP['Lat'])
# Plain authority string instead of the deprecated {'init': ...} dict form,
# matching how the survey points are tagged.
inIXP = gpd.GeoDataFrame(inIXP, geometry=inD_geom, crs='epsg:4326')

In [None]:
# Open the three GSMA coverage rasters, in the order they appear in
# cell_files (2G, 3G, 4G). The handles stay open because the distance loop
# samples them later.
gsma2g_R, gsma3g_R, gsma4g_R = [
    rasterio.open(os.path.join(cell_coverage_folder, tif_name))
    for tif_name in cell_files[:3]
]

# Calculate distances

In [None]:
# Show the distinct WBCode values in the survey points; the distance loop
# below iterates over exactly this set.
inD['WBCode'].unique()

array(['YEM', 'BOL', 'ECU', 'PER', 'URY', 'ARG', 'CHL', 'COL', 'PRY',
       'VEN', 'PAN', 'NIC', 'HND', 'SLV', 'GTM', 'CRI', 'MEX', 'CAF',
       'IRQ', 'ZWE', 'RUS', 'RWA', 'ETH', 'CHN', 'BLR', 'NPL', 'PSE',
       'GEO', 'AZE', 'KGZ', 'KAZ', 'BGD', 'ARM', 'AFG', 'UGA', 'UKR',
       'UZB', 'MNG', 'MDA', 'TZA', 'KEN', 'BGR', 'SRB', 'ZMB', 'BIH',
       'ALB', 'XKX', 'HRV', 'MKD', 'SVN', 'MNE', 'DJI', 'LBN', 'ISR',
       'JOR', 'ROU', 'LVA', 'MMR', 'COD', 'TJK', 'EST', 'HUN', 'LTU',
       'POL', 'CZE', 'SVK', 'TUR', 'SWE', 'GHA', 'MDG', 'IND', 'TUN',
       'EGY', 'SEN', 'MRT', 'BDI', 'SSD', 'NAM', 'SDN', 'PAK', 'MAR',
       'MWI', 'NGA', 'BTN', 'IDN', 'VNM', 'SLB', 'PHL', 'KHM', 'TLS',
       'LAO', 'MYS', 'PNG', 'THA', 'LSO', 'BEN', 'MLI', 'CMR', 'GIN',
       'SWZ', 'CIV', 'TGO', 'DOM', 'NER', 'LBR', 'SLE', 'TCD', 'GMB',
       'MOZ', 'SUR', 'GRC', 'CYP', 'MLT', 'ITA', 'HTI', 'PRT', 'SOM',
       'LUX', 'BEL', 'ZAF'], dtype=object)

In [None]:
# Country codes to skip explicitly (presumably ones that fail or take too
# long -- confirm before re-enabling the commented list).
broken_countries = [] #['CHL','RUS','CHN','IDN']

# Projected CRS used for every distance calculation.
# NOTE(review): EPSG:6933 is an equal-area projection, so planar distances are
# distorted with latitude -- confirm this is acceptable for the analysis.
epsg = 'epsg:6933'
out_folder =  "/home/wb411133/temp/ICT_distance"
os.makedirs(out_folder, exist_ok=True)


def _distance_or_sentinel(points, features, crs, no_data=-1):
    """Distance from each point to the nearest of ``features``.

    Parameters
    ----------
    points : GeoDataFrame
        Points already projected to ``crs``.
    features : GeoDataFrame
        Candidate features in any CRS; reprojected to ``crs`` here.
    crs : str
        Projected CRS in which the distance is measured.
    no_data : scalar, default -1
        Value returned when ``features`` is empty.

    Returns
    -------
    Series or scalar
        One distance per point (in the units of ``crs``), or ``no_data``.
    """
    if features.shape[0] == 0:
        return no_data
    return points.distance(features.to_crs(crs).unary_union)


for cur_country in inD['WBCode'].unique():
    out_file = os.path.join(out_folder, "%s_ICT_distance.csv" % cur_country)
    tPrint(f"***** Processing {cur_country}")

    # An existing output file acts as a cache: finished countries are not redone.
    if os.path.exists(out_file) or cur_country in broken_countries:
        continue

    curB = inG.loc[inG['ISO3'] == cur_country]
    if curB.shape[0] == 0:
        # Without a boundary nothing can be selected; say so instead of
        # silently producing no output for this country.
        tPrint(f"No boundary found for {cur_country}; skipping")
        continue

    # Only do the expensive work once we know the country will be processed.
    curD = inD.loc[inD['WBCode'] == cur_country]
    distD = curD.to_crs(epsg)
    total_bound = curB.unary_union

    # Colocation centres and IXPs: features falling inside the country boundary.
    selCol = inCol.loc[inCol.intersects(total_bound)]
    distD['col_dist'] = _distance_or_sentinel(distD, selCol, epsg)

    selIXP = inIXP.loc[inIXP.intersects(total_bound)]
    distD['ixp_dist'] = _distance_or_sentinel(distD, selIXP, epsg)

    # Cable landing points are matched by ISO3 code rather than spatially.
    selCables = inCables.loc[inCables['ISO3'] == cur_country]
    distD['cables_dist'] = _distance_or_sentinel(distD, selCables, epsg)

    tPrint("Completed distances to easy measures")

    # Cell towers: pre-filter with the spatial index on the bounding box, then
    # test the candidates against the actual boundary. The index returns
    # integer positions, hence .iloc rather than .loc.
    candidate_positions = list(cell_sindex.intersection(total_bound.bounds))
    potential_matches = inCell.iloc[candidate_positions]
    selCell = potential_matches.loc[potential_matches.intersects(total_bound)]
    # Same -1 sentinel as the other measures when the country has no towers.
    distD['cell_dist'] = _distance_or_sentinel(distD, selCell, epsg)

    # Sample the coverage rasters at the unprojected (EPSG:4326) locations.
    # NOTE(review): assumes the GSMA rasters are in EPSG:4326 -- confirm.
    coords = [(pt.x, pt.y) for pt in curD['geometry']]
    for col_name, raster in (('gsma2g', gsma2g_R),
                             ('gsma3g', gsma3g_R),
                             ('gsma4g', gsma4g_R)):
        # Each sample is an array with one value per band; keep the first band.
        distD[col_name] = [band_values[0] for band_values in raster.sample(coords)]

    pd.DataFrame(distD).to_csv(out_file)

10:49:28	***** Processing YEM
10:49:28	***** Processing BOL
10:49:28	***** Processing ECU
10:49:28	***** Processing PER
10:49:28	***** Processing URY
10:49:28	***** Processing ARG
10:49:28	***** Processing CHL
10:49:39	Completed distances to easy measures
