# Turkey Urbanization Review

In [5]:
import sys, os, importlib
import rasterio
import reverse_geocode

import geopandas as gpd
import pandas as pd

# Import GOST libraries; sys.path.append will be unnecessary if libraries are already installed
sys.path.append("../../../../gostrocks/src")
sys.path.append("../../../../GOST_Urban")

import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint
import src.UrbanRaster as urban
%matplotlib inline  

In [6]:
# Define input/output paths and make sure the national population raster exists
iso3 = "TUR"
pop_file = "/home/public/Data/COUNTRY/TUR/Population/ppp_2020_1km_Aggregated.tif"
turkey_wsf = "/home/public/Data/GLOBAL/WSF/Turkey/Turkey_WSF_evolution.tif"
global_friction = "/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff"
global_population = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"
if not os.path.exists(pop_file):
    # The national raster is missing, so clip it out of the global WorldPop layer.
    # The clip boundary is loaded right here (Turkish NUTS level-1 polygons) so that
    # this cell runs on a fresh kernel; it previously relied on a leftover `inD`
    # variable that is only created by loops much further down the notebook.
    outFile = os.path.join("/home/wb411133/temp/TUR", os.path.basename(pop_file))
    if not os.path.exists(outFile):
        nuts_file = "/home/public/Data/GLOBAL/ADMIN/NUTS/NUTS_RG_01M_2021_4326.geojson"
        all_nuts = gpd.read_file(nuts_file)
        clip_bounds = all_nuts.loc[(all_nuts['CNTR_CODE'] == "TR") & (all_nuts['LEVL_CODE'] == 1)]
        os.makedirs(os.path.dirname(outFile), exist_ok=True)
        with rasterio.open(global_population) as global_pop_raster:
            rMisc.clipRaster(global_pop_raster, clip_bounds, outFile)
    # Point the rest of the notebook at the clipped copy; the original path does not exist
    pop_file = outFile

urban_raster = "/home/public/Data/COUNTRY/TUR/urbanization/tur_urban.tif"
hd_urban_raster = "/home/public/Data/COUNTRY/TUR/urbanization/tur_urban_hd.tif"

tt_folder = "/home/public/Data/COUNTRY/TUR/travel_time"
tt_airports = os.path.join(tt_folder, "least_cost_travel_time_airports.tif")
tt_hospitals = os.path.join(tt_folder, "least_cost_travel_time_hospitals.tif")

# Data dictionary describing the fields written to the output tables
metadata = {
            'LEVEL': 'Field added to extracted NUTS geospatial data; useful in naming output files in loops',
            'wp_pop': 'Total population summarized from WorldPop 2020 global dataset',
            'urban_cells': 'Total number of cells in urban areas',
            'hd_cells': 'Total number of cells in high density urban areas',
            'urban_pop': 'Total WorldPop population in urban areas',
            'hd_pop': 'Total WorldPop population in high density urban areas',
            'FxxYYYY': 'DMSP nighttime lights sum of lights',
            'YR_YYYY_SUM':'VIIRS nighttime lights sum of lights',
            'bYYYY':'Total number of cells that were built in year YYYY, as calculated by the World Settlement Footprint'
           }


In [7]:
# Load the NUTS boundaries and split out the Turkish units at levels 1, 2 and 3
nuts_file = "/home/public/Data/GLOBAL/ADMIN/NUTS/NUTS_RG_01M_2021_4326.geojson"
inN = gpd.read_file(nuts_file)

is_turkey = inN['CNTR_CODE'] == "TR"
inT1, inT2, inT3 = (
    inN.loc[is_turkey & (inN['LEVL_CODE'] == nuts_level)]
    for nuts_level in (1, 2, 3)
)


In [23]:
def _add_area_and_level(nuts_gdf, level_name):
    ''' Append 'area' and 'LEVEL' columns to a NUTS GeoDataFrame

    The area is measured after projecting to EPSG:5637 and divided by 1,000,000
    (presumably m2 -> km2; confirm the units of EPSG:5637). The result is
    returned in EPSG:4326 with level_name stored in every row of 'LEVEL'.
    '''
    projected = nuts_gdf.to_crs('EPSG:5637')
    projected['area'] = projected['geometry'].apply(lambda geom: geom.area/1000000)
    labelled = projected.to_crs('epsg:4326')
    labelled['LEVEL'] = level_name
    return labelled

inT1 = _add_area_and_level(inT1, 'NUTS1')
inT2 = _add_area_and_level(inT2, 'NUTS2')
inT3 = _add_area_and_level(inT3, 'NUTS3')

In [None]:
# Standardize rasters to the population layer
# NOTE(review): the standardized copies are written to the temp folder, while the
# summary cells below open the original raster paths - confirm which set is intended.
popR = rasterio.open(pop_file)
for rFile in [hd_urban_raster, urban_raster, tt_airports, tt_hospitals]:
    out_file = os.path.join("/home/wb411133/temp/TUR", os.path.basename(rFile))
    if not os.path.exists(out_file):
        # Open the source raster only when it actually needs processing, and close it afterwards
        with rasterio.open(rFile) as curR:
            rMisc.standardizeInputRasters(curR, popR, out_file)


In [None]:
# Attach a city name to each urban extent (skipped when the named file already exists)
urban_extents = "/home/public/Data/COUNTRY/TUR/urbanization/tur_urban.shp"
named_file = "/home/wb411133/temp/tur_urban_named.shp"
if not os.path.exists(named_file):
    in_ext = gpd.read_file(urban_extents)
    # Build (y, x) centroid pairs for the reverse geocoder
    # (presumably latitude/longitude - assumes the extents use a geographic CRS; confirm)
    centroids = []
    for extent_geom in in_ext['geometry']:
        centre = extent_geom.centroid
        centroids.append((centre.y, centre.x))
    res = reverse_geocode.search(centroids)
    city_names = []
    for match in res:
        city_names.append(match['city'])
    in_ext['CITY'] = city_names
    in_ext.to_file(named_file)

# Summarize population in urban areas and within travel-time thresholds

In [24]:
# Summarize urbanization: population, urban cell counts and urban population per NUTS unit
popR = rasterio.open(pop_file)
popD = popR.read()

urbanR = rasterio.open(urban_raster)
urbanD = urbanR.read()
urbanPop = urbanD * popD   # population falling inside urban cells

hdR = rasterio.open(hd_urban_raster)
hdD = hdR.read()
hdPop = hdD * popD         # population falling inside high density urban cells

def _zonal_sum(zones, raster):
    ''' Run rMisc.zonalStats (minVal=0) and return the SUM value for each zone '''
    stats = rMisc.zonalStats(zones, raster, minVal=0)
    stats = pd.DataFrame(stats, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
    return stats['SUM'].values

for inD in [inT1, inT2, inT3]:
    # Rasters that are already open on disk
    for out_field, source_raster in [('wp_pop', popR), ('urban_cells', urbanR), ('hd_cells', hdR)]:
        inD[out_field] = _zonal_sum(inD, source_raster)

    # Derived arrays are wrapped in an in-memory raster that reuses the population profile
    for out_field, pop_array in [('urban_pop', urbanPop), ('hd_pop', hdPop)]:
        with rMisc.create_rasterio_inmemory(popR.profile, pop_array) as urbanPopR:
            inD[out_field] = _zonal_sum(inD, urbanPopR)

In [25]:
# Summarize travel time: population within `thresh` of each destination type
# (the threshold is compared directly against the raster values; units are
#  whatever the travel-time rasters store - presumably minutes, confirm)
thresh = 60
popD = popR.read()

# Each travel-time raster is read once and reused for every NUTS level, instead of
# being re-opened (and never closed) for every level.
for tt_file in [tt_airports, tt_hospitals]:
    # e.g. ".../least_cost_travel_time_airports.tif" -> "airports_60"
    tt_label = os.path.splitext(os.path.basename(tt_file))[0].split("_")[-1]
    field_name = "%s_%s" % (tt_label, thresh)
    with rasterio.open(tt_file) as ttR:
        # NOTE(review): any nodata value below `thresh` (e.g. a negative fill value)
        # would be counted as reachable here - confirm the rasters' nodata value.
        ttD = ttR.read() < thresh
    ttPop = popD * ttD
    with rMisc.create_rasterio_inmemory(popR.profile, ttPop) as urbanPopR:
        for inD in [inT1, inT2, inT3]:
            res = rMisc.zonalStats(inD, urbanPopR, minVal=0)
            res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
            inD[field_name] = res['SUM'].values

In [26]:
# Write one attribute table (geometry dropped) per NUTS level
for level_name, nuts_gdf in (("NUTS1", inT1), ("NUTS2", inT2), ("NUTS3", inT3)):
    out_csv = "/home/wb411133/temp/TUR/%s_urbanization_traveltime.csv" % level_name
    attributes = pd.DataFrame(nuts_gdf).drop(['geometry'], axis=1)
    attributes.to_csv(out_csv)

# Summarize nighttime lights sum of lights (SOL)

In [29]:
def summarize_ntl(inD, clip = False, out_folder = "/home/wb411133/temp/TUR"):
    ''' Zonal stats for all VIIRS and DMSP data

    INPUT
    inD - zones handed to rMisc.zonalStats / rMisc.clipRaster (the notebook passes
        NUTS GeoDataFrames); one output row is produced per zone
    [optional] clip [boolean] - when True, also write clipped copies of selected
        rasters to out_folder (DMSP files whose name contains 1992, 2000 or 2012;
        VIIRS years 2012, 2015 and 2019). Existing files are not overwritten.
    [optional] out_folder [string] - folder for the clipped rasters; created if missing

    RETURNS
    [pandas.DataFrame] - the SUM and MEAN columns for every raster, DMSP first and
        then VIIRS. DMSP columns are named "<file name up to the first dot>_SUM/_MEAN",
        VIIRS columns "YR_<year>_SUM/_MEAN". Empty if no rasters were found.
    '''
    os.makedirs(out_folder, exist_ok=True)

    # Collect every VIIRS .vrt below the folder; sorted so the column order is reproducible
    viirs_folder = "/home/public/Data/GLOBAL/NighttimeLights/VIIRS_CORRECTED"
    viirs_vrts = sorted(
        os.path.join(root, f)
        for root, dirs, files in os.walk(viirs_folder)
        for f in files
        if f.endswith(".vrt")
    )

    tPrint("**** run zonal stats on DMSP nighttime lights")
    ntl_folder = "/home/public/Data/GLOBAL/NighttimeLights/DMSP"
    dmsp_res = {}
    for dmsp_tif in sorted(os.listdir(ntl_folder)):
        title = dmsp_tif.split(".")[0]
        # Context manager closes each raster once it has been summarized
        with rasterio.open(os.path.join(ntl_folder, dmsp_tif)) as inR:
            if clip and any(yr in dmsp_tif for yr in ("1992", "2000", "2012")):
                out_tiff = os.path.join(out_folder, dmsp_tif)
                tPrint("Clipping %s" % out_tiff)
                if not os.path.exists(out_tiff):
                    rMisc.clipRaster(inR, inD, out_tiff)
            res = rMisc.zonalStats(inD, inR, minVal=0)
        res = pd.DataFrame(res, columns=["%s_%s" % (title, x) for x in ['SUM','MIN','MAX','MEAN']])
        dmsp_res[title] = res

    tPrint("Run zonal stats on VIIRS")
    all_res = {}
    for vrt in viirs_vrts:
        tPrint(vrt)
        yr = os.path.basename(vrt).split("_")[0]
        res = rMisc.zonalStats(inD, vrt, minVal=0.5)
        res = pd.DataFrame(res, columns=["YR_%s_%s" % (yr, x) for x in ['SUM','MIN','MAX','MEAN']])
        all_res[yr] = res
        if clip and yr in ("2012", "2015", "2019"):
            out_file = os.path.join(out_folder, os.path.basename(vrt).replace(".vrt", ".tif"))
            tPrint("Clipping %s" % out_file)
            if not os.path.exists(out_file):
                with rasterio.open(vrt) as vrtR:
                    rMisc.clipRaster(vrtR, inD, out_file)

    # Keep only SUM (column 0) and MEAN (column 3) of each result and line them up
    # side by side. A single concat replaces the former try/bare-except join chain,
    # which hid any genuine error raised while joining.
    frames = [vals.iloc[:, [0, 3]] for vals in dmsp_res.values()]
    frames += [vals.iloc[:, [0, 3]] for vals in all_res.values()]
    if not frames:
        # No rasters found in either folder: nothing to summarize
        return pd.DataFrame()
    final = pd.concat(frames, axis=1)

    return(final)

In [30]:
# Summarize nighttime lights for the NUTS1 units; clip=True also asks summarize_ntl
# to write clipped copies of selected rasters to its default output folder
final = summarize_ntl(inT1, clip=True)

08:53:00	**** run zonal stats on DMSP nighttime lights
08:53:00	Clipping /home/wb411133/temp/TUR/F182012.v4c_web.stable_lights.avg_vis_ElvidgeCorrected_gt3.tif
08:53:01	Clipping /home/wb411133/temp/TUR/F152000.v4b_web.stable_lights.avg_vis_ElvidgeCorrected_gt3.tif
08:53:02	Clipping /home/wb411133/temp/TUR/F101992.v4b_web.stable_lights.avg_vis_ElvidgeCorrected_gt3.tif
08:53:02	Run zonal stats on VIIRS
08:53:02	/home/public/Data/GLOBAL/NighttimeLights/VIIRS_CORRECTED/2012_VIIRS_annual_composite.vrt
08:53:03	Clipping /home/wb411133/temp/TUR/2012_VIIRS_annual_composite.tif
08:53:03	/home/public/Data/GLOBAL/NighttimeLights/VIIRS_CORRECTED/2013_VIIRS_annual_composite.vrt
08:53:03	/home/public/Data/GLOBAL/NighttimeLights/VIIRS_CORRECTED/2014_VIIRS_annual_composite.vrt
08:53:04	/home/public/Data/GLOBAL/NighttimeLights/VIIRS_CORRECTED/2015_VIIRS_annual_composite.vrt
08:53:04	Clipping /home/wb411133/temp/TUR/2015_VIIRS_annual_composite.tif
08:53:05	/home/public/Data/GLOBAL/NighttimeLights/VIIRS_

In [None]:
# Write one nighttime lights table per NUTS level
for inT in [inT1, inT2, inT3]:
    final = summarize_ntl(inT)
    # .values assigns by position; `final` has a fresh index that differs from inT's
    final['NUTS_ID'] = inT['NUTS_ID'].values
    final['areaKM'] = inT['area'].values
    # Name the file from the LEVL_CODE column instead of "whatever sits in column 2":
    # the column position depends on how the GeoJSON reader orders/exposes fields, and
    # a different field there would give the wrong name or overwrite one file three times.
    nuts_level = inT['LEVL_CODE'].iloc[0]
    final.to_csv("/home/wb411133/temp/TUR/NUTS%s_VIIRS.csv" % nuts_level)

# Summarize WSF at NUTS (levels 1–3) and city level

In [None]:
# Build vector urban extents from the gridded population and save them as a shapefile
urban_pop = urban.urbanGriddedPop(pop_file)
urban_extents = urban_pop.calculateUrban(densVal=300, totalPopThresh=5000)

# Output shapefile takes its name from the urban raster (.tif -> .shp)
extents_shp_name = os.path.basename(urban_raster).replace(".tif", ".shp")
urban_extents_file = os.path.join("/home/wb411133/temp/TUR", extents_shp_name)
urban_extents.to_file(urban_extents_file)

In [None]:
# Summarize WSF change in cities: per-year cell counts, then accumulated across years
wsf_years = range(1985, 2016)
res = rMisc.zonalStats(urban_extents_file, turkey_wsf, rastType='C', unqVals=list(wsf_years))
resD = pd.DataFrame(res, columns = ["b%s" % wsf_year for wsf_year in wsf_years])
city_built = resD.cumsum(axis=1)

In [15]:
# Summarize WSF change in NUTS: cumulative built-up cell counts per year and NUTS unit
for inD in [inT1, inT2, inT3]:
    nuts_out = "/home/wb411133/temp/TUR/%s_WSF.csv" % inD['LEVEL'].iloc[0]
    if os.path.exists(nuts_out):
        # Re-use the cached table so `nuts_built` is defined even when nothing is recomputed.
        # Tables cached before NUTS_ID was assigned by position may hold blank IDs;
        # delete them to force a rebuild.
        nuts_built = pd.read_csv(nuts_out, index_col=0)
    else:
        nuts_res = rMisc.zonalStats(inD, turkey_wsf, rastType='C', unqVals=list(range(1985,2016)))
        nuts_res = pd.DataFrame(nuts_res, columns = ["b%s" % x for x in list(range(1985,2016))])
        nuts_built = nuts_res.cumsum(axis=1)
        # Assign by position: nuts_built has a fresh 0..n-1 index while inD keeps the row
        # labels of the full NUTS table, so assigning the Series would align on index
        # and leave NaN / mismatched IDs.
        nuts_built['NUTS_ID'] = inD['NUTS_ID'].values
        nuts_built.to_csv(nuts_out)
    

In [10]:
# Preview the cumulative WSF table left over from the last NUTS level processed above
nuts_built.head()

Unnamed: 0,b1985,b1986,b1987,b1988,b1989,b1990,b1991,b1992,b1993,b1994,...,b2006,b2007,b2008,b2009,b2010,b2011,b2012,b2013,b2014,b2015
0,1502347,1502347,1557153,1590807,1625434,1660047,1686464,1725350,1747886,1771147,...,2125691,2159651,2193451,2228827,2266116,2297888,2328516,2364077,2403222,2445167
1,793111,793111,838124,881769,912038,942631,958489,997164,1041520,1068048,...,1389760,1413887,1442817,1458831,1476653,1497912,1513385,1541730,1563116,1583972
2,1396069,1396069,1452071,1506468,1547410,1585033,1615848,1664232,1719708,1769399,...,2291005,2330908,2379021,2419876,2465892,2517825,2555007,2612482,2655510,2709075
3,1397433,1397433,1417347,1446446,1497011,1523295,1546763,1564507,1584365,1611210,...,1960583,1984445,2024711,2053117,2086731,2107579,2132841,2163428,2203844,2242001
4,370147,370147,389560,399581,414230,424068,433584,443375,452882,464468,...,614151,624611,638390,654672,670928,680938,691495,705856,724368,739100
