# Calculating LEI

This notebook is used for exploring LEI methods. To calculate LEI proper, use the LEIFast.py script in GOST_Rocks/Urban, which implements multi-threading.

In [2]:
import os, sys, logging

import geojson, rasterio
import rasterio.features

import pandas as pd
import numpy as np

from shapely.geometry import shape, GeometryCollection
from shapely.wkt import loads
from matplotlib import pyplot
from rasterio.plot import show, show_hist

#Import GOST urban functions
sys.path.append("../../")
from src.LEI import *


In [3]:
# IPython help syntax: display the documentation of calculate_LEI
# (presumably provided by the star import from src.LEI - confirm).
calculate_LEI?

In [None]:
# Inventory the LEI project tree: remember the path of every GHSL.tif and
# record, per folder, the number of files it holds and the size of its raster.
LEI_folder = '/home/wb411133/data/Projects/LEI'
results = {}
GHSL_files = []
for root, dirs, files in os.walk(LEI_folder):
    ghsl_path = os.path.join(root, "GHSL.tif")
    # Folders without a GHSL raster are skipped explicitly, so genuine errors
    # (permissions, broken mounts, ...) are no longer silently swallowed.
    if not os.path.exists(ghsl_path):
        continue
    GHSL_files.append(ghsl_path)
    folder_name = os.path.basename(root)
    ghsl_size = os.stat(ghsl_path).st_size
    results[folder_name] = [len(files), ghsl_size]
    # Report folders whose file count differs from six.
    if len(files) != 6:
        print("%s - %s" % (folder_name, ghsl_size))

# Visualize raster data - GHSL

In [None]:
# Read the GHSL raster of one project folder (634) into memory.
root = '/home/wb411133/data/Projects/LEI/634/'
inputGHSL = os.path.join(root, "GHSL.tif")
# The dataset handle is left open: the write-out cell reads inRaster.profile.
inRaster = rasterio.open(inputGHSL)
inR = inRaster.read()

# 0/1 masks: cells with value 3 ("new") and cells with value 4, 5 or 6 ("old").
newR = np.equal(inR, 3).astype('int')
oldR = np.isin(inR, [4, 5, 6]).astype('int')

In [None]:
# Draw the two masks side by side: "old" cells on the left, "new" on the right.
fig, (axr, axg) = pyplot.subplots(nrows=1, ncols=2, figsize=(20, 20))
show(oldR, ax=axr, title='OLD')
show(newR, ax=axg, title='NEW')

In [None]:
#write out raster to file
# Writes an int32 copy of outRaster next to the input, as "<input>_LEI.tif",
# reusing the input raster's profile with only the dtype overridden.
# NOTE(review): outRaster is read on the next-but-one line before it is
# assigned anywhere in the visible cells, so this cell raises NameError unless
# outRaster was created elsewhere - confirm which array was meant to be written.
outProperties = inRaster.profile
outRaster = outRaster.astype('int32')
outProperties['dtype'] = 'int32'
with rasterio.open(inputGHSL.replace(".tif", "_LEI.tif"), 'w', **outProperties) as out:
    out.write(outRaster)

# Calculate LEI

In [None]:
# Run calculate_LEI for every GHSL raster (old classes 5 and 6, new class 4)
# and store the result as a CSV next to the raster. Rasters that already have
# an output CSV are skipped.
for ghsl_file in GHSL_files:
    print(ghsl_file)
    out_file = ghsl_file.replace(".tif", "new_LEI_90_00.csv")
    if os.path.exists(out_file):
        continue
    lei = calculate_LEI(ghsl_file, old_list=[5, 6], new_list=[4])
    xx = pd.DataFrame(lei, columns=['geometry', 'old', 'total'])
    # LEI = share of "old" within "total" for each record.
    xx['LEI'] = xx['old'].div(xx['total'])
    xx.to_csv(out_file)

In [None]:
# Process LEI results
# Collect the 1990-2000 LEI result table of every project folder.
base_folder = '/home/wb411133/data/Projects/LEI'
# Exact file name written by the LEI calculation loop. A substring test such as
# "GHSLnew_LEI_90_00" in f would also pick up "GHSLnew_LEI_90_0014.csv" and mix
# two different periods in the same summary.
results_name = "GHSLnew_LEI_90_00.csv"
all_results_files = []
for root, folders, files in os.walk(base_folder):
    if results_name in files:
        all_results_files.append(os.path.join(root, results_name))


In [None]:
# Summarize every result file, keyed by the name of the folder that holds it.
summarized_results = {
    os.path.basename(os.path.dirname(res_file)): summarize_LEI(res_file)
    for res_file in all_results_files
}

In [None]:
# One row per folder, one column per entry returned by summarize_LEI.
all_results = pd.DataFrame(summarized_results).T
# Old test to determine which files were not processed correctly
#bas_res = all_results[all_results['Expansion'] == 123282000.0].index
all_results.head(n=5)

In [None]:
# Save the summary table in the LEI project folder.
summary_csv = os.path.join(LEI_folder, "Summarized_LEI_Results_90_00.csv")
all_results.to_csv(summary_csv)

# Summarize total built per city

In [None]:
# Total built-up area per city for four thresholds of the GHSL raster.
# A cell counts as built for a given year when its value is at least the
# threshold used below (>= 3, >= 4, >= 5, >= 6).
CELL_AREA = 30 * 30  # area of one raster cell (presumably 30 m cells - confirm)
all_res = {}
for g_file in GHSL_files:
    city = os.path.basename(os.path.dirname(g_file))
    # Close each dataset as soon as its data is in memory so that file handles
    # do not pile up while looping over many cities.
    with rasterio.open(g_file) as inR:
        inD = inR.read()
    built2014 = (inD >= 3).sum() * CELL_AREA
    built2000 = (inD >= 4).sum() * CELL_AREA
    built1990 = (inD >= 5).sum() * CELL_AREA
    built1975 = (inD >= 6).sum() * CELL_AREA
    all_res[city] = [built1975, built1990, built2000, built2014]
    print(city)

In [None]:
# One row per city, one column per year. The table is built directly from the
# dict instead of via DataFrame(...).head().transpose(): head() acted on the
# per-year rows before the transpose and would silently drop values if a city
# ever carried more than five entries.
xx = pd.DataFrame.from_dict(
    all_res,
    orient='index',
    columns=['built75', 'built90', 'built00', 'built14'],
)
#xx[xx.index.isin(['1'])]
xx.head()

In [None]:
# Save the per-city built-area table.
built_csv = "/home/wb411133/temp/LEI_cities_built.csv"
xx.to_csv(built_csv)

# Combining results

In [None]:
def _read_and_tag(csv_path, label_fmt):
    """Read a CSV (first column as index) and relabel its columns via label_fmt."""
    frame = pd.read_csv(csv_path, index_col=0)
    frame.columns = [label_fmt % col for col in frame.columns]
    return frame


# CSV files sitting directly in the LEI folder.
csv_files = [name for name in os.listdir(LEI_folder) if name.endswith(".csv")]

# Each summary gets a suffix so that columns stay distinguishable after joining.
lei0014 = _read_and_tag(os.path.join(LEI_folder, 'Summarized_LEI_Results.csv'), "%s_0014")
lei9014 = _read_and_tag(os.path.join(LEI_folder, 'Summarized_LEI_Results_90_0014.csv'), "%s_9014")
lei9000 = _read_and_tag(os.path.join(LEI_folder, 'Summarized_LEI_Results_90_00.csv'), "%s_9000")
built_area = _read_and_tag("/home/wb411133/temp/LEI_cities_built.csv", "%s_BUILT")


In [None]:
# Join the three LEI summaries and the built-area table on their shared index,
# one table at a time.
combined_results = lei0014.join(lei9014)
combined_results = combined_results.join(lei9000)
combined_results = combined_results.join(built_area)

In [None]:
# Save the combined table in the LEI project folder.
combined_csv = os.path.join(LEI_folder, 'LEI_COMBINED.csv')
combined_results.to_csv(combined_csv)

In [None]:
# Consistency check: the three 2000-2014 LEI categories summed, minus the
# change in built area between the built00 and built14 columns.
lei_growth_0014 = (
    combined_results['Expansion_0014']
    + combined_results['Infill_0014']
    + combined_results['Leapfrog_0014']
)
built_growth_0014 = combined_results['built14_BUILT'] - combined_results['built00_BUILT']
lei_growth_0014 - built_growth_0014

In [None]:
# Preview the first five rows of the built-area table.
built_area.head(n=5)

# Summarizing methods

In [None]:
# Read the GHSL raster of project folder 1 into memory.
in_ghsl = "/home/wb411133/data/Projects/LEI/1/GHSL.tif"
# Only the pixel data is used afterwards, so the dataset is closed right after
# reading instead of leaving the file handle open.
with rasterio.open(in_ghsl) as inR:
    inD = inR.read()


In [None]:
# Count the cells at or above each class threshold (3, 4, 5, 6), which gives
# the number of built cells for 2014, 2000, 1990 and 1975 respectively.
built2014, built2000, built1990, built1975 = (
    (inD >= min_class).sum() for min_class in (3, 4, 5, 6)
)

In [None]:
# Print the four counts, newest year first, one per line.
cell_counts = (built2014, built2000, built1990, built1975)
print("%s\n%s\n%s\n%s" % cell_counts)

In [None]:
# LEI for two periods of the same raster:
#   2000-2014: new cells are class 3, old cells are classes 4, 5 and 6
#   1990-2000: new cells are class 4, old cells are classes 5 and 6
lei_2000_2014 = calculate_LEI(
    in_ghsl,
    old_list=[4, 5, 6],
    new_list=[3],
)
lei_1990_2000 = calculate_LEI(
    in_ghsl,
    old_list=[5, 6],
    new_list=[4],
)

In [None]:
# Tabulate the 1990-2000 result computed in the preceding cell. The original
# read the global `lei`, which is only set as a side effect of the batch loop
# (and not at all when every city already had an output file).
# NOTE(review): lei_1990_2000 is chosen because the following cells inspect the
# 90_00 CSV; switch to lei_2000_2014 if that period was intended.
xx = pd.DataFrame(lei_1990_2000, columns=['geometry', 'old', 'total'])
# LEI = share of "old" within "total" for each record.
xx['LEI'] = xx['old'] / xx['total']

In [None]:
# Load the stored 1990-2000 LEI table of project folder 1 and preview it.
in_file = "/home/wb411133/data/Projects/LEI/1/GHSLnew_LEI_90_00.csv"
inD = pd.read_csv(in_file, index_col=0)
inD.head(n=5)

In [None]:
# Summarize the stored LEI table; the return value is displayed as cell output.
summarize_LEI(in_file)

# DEBUGGING

In [None]:
# Gather every file under the LEI folder whose name contains "90_00.csv".
bad_files = []
for root, dirs, files in os.walk('/home/wb411133/data/Projects/LEI/'):
    bad_files.extend(
        os.path.join(root, f) for f in files if "90_00.csv" in f
    )

bad_files

In [None]:
import shutil

# Rename each collected file by replacing "_90_00" with "_90_0014" in its path.
for b in bad_files:
    shutil.move(b, b.replace("_90_00", "_90_0014"))