In [14]:
import sys, os
import geemap, ee

import geopandas as gpd
import pandas as pd

from shapely.geometry import Point, Polygon

sys.path.append(os.path.join(os.path.expanduser("~"), "Code", "GEE_Zonal", "src"))
from gee_tools import ZonalStats
from gee_helpers import gpd_to_gee
ee.Initialize()

In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [42]:
# Root folder of this analysis; the VIIRS folder and the zonal-result CSV are derived from it.
out_folder = "/home/wb411133/temp/ETH_NTL/"
viirs_folder = os.path.join(out_folder, "Data/VIIRS")
zonal_res = os.path.join(out_folder, "Data/IP_Locations_NTL_ZONAL_v2.csv")

#ghs_gpkg = gpd.read_file(r'P:\Data\GLOBAL\URBAN\GHS\GHS_STAT_UCDB2015MT_GLOBE_R2019A\GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg')
# NOTE(review): this path is relative to the kernel's working directory, not to
# out_folder - confirm which location is intended.
ip_locations = "Data/IP_Locations.csv"
ghs_filter = gpd.read_file(ip_locations)
# Build the point geometry from the Lon/Lat columns in one vectorised call
# instead of looping over rows; values are cast to float first, as before.
ghs_filter['geometry'] = gpd.points_from_xy(
    ghs_filter['Lon'].astype(float),
    ghs_filter['Lat'].astype(float),
)
# Coordinates are declared as lon/lat (epsg:4326) and then projected to epsg:3857.
ghs_filter.crs = "epsg:4326"
ghs_filter = ghs_filter.to_crs('epsg:3857')

In [43]:
# Candidate buffer sizes: 5 to 45 in steps of 5, followed by the two smaller
# values 3 and 2 (appended last, so the list is not sorted).
buffers = [*range(5, 50, 5), 3, 2]
buffers

[5, 10, 15, 20, 25, 30, 35, 40, 45, 3, 2]

In [45]:
# Buffer every location, upload the buffers to Earth Engine and launch one
# zonal-statistics run over the monthly VIIRS collection.
# NOTE(review): `cur_buffer` is not assigned in any cell shown in this notebook -
# presumably one of the values in `buffers`; define it before running on a fresh kernel.
ghs_buff = ghs_filter.copy()
# The buffer distance is expressed in the units of ghs_buff's CRS.
ghs_buff['geometry'] = ghs_buff['geometry'].apply(lambda geom: geom.buffer(cur_buffer * 1000))
ghs_ee = gpd_to_gee(ghs_buff, id_col = 'Industrial Park')

zonal_args = {
    'collection_id': 'NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG',
    'band': 'avg_rad',
    'statistic_type': 'sum',
    'frequency': 'original',
    'scale': 500,
    'output_dir': 'ghs',
    'output_name': 'ghs_ntl',
}
zs = ZonalStats(target_features = ghs_ee, **zonal_args)
res = zs.runZonalStats()

In [57]:
# Print the status and elapsed run time of the zonal-statistics run held in `zs`.
zs.reportRunTime()

Completed
Runtime: 438 minutes and 20 seconds


Instead, we will split GHS into several batches of up to 1,000 features each.

In [18]:
# Write ghs_filter to disk in consecutive slices of up to `batch_size` rows,
# one GeoJSON file per slice, under <output_dir>/batches.
# NOTE(review): `output_dir` is not assigned in any cell shown in this notebook
# (only `out_folder` is) - define it before running on a fresh kernel.
batch_size = 1000
batch_dir = os.path.join(output_dir, 'batches')
# Create the target folder up front so the first to_file call does not fail
# on a clean output directory.
os.makedirs(batch_dir, exist_ok=True)
for batch_number, i in enumerate(range(0, len(ghs_filter), batch_size)):
    batch = ghs_filter.iloc[i:i+batch_size].copy()
    batch.to_file(os.path.join(batch_dir, f"batch_{batch_number}.geojson"), driver = "GeoJSON")

In [19]:
# Launch one night-time-lights zonal sum per batch file. Every ZonalStats
# object is kept in `results` so its run time can be reported afterwards.
ntl_settings = dict(
    collection_id = 'NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG',
    statistic_type = 'sum',
    scale = 500,
    frequency = 'original',
    band = 'avg_rad',
    output_dir = 'ghs',
    min_threshold = 0,
)
results = []
for batch_number, i in enumerate(range(0, len(ghs_filter), 1000)):
    # Batch files are the GeoJSONs written under <output_dir>/batches.
    ghs_batch = os.path.join(output_dir, 'batches', f"batch_{batch_number}.geojson")
    ghs_ee = geemap.geojson_to_ee(ghs_batch)
    zs = ZonalStats(
        target_features = ghs_ee,
        output_name = f'ntl_batch_{batch_number}',
        **ntl_settings
    )
    res = zs.runZonalStats()
    results.append(zs)

In [24]:
# Print status and run time for each batch run collected in `results`.
# Note that the loop variable rebinds the notebook-level name `zs`.
for zs in results:
    zs.reportRunTime()

Completed
Runtime: 0 minutes and 51 seconds
Completed
Runtime: 0 minutes and 33 seconds
Completed
Runtime: 0 minutes and 28 seconds


In [21]:
# Monthly VIIRS DNB collection restricted to its 'cf_cvg' band.
coverage = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG').select('cf_cvg')

In [22]:
def getGT0(image):
    """Return ``image.gt(0)``: the comparison of ``image`` against zero.

    Mapped over the ``cf_cvg`` collection to flag values strictly greater
    than 0.
    """
    above_zero = image.gt(0)  # .multiply(ee.Image.pixelArea())
    return above_zero

def getGT1(image):
    """Return ``image.gt(1)``: the comparison of ``image`` against one.

    Mapped over the ``cf_cvg`` collection to flag values strictly greater
    than 1.
    """
    above_one = image.gt(1)  # .multiply(ee.Image.pixelArea())
    return above_one

def getGTE0(image):
    """Return ``image.gte(0)``: the greater-or-equal comparison against zero.

    Mapped over the ``cf_cvg`` collection; its zonal sum is later used as the
    denominator when coverage percentages are computed.
    """
    at_least_zero = image.gte(0)  # .multiply(ee.Image.pixelArea())
    return at_least_zero

def setArea(feature):
    """Store the feature's own ``area()`` on it under the key ``area_sqm``.

    Returns whatever ``feature.set`` returns, so it can be mapped over a
    feature collection.
    """
    area = feature.area()
    return feature.set("area_sqm", area)

In [25]:
# Per-image result of getGT0 (value > 0) over the cf_cvg collection.
# NOTE(review): this name is rebound to a pandas DataFrame further down the notebook.
coverage_gt0 = coverage.map(getGT0)

In [26]:
# Per-image result of getGT1 (value > 1) over the cf_cvg collection.
# NOTE(review): this name is rebound to a pandas DataFrame further down the notebook.
coverage_gt1 = coverage.map(getGT1)

In [27]:
# Per-image result of getGTE0 (value >= 0) over the cf_cvg collection.
# NOTE(review): this name is rebound to a pandas DataFrame further down the notebook.
coverage_gte0 = coverage.map(getGTE0)

In [28]:
# Zonal sum of the "> 0" coverage flags, one run per batch file.
# NOTE(review): `coverage_gt0` must still be the mapped ImageCollection here; a
# later cell rebinds that name to a DataFrame.
gt0_settings = dict(
    collection_id = 'NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG',
    statistic_type = 'sum',
    scale = 500,
    frequency = 'original',
    output_dir = 'ghs',
    ee_dataset = coverage_gt0,
)
results_gt0 = []
for batch_number, i in enumerate(range(0, len(ghs_filter), 1000)):
    ghs_batch = os.path.join(output_dir, 'batches', f"batch_{batch_number}.geojson")
    # Attach each feature's area (area_sqm) before running the statistics.
    ghs_ee = geemap.geojson_to_ee(ghs_batch).map(setArea)
    zs = ZonalStats(
        target_features = ghs_ee,
        output_name = f'gt0_batch_{batch_number}',
        **gt0_settings
    )
    res = zs.runZonalStats()
    results_gt0.append(zs)

In [29]:
# Print status and run time for each "> 0" coverage run.
# The recorded output shows these runs still in "Ready" state when the cell was executed.
for zs in results_gt0:
    zs.reportRunTime()

Status is Ready, hasn't started
Status is Ready, hasn't started
Status is Ready, hasn't started


In [30]:
# Zonal sum of the "> 1" coverage flags, one run per batch file.
# NOTE(review): `coverage_gt1` must still be the mapped ImageCollection here; a
# later cell rebinds that name to a DataFrame.
gt1_settings = dict(
    collection_id = 'NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG',
    statistic_type = 'sum',
    scale = 500,
    frequency = 'original',
    output_dir = 'ghs',
    ee_dataset = coverage_gt1,
)
results_gt1 = []
for batch_number, i in enumerate(range(0, len(ghs_filter), 1000)):
    ghs_batch = os.path.join(output_dir, 'batches', f"batch_{batch_number}.geojson")
    # Attach each feature's area (area_sqm) before running the statistics.
    ghs_ee = geemap.geojson_to_ee(ghs_batch).map(setArea)
    zs = ZonalStats(
        target_features = ghs_ee,
        output_name = f'gt1_batch_{batch_number}',
        **gt1_settings
    )
    res = zs.runZonalStats()
    results_gt1.append(zs)

In [31]:
# Print status and run time for each "> 1" coverage run.
# The recorded output shows these runs still in "Ready" state when the cell was executed.
for zs in results_gt1:
    zs.reportRunTime()

Status is Ready, hasn't started
Status is Ready, hasn't started
Status is Ready, hasn't started


In [32]:
# Zonal sum of the ">= 0" coverage flags, one run per batch file.
# NOTE(review): `coverage_gte0` must still be the mapped ImageCollection here; a
# later cell rebinds that name to a DataFrame.
gte0_settings = dict(
    collection_id = 'NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG',
    statistic_type = 'sum',
    scale = 500,
    frequency = 'original',
    output_dir = 'ghs',
    ee_dataset = coverage_gte0,
)
results_gte0 = []
for batch_number, i in enumerate(range(0, len(ghs_filter), 1000)):
    ghs_batch = os.path.join(output_dir, 'batches', f"batch_{batch_number}.geojson")
    # Attach each feature's area (area_sqm) before running the statistics.
    ghs_ee = geemap.geojson_to_ee(ghs_batch).map(setArea)
    zs = ZonalStats(
        target_features = ghs_ee,
        output_name = f'gte0_batch_{batch_number}',
        **gte0_settings
    )
    res = zs.runZonalStats()
    results_gte0.append(zs)

### Collect Results
Downloaded the results from Google Drive and sorted them into four folders under the output directory: `ntl`, `cov_gt0`, `cov_gte0` and `cov_gt1`.

In [33]:
# Folders holding the downloaded CSV exports, one per result family.
ntl_dir, cov_gt0_dir, cov_gte0_dir, cov_gt1_dir = (
    os.path.join(output_dir, sub_folder)
    for sub_folder in ('ntl', 'cov_gt0', 'cov_gte0', 'cov_gt1')
)

In [34]:
# Load every exported CSV in ntl_dir. Sorting makes the row order reproducible
# (os.listdir returns entries in arbitrary order) and the extension filter
# skips stray entries such as a .ipynb_checkpoints folder that pd.read_csv
# cannot open.
ntl_dfs = [
    pd.read_csv(os.path.join(ntl_dir, file))
    for file in sorted(os.listdir(ntl_dir))
    if file.lower().endswith('.csv')
]

In [35]:
# Stack the per-batch frames into a single table (row labels repeat until the index is reset).
ntl_df = pd.concat(ntl_dfs)

In [36]:
# Remove the two export columns that are not needed downstream.
ntl_df = ntl_df.drop(columns=['system:index', '.geo'])

In [37]:
# Replace the repeated per-batch row labels with a fresh 0..n-1 index.
ntl_df = ntl_df.reset_index(drop=True)

In [None]:
# Index the table by the ID_HDC_G0 column.
# NOTE(review): this cell has no execution count, so it may not have been run
# when the saved results were produced - confirm.
ntl_df = ntl_df.set_index("ID_HDC_G0")

In [38]:
# Move the last eight columns to the front, followed by the leading columns.
# NOTE(review): the slices [-8:] and [:-9] leave out the column at position -9,
# so that one column is dropped from ntl_df_final - confirm this is intended.
trailing_cols = list(ntl_df.columns[-8:])
leading_cols = list(ntl_df.columns[:-9])
ntl_df_final = ntl_df.loc[:, trailing_cols + leading_cols].copy()

In [107]:
def renameFunc(col):
    """Turn a radiance column label into ``Month-MM-YYYY``.

    Labels containing ``'rad'`` are rebuilt from their first four characters
    (used as the year) and characters five and six (used as the month); every
    other label is returned unchanged.
    """
    if 'rad' not in col:
        return col
    year, month = col[:4], col[4:6]
    return "Month-" + month + "-" + year

In [108]:
# Relabel the columns with renameFunc.
# NOTE(review): renameFunc is defined twice in this notebook; this cell relies
# on the first definition, which leaves non-'rad' labels untouched.
ntl_df_final = ntl_df_final.rename(columns=renameFunc)

In [110]:
# Save the reshaped night-time-lights table (index included) as ntl.csv in output_dir.
ntl_df_final.to_csv(os.path.join(output_dir, "ntl.csv"))

In [42]:
# Load every exported CSV in cov_gt0_dir. Sorting makes the row order
# reproducible (os.listdir returns entries in arbitrary order) and the
# extension filter skips stray entries such as a .ipynb_checkpoints folder
# that pd.read_csv cannot open.
cov_gt0_dfs = [
    pd.read_csv(os.path.join(cov_gt0_dir, file))
    for file in sorted(os.listdir(cov_gt0_dir))
    if file.lower().endswith('.csv')
]

In [43]:
# Stack the per-batch "> 0" coverage frames into a single table.
cov_gt0_df = pd.concat(cov_gt0_dfs)

In [44]:
# Remove the two export columns that are not needed downstream.
cov_gt0_df = cov_gt0_df.drop(columns=['system:index', '.geo'])

In [45]:
# Replace the repeated per-batch row labels with a fresh 0..n-1 index.
cov_gt0_df = cov_gt0_df.reset_index(drop=True)

In [None]:
# Index the table by the ID_HDC_G0 column so it aligns with the other coverage tables.
# NOTE(review): this cell has no execution count, so it may not have been run
# when the saved results were produced - confirm.
cov_gt0_df = cov_gt0_df.set_index("ID_HDC_G0")

In [47]:
# Load every exported CSV in cov_gte0_dir. Sorting makes the row order
# reproducible (os.listdir returns entries in arbitrary order) and the
# extension filter skips stray entries such as a .ipynb_checkpoints folder
# that pd.read_csv cannot open.
cov_gte0_dfs = [
    pd.read_csv(os.path.join(cov_gte0_dir, file))
    for file in sorted(os.listdir(cov_gte0_dir))
    if file.lower().endswith('.csv')
]
# Stack the batches, drop the unused export columns and renumber the rows.
cov_gte0_df = (
    pd.concat(cov_gte0_dfs)
    .drop(columns=['system:index', '.geo'])
    .reset_index(drop=True)
)

In [59]:
# Index the table by the ID_HDC_G0 column so it aligns with the other coverage tables.
cov_gte0_df = cov_gte0_df.set_index("ID_HDC_G0")

In [60]:
# Keep only the columns whose label contains 'cvg'.
# NOTE(review): this rebinds `coverage_gte0`, which earlier in the notebook is
# the mapped Earth Engine collection.
is_cvg_column = ['cvg' in label for label in cov_gte0_df.columns]
coverage_gte0 = cov_gte0_df.loc[:, is_cvg_column].copy()

In [116]:
# Keep only the columns whose label contains 'cvg'.
# NOTE(review): this rebinds `coverage_gt0`, which earlier in the notebook is
# the mapped Earth Engine collection.
is_cvg_column = ['cvg' in label for label in cov_gt0_df.columns]
coverage_gt0 = cov_gt0_df.loc[:, is_cvg_column].copy()

In [117]:
# Express the "> 0" values as a percentage of the ">= 0" values, aligned on
# index and column labels.
# Not idempotent: running this cell twice divides a second time.
coverage_gt0 = coverage_gt0.div(coverage_gte0).mul(100)

In [119]:
def renameFunc(col):
    """Rename a ``YYYYMM...`` column label to ``Month-MM-YYYY``.

    This definition replaces the earlier ``renameFunc`` in the notebook, so it
    has to be safe for every frame it may be applied to afterwards: a label
    that does not start with six digits (for example an attribute column) is
    returned unchanged instead of being rewritten into a meaningless
    ``Month-..`` name.

    Parameters
    ----------
    col : str
        Column label; date labels start with a four-digit year followed by a
        two-digit month.

    Returns
    -------
    str
        ``"Month-MM-YYYY"`` for date labels, otherwise ``col`` itself.
    """
    prefix = col[:6]
    if len(prefix) != 6 or not prefix.isdigit():
        return col
    return "Month-"+col[4:6]+"-"+col[:4]

In [120]:
# Relabel the coverage columns with renameFunc.
coverage_gt0 = coverage_gt0.rename(columns=renameFunc)

In [67]:
# Load every exported CSV in cov_gt1_dir. Sorting makes the row order
# reproducible (os.listdir returns entries in arbitrary order) and the
# extension filter skips stray entries such as a .ipynb_checkpoints folder
# that pd.read_csv cannot open.
cov_gt1_dfs = [
    pd.read_csv(os.path.join(cov_gt1_dir, file))
    for file in sorted(os.listdir(cov_gt1_dir))
    if file.lower().endswith('.csv')
]
# Stack the batches, drop the unused export columns and renumber the rows.
cov_gt1_df = (
    pd.concat(cov_gt1_dfs)
    .drop(columns=['system:index', '.geo'])
    .reset_index(drop=True)
)

In [68]:
# Index the table by the ID_HDC_G0 column so it aligns with the other coverage tables.
cov_gt1_df = cov_gt1_df.set_index("ID_HDC_G0")

In [122]:
# Keep only the columns whose label contains 'cvg'.
# NOTE(review): this rebinds `coverage_gt1`, which earlier in the notebook is
# the mapped Earth Engine collection.
is_cvg_column = ['cvg' in label for label in cov_gt1_df.columns]
coverage_gt1 = cov_gt1_df.loc[:, is_cvg_column].copy()

In [123]:
# Express the "> 1" values as a percentage of the ">= 0" values, aligned on
# index and column labels.
# Not idempotent: running this cell twice divides a second time.
coverage_gt1 = coverage_gt1.div(coverage_gte0).mul(100)

In [124]:
# Relabel the coverage columns with renameFunc.
coverage_gt1 = coverage_gt1.rename(columns=renameFunc)

In [73]:
# The attribute table is every column of cov_gt0_df whose label does not contain 'cvg'.
is_attribute_column = ['cvg' not in label for label in cov_gt0_df.columns]
attributes = cov_gt0_df.loc[:, is_attribute_column].copy()

In [76]:
# Drop the AREA column from the attribute table.
attributes = attributes.drop(columns="AREA")

In [78]:
# Add area_sqkm as area_sqm divided by one million.
attributes["area_sqkm"] = attributes["area_sqm"] / 1_000_000

In [85]:
# Count how many rows carry each QA2_1V value.
attributes["QA2_1V"].value_counts()

1    2785
2     109
0      14
Name: QA2_1V, dtype: int64

In [125]:
# Put the attribute columns in front of the "> 0" coverage percentages, matched on the shared index.
# Not idempotent: running this cell twice joins the attribute columns onto a table that already has them.
coverage_gt0 = attributes.join(coverage_gt0)

In [126]:
# Put the attribute columns in front of the "> 1" coverage percentages, matched on the shared index.
# Not idempotent: running this cell twice joins the attribute columns onto a table that already has them.
coverage_gt1 = attributes.join(coverage_gt1)

In [128]:
# Save the "> 0" coverage table (index included) as cov_gt0.csv in output_dir.
coverage_gt0.to_csv(os.path.join(output_dir, "cov_gt0.csv"))

In [129]:
# Save the "> 1" coverage table (index included) as cov_gt1.csv in output_dir.
coverage_gt1.to_csv(os.path.join(output_dir, "cov_gt1.csv"))