This second notebook reads each output of the zonal statistics tasks and reshapes each dataset from wide to long format.

In [1]:
import sys
sys.path.append('../../src') # relative path where the library is stored
# alternatively sys.path.append('C/Users/wb514197/Repos/GEE_Zonal/src')

In [2]:
import pandas as pd
import os
import glob

In [3]:
def process_file(file_path, temp_stats=None):
    """Reshape one Landsat zonal-statistics CSV from wide to long format.

    The file name must have the form ``<lc_id>_<var>_<temp_stat>.csv``
    (three "_"-separated parts). Columns whose name contains the upper-cased
    ``<var>`` are split on "_" into four parts ``a_b_c_d`` and renamed to
    ``b_c_d__a``; all column names are then lower-cased and the frame is
    melted with ``pd.wide_to_long`` using ``"__"`` as separator.

    Parameters
    ----------
    file_path : str
        Path of the CSV to read. Its first column is read as the index and
        then discarded; the ``.geo`` column is dropped.
    temp_stats : sequence of str, optional
        Statistic names appended to ``<var>_<temp_stat>`` to build the stub
        names. Defaults to ``("mean", "max", "min", "stddev")`` so the
        function no longer depends on a notebook-level global that is only
        defined in a later cell.

    Returns
    -------
    pandas.DataFrame
        Long-format frame indexed by ``(wb_adm0_na, year)`` with one column
        per stub plus the untouched columns and ``n_null``, the number of
        missing values in each original (wide) row.

    Raises
    ------
    ValueError
        If the file name does not consist of exactly three "_"-separated parts.
    KeyError
        If the file has no ``.geo`` column.
    """
    if temp_stats is None:
        temp_stats = ("mean", "max", "min", "stddev")

    df = pd.read_csv(file_path, index_col=0).reset_index(drop=True)
    df = df.drop(columns=".geo")
    # Count missing cells per wide row before reshaping (vectorised, same result as a row-wise apply).
    df["n_null"] = df.isna().sum(axis=1)

    stem = os.path.basename(file_path).replace(".csv", "")
    parts = stem.split("_")
    if len(parts) != 3:
        raise ValueError(
            f"Expected a file name like '<lc_id>_<var>_<temp_stat>.csv', got {stem!r}"
        )
    _lc_id, var, temp_stat = parts
    var_name = var.upper()
    prefix = "_".join([var, temp_stat])
    stubs = ["_".join([prefix, stat]) for stat in temp_stats]

    def rename_func(col):
        # Move the leading part (the wide_to_long suffix) behind a "__" separator.
        if var_name not in col:
            return col
        p = col.split("_")
        return p[1] + "_" + p[2] + "_" + p[3] + "__" + p[0]

    df = df.rename(columns=rename_func).rename(columns=str.lower)

    return pd.wide_to_long(
        df,
        stubnames=stubs,
        i="wb_adm0_na",
        j="year",
        sep="__",
    )

In [4]:
def process_collection(lc_id, files_dir):
    """Reshape and combine every file of one Landsat collection.

    Parameters
    ----------
    lc_id : str
        File-name prefix; every path in ``files_dir`` starting with it is processed.
    files_dir : str
        Folder that holds the CSV files.

    Returns
    -------
    pandas.DataFrame
        The per-file long-format frames concatenated column-wise, with
        repeated column names dropped (first occurrence kept) and a
        ``landsat_id`` column set to ``lc_id``.

    Raises
    ------
    ValueError
        If no file matches the prefix.

    Notes
    -----
    This notebook later redefines ``process_file`` with a second required
    argument; once that cell has run, this function fails with ``TypeError``
    until the one-argument ``process_file`` is defined again.
    """
    # glob returns paths in arbitrary order; sort so the column order is reproducible.
    files = sorted(glob.glob(os.path.join(files_dir, f"{lc_id}*")))
    if not files:
        raise ValueError(f"No files matching '{lc_id}*' found in {files_dir!r}")
    l_processed = [process_file(file) for file in files]
    l_all = pd.concat(l_processed, axis=1)
    # Identifier columns are repeated in every file; keep only the first copy.
    l_all = l_all.loc[:, ~l_all.columns.duplicated()].copy()
    l_all["landsat_id"] = lc_id
    return l_all

## NDVI

In [8]:
# Statistic names used as the last component of the reshaped column stubs (e.g. ndvi_mean_max).
temp_stats = ["mean", "max", "min", "stddev"]

In [9]:
# Root folder of the zonal-stats exports. Defaults to the original drive path;
# set the GEE_DATA_DIR environment variable to run the notebook on another machine.
data_dir = os.environ.get("GEE_DATA_DIR", "X:/data/ee")
ndvi_dir = os.path.join(data_dir, "ndvi")

In [10]:
# File-name prefixes of the Landsat collections; process_collection globs for files starting with each one.
lc_ids = ["LT05","LE07","LC08"]
# lc_ids = ["LE07","LC08"]

In [11]:
# One long-format frame per Landsat collection, built from the files in ndvi_dir.
res = [process_collection(lc_id, ndvi_dir) for lc_id in lc_ids]

In [12]:
# Stack the per-collection frames row-wise; the landsat_id column tells them apart.
ndvi = pd.concat(res, axis=0)

In [13]:
# Preview the first rows of the combined NDVI table.
ndvi.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,fid_100,n_null,objectid,shape_leng,region,laty,wb_adm0_co,un_m49,incomeg,lendingc,...,ndvi_mean_stddev,ndvi_min_mean,ndvi_min_max,ndvi_min_min,ndvi_min_stddev,ndvi_stddev_mean,ndvi_stddev_max,ndvi_stddev_min,ndvi_stddev_stddev,landsat_id
wb_adm0_na,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Palmyra Atoll (U.S.),1984,17000,115,171,19591.29,,5.874767,190,581,,,...,,,,,,,,,,LT05
Pitcairn Islands (U.K.),1984,17600,115,177,54269.91,,-24.425808,197,612,,,...,,,,,,,,,,LT05
Romania,1984,18200,3,183,4042455.0,,45.843975,203,642,,,...,0.069123,0.145123,0.43665,0.100008,0.045366,0.176018,0.493399,0.1,0.035404,LT05
"Saint Helena, Ascension and Tristan da Cunha (U.K.)",1984,18500,115,186,223659.9,,-25.557539,207,654,,,...,,,,,,,,,,LT05
Saint-Pierre-et-Miquelon (Fr.),1984,18800,19,189,236926.7,,46.94207,210,666,,,...,0.086466,0.221855,0.441662,0.102969,0.090933,0.14961,0.227456,0.100085,0.032024,LT05


In [14]:
# Spot-check one country: selecting on the first index level leaves the rows indexed by year.
ndvi.loc["Uganda"].head()

Unnamed: 0_level_0,fid_100,n_null,objectid,shape_leng,region,laty,wb_adm0_co,un_m49,incomeg,lendingc,...,ndvi_mean_stddev,ndvi_min_mean,ndvi_min_max,ndvi_min_min,ndvi_min_stddev,ndvi_stddev_mean,ndvi_stddev_max,ndvi_stddev_min,ndvi_stddev_stddev,landsat_id
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,22500,20,226,2588540.0,Sub-Saharan Africa,1.279114,253,800,Low income,IDA,...,0.11626,0.295688,0.746945,0.100001,0.159135,0.171805,0.518526,0.100004,0.042978,LT05
1985,22500,20,226,2588540.0,Sub-Saharan Africa,1.279114,253,800,Low income,IDA,...,0.13223,0.283866,0.707618,0.100004,0.135296,0.141957,0.553882,0.100008,0.037616,LT05
1986,22500,20,226,2588540.0,Sub-Saharan Africa,1.279114,253,800,Low income,IDA,...,0.102061,0.208122,0.630003,0.100001,0.08842,0.16253,0.360771,0.1,0.037646,LT05
1987,22500,20,226,2588540.0,Sub-Saharan Africa,1.279114,253,800,Low income,IDA,...,0.103353,0.217468,0.73602,0.100001,0.134141,0.165408,0.420269,0.1,0.036415,LT05
1988,22500,20,226,2588540.0,Sub-Saharan Africa,1.279114,253,800,Low income,IDA,...,0.06364,0.185641,0.645917,0.100038,0.094238,0.174365,0.34742,0.100068,0.032993,LT05


In [15]:
# Folder for the reshaped CSVs. makedirs(exist_ok=True) also creates missing
# parent folders and is safe to re-run, unlike an exists-check followed by mkdir.
out_dir = os.path.join(data_dir, "output")
os.makedirs(out_dir, exist_ok=True)

In [94]:
# Write the long-format NDVI table; the (wb_adm0_na, year) index is written as the leading columns.
ndvi.to_csv(os.path.join(out_dir, "ndvi.csv"))

## EVI

In [155]:
# Folder with the EVI exports.
evi_dir = os.path.join(data_dir, 'evi')

In [158]:
# Same per-collection processing as for NDVI; note this rebinds `res`, discarding the NDVI list.
res = [process_collection(lc_id, evi_dir) for lc_id in lc_ids]

In [160]:
# Stack the per-collection EVI frames row-wise.
evi = pd.concat(res, axis=0)

In [162]:
# Write the long-format EVI table to the output folder.
evi.to_csv(os.path.join(out_dir, "evi.csv"))

## NDSI

In [13]:
# Folder with the NDSI exports, processed once per Landsat collection (rebinds `res`).
ndsi_dir = os.path.join(data_dir, 'ndsi')
res = [process_collection(lc_id, ndsi_dir) for lc_id in lc_ids]

In [19]:
# Stack the per-collection NDSI frames row-wise and write them to the output folder.
ndsi = pd.concat(res, axis=0)
ndsi.to_csv(os.path.join(out_dir, "ndsi.csv"))

## NDWI

In [20]:
# Folder with the NDWI exports, processed once per Landsat collection (rebinds `res`).
ndwi_dir = os.path.join(data_dir, 'ndwi')
res = [process_collection(lc_id, ndwi_dir) for lc_id in lc_ids]

In [21]:
# Stack the per-collection NDWI frames row-wise and write them to the output folder.
ndwi = pd.concat(res, axis=0)
ndwi.to_csv(os.path.join(out_dir, "ndwi.csv"))

## CHIRPS

In [18]:
# Folder with the two CHIRPS exports read in the next cell.
chirps_dir = os.path.join(data_dir, "chirps")

In [19]:
# Two wide tables: rain_all and rain_sum; the 'precipitation' columns of rain_sum
# are suffixed with "_sum" and joined onto rain_all in the following cells.
rain_all = pd.read_csv(os.path.join(chirps_dir, "chirps.csv"))
rain_sum = pd.read_csv(os.path.join(chirps_dir, "chirps_sum.csv"))

In [20]:
def rename_func(col):
    """Return ``col`` with ``_sum`` appended if it contains 'precipitation', else unchanged."""
    return col + "_sum" if 'precipitation' in col else col

In [21]:
# Suffix every precipitation column of the sum table with "_sum".
rain_sum = rain_sum.rename(columns=rename_func)

In [23]:
# Attach the precipitation columns of rain_sum to rain_all. The join is on the
# row index. NOTE(review): this assumes both CSVs list the regions in the same
# order — confirm. Re-running this cell fails because the joined columns already exist.
rain_all = rain_all.join(rain_sum.filter(like='precipitation'), how='outer')

In [24]:
# Work on a copy so the joined rain_all table stays untouched by the renames below.
df = rain_all.copy()

In [25]:
# Remove the geometry column before reshaping.
df = df.drop(columns=".geo")

In [26]:
# Read as a global by the rename_func defined in the next cell to pick the columns to rename.
var_name = 'precipitation'

In [27]:
def rename_func(col):
    """Move the leading "_"-separated part of a value column behind a "__" separator.

    Columns containing the notebook-level ``var_name`` are split on "_" into
    ``a_b_c_d`` and returned as ``b_c_d__a``; any other column is returned
    unchanged. This definition replaces the earlier ``rename_func`` of the
    same name.
    """
    if var_name not in col:
        return col
    parts = col.split("_")
    stub = "_".join([parts[1], parts[2], parts[3]])
    return "__".join([stub, parts[0]])

In [28]:
# Rename the value columns to "<stub>__<suffix>" and lower-case every column name.
df = df.rename(columns=rename_func).rename(columns=str.lower)

In [29]:
# Display the statistic names that are extended with 'sum' two cells below.
temp_stats

['mean', 'max', 'min', 'stddev']

In [30]:
# Common prefix of the reshaped CHIRPS columns; joined with each statistic name to form the stubs.
var = "precipitation_sum"

In [31]:
# The four shared statistics plus 'sum', which matches the columns suffixed with "_sum" from rain_sum.
stats = temp_stats + ['sum']

In [32]:
# One wide_to_long stub per statistic, e.g. "precipitation_sum_mean".
stubs = ["_".join([var, stat]) for stat in stats]

In [34]:
# Melt the "<stub>__<year>" columns so that each (wb_adm0_na, year) pair becomes one row.
df_re = pd.wide_to_long(df, stubs, i="wb_adm0_na", j="year", sep="__")

In [45]:
# Write the long-format CHIRPS table to the output folder.
df_re.to_csv(os.path.join(out_dir, "chirps.csv"))

## Lights

In [36]:
def process_data(files_dir, var_name):
    """Reshape every file in ``files_dir`` and combine the results column-wise.

    Parameters
    ----------
    files_dir : str
        Folder whose entries are each passed to ``process_file``.
    var_name : str
        Forwarded to ``process_file`` to select the value columns.

    Returns
    -------
    pandas.DataFrame
        The per-file long-format frames concatenated column-wise, with
        repeated column names dropped (first occurrence kept).

    Raises
    ------
    ValueError
        If ``files_dir`` contains no entries.
    """
    # glob returns paths in arbitrary order; sort so the column order is reproducible.
    files = sorted(glob.glob(os.path.join(files_dir, "*")))
    if not files:
        raise ValueError(f"No files found in {files_dir!r}")
    l_processed = [process_file(file, var_name) for file in files]
    l_all = pd.concat(l_processed, axis=1)
    # Identifier columns are repeated in every file; keep only the first copy.
    return l_all.loc[:, ~l_all.columns.duplicated()].copy()

In [37]:
def process_file(file_path, var_name, temp_stats=None):
    """Reshape one night-lights zonal-statistics CSV from wide to long format.

    This definition replaces the one-argument ``process_file`` defined
    earlier in the notebook. The file name must have the form
    ``<source>_<temp_stat>.csv``. Columns whose name contains ``var_name``
    are split on "_" and renamed to ``lights_<part 3>_<part 4>__<part 0>``;
    all column names are then lower-cased and the frame is melted with
    ``pd.wide_to_long`` on the stubs ``lights_<temp_stat>_<stat>``.

    Parameters
    ----------
    file_path : str
        Path of the CSV to read; its first column becomes the frame index
        while reshaping and the ``.geo`` column is dropped.
    var_name : str
        Substring identifying the value columns (the notebook passes
        ``"stable_lights"``).
    temp_stats : sequence of str, optional
        Statistic names that complete the stub names. Defaults to
        ``("mean", "max", "min", "stddev")`` so the function does not depend
        on a notebook-level global.

    Returns
    -------
    pandas.DataFrame
        Long-format frame indexed by ``(wb_adm0_na, year)`` with one column
        per stub plus the untouched columns and ``n_null``, the number of
        missing values in each original (wide) row.

    Raises
    ------
    ValueError
        If the file name does not consist of exactly two "_"-separated parts.
    KeyError
        If the file has no ``.geo`` column.
    """
    if temp_stats is None:
        temp_stats = ("mean", "max", "min", "stddev")
    out_prefix = "lights"

    df = pd.read_csv(file_path, index_col=0)
    df = df.drop(columns=".geo")
    # Count missing cells per wide row before reshaping.
    df["n_null"] = df.isna().sum(axis=1)

    stem = os.path.basename(file_path).replace(".csv", "")
    parts = stem.split("_")
    if len(parts) != 2:
        raise ValueError(
            f"Expected a file name like '<source>_<temp_stat>.csv', got {stem!r}"
        )
    _source, temp_stat = parts
    # The stubs must carry the same prefix as the renamed columns below. Building
    # them from var_name (e.g. "stable_lights") would never match the "lights_..."
    # columns, leaving wide_to_long with nothing to reshape.
    stubs = ["_".join([out_prefix, temp_stat, stat]) for stat in temp_stats]

    def rename_func(col):
        # Parts 3 and 4 assume var_name itself spans two "_"-separated parts
        # after the leading suffix, as in "<year>_stable_lights_<a>_<b>".
        if var_name not in col:
            return col
        p = col.split("_")
        return out_prefix + "_" + p[3] + "_" + p[4] + "__" + p[0]

    df = df.rename(columns=rename_func).rename(columns=str.lower)

    return pd.wide_to_long(
        df,
        stubnames=stubs,
        i="wb_adm0_na",
        j="year",
        sep="__",
    )

In [38]:
# Folder with the exports processed into `dmps` below.
dmps_dir = os.path.join(data_dir, "dmps")

In [39]:
# Reshape every file in dmps_dir; columns containing "stable_lights" are treated as value columns.
dmps = process_data(dmps_dir, "stable_lights")

In [133]:
# Write the long-format table to the output folder.
dmps.to_csv(os.path.join(out_dir, "dmps.csv"))

In [18]:
# Folder with the exports processed into `viirs` below.
viirs_dir = os.path.join(data_dir, "viirs")

In [22]:
# NOTE(review): uses the same var_name as the dmps call. Confirm that the column
# names in the VIIRS files actually contain 'stable_lights'; columns that do not
# contain var_name are left unrenamed by process_file.
viirs = process_data(viirs_dir, 'stable_lights')

In [24]:
# Write the long-format table to the output folder.
viirs.to_csv(os.path.join(out_dir, "viirs.csv"))

## LST

In [8]:
# Folder with the exports processed into `lst` below.
lst_dir = os.path.join(data_dir, "lst")

In [11]:
# NOTE(review): process_data, as defined earlier in this notebook, requires a second
# positional argument (var_name), so this call raises TypeError on a fresh run.
# Confirm which var_name the files in lst_dir need and pass it here.
lst = process_data(lst_dir)

In [16]:
# Write the table to the output folder; note the file is named temperature.csv rather than lst.csv.
lst.to_csv(os.path.join(out_dir, "temperature.csv"))

## Cropland

In [152]:
# Read the wide cropland table; this rebinds the generic name `df` used in earlier sections.
df = pd.read_csv(os.path.join(data_dir, 'cropland', 'cropland.csv'))

In [154]:
# Drop the geometry column, lower-case the column names, then melt the
# "cropland_<year>" columns into one row per (wb_adm0_na, year).
df = df.drop(columns=".geo").rename(columns=str.lower)
df_re = pd.wide_to_long(df, ["cropland"], i="wb_adm0_na", j="year", sep="_")

In [156]:
# Write the long-format cropland table to the output folder.
df_re.to_csv(os.path.join(out_dir, "cropland.csv"))

## Impervious

In [157]:
# Read the wide impervious-surface table; this rebinds `df`, replacing the cropland frame.
df = pd.read_csv(os.path.join(data_dir, 'impervious', 'impervious.csv'))

In [160]:
# Drop the geometry column, lower-case the column names, then melt the
# "imperv_<year>" columns into one row per (wb_adm0_na, year).
df = df.drop(columns=".geo").rename(columns=str.lower)
df_re = pd.wide_to_long(df, ["imperv"], i="wb_adm0_na", j="year", sep="_")

In [162]:
# Write the long-format impervious-surface table to the output folder.
df_re.to_csv(os.path.join(out_dir, "impervious.csv"))