This second notebook takes each output of the zonal statistics tasks and reshapes it from wide to long format.

In [1]:
import sys

sys.path.append("../../src")  # relative path where the library is stored
# alternatively sys.path.append('C:/Users/wb514197/Repos/GEE_Zonal/src')

In [2]:
import pandas as pd
import os
import glob

In [3]:
def process_file_landsat(file_path, stats=None):
    """Reshape one Landsat zonal-statistics CSV from wide to long format.

    The file name must split into exactly three ``_``-separated parts,
    ``<collection>_<variable>_<stat>.csv`` (e.g. ``LT05_ndvi_mean.csv``).
    Columns whose name contains the upper-cased variable are expected to look
    like ``<year>_<VARIABLE>_<stat>_<suffix>``; they are renamed to
    ``<variable>_<stat>_<suffix>__<year>`` (lower-cased) so that
    ``pd.wide_to_long`` can split the year off on the ``__`` separator.

    Parameters
    ----------
    file_path : str
        Path to the CSV file.  The first column is used as the index and a
        ``.geo`` column must be present (it is dropped).
    stats : list of str, optional
        Suffixes appended to ``<variable>_<stat>`` to build the stub names.
        Defaults to the notebook-level ``temp_stats`` list.

    Returns
    -------
    pandas.DataFrame
        Long-format frame indexed by ``wb_adm0_na`` and ``year``.

    Raises
    ------
    ValueError
        If the file name does not split into exactly three parts.
    """
    if stats is None:
        # Fall back to the notebook-level list so existing calls behave as before.
        stats = temp_stats

    df = pd.read_csv(file_path, index_col=0)
    df.drop(columns=".geo", inplace=True)

    file = os.path.basename(file_path)
    parts = file.replace(".csv", "").split("_")
    if len(parts) != 3:
        raise ValueError(
            f"expected a file name like '<collection>_<variable>_<stat>.csv', got {file!r}"
        )
    _, var, temp_stat = parts
    var_name = var.upper()
    var = "_".join([var, temp_stat])
    stubs = ["_".join([var, a]) for a in stats]

    def rename_func(col):
        # Only value columns carry the variable name; id columns pass through.
        if var_name not in col:
            return col
        p = col.split("_")
        # Move the leading token (the year) behind a "__" separator.
        return f"{p[1]}_{p[2]}_{p[3]}__{p[0]}"

    df.rename(rename_func, axis=1, inplace=True)
    # Lower-case everything so the columns match the lower-case stubs and id column.
    df.rename(str.lower, axis=1, inplace=True)

    return pd.wide_to_long(df, stubnames=stubs, i="wb_adm0_na", j="year", sep="__")

In [10]:
def process_collection(lc_id, files_dir):
    """Reshape and combine all files of one Landsat collection.

    Every path matching ``<files_dir>/<lc_id>*`` is passed through
    ``process_file_landsat``; the resulting frames are concatenated
    column-wise, repeated column names are reduced to their first occurrence,
    and a ``landsat_id`` column holding ``lc_id`` is added.
    """
    frames = []
    for path in glob.glob(files_dir + f"/{lc_id}*"):
        frames.append(process_file_landsat(path))

    combined = pd.concat(frames, axis=1)
    # Columns shared by several files appear once per file; keep the first copy.
    first_occurrence = ~combined.columns.duplicated()
    combined = combined.loc[:, first_occurrence].copy()
    combined.loc[:, "landsat_id"] = lc_id
    return combined

## NDVI

In [11]:
temp_stats = ["mean", "max", "min", "stddev"]

In [12]:
data_dir = os.path.join("X:/data/ee")
ndvi_dir = os.path.join(data_dir, "ndvi")

In [13]:
lc_ids = ["LT05", "LE07", "LC08"]
# lc_ids = ["LE07","LC08"]

In [14]:
res = [process_collection(lc_id, ndvi_dir) for lc_id in lc_ids]

In [15]:
ndvi = pd.concat(res, axis=0)

In [17]:
# Folder for the reshaped CSVs. makedirs also creates missing parent folders
# and, with exist_ok=True, does nothing when the folder is already there.
out_dir = os.path.join(data_dir, "output")
os.makedirs(out_dir, exist_ok=True)

In [18]:
# ndvi.to_csv(os.path.join(out_dir, "ndvi.csv"))

## EVI

In [19]:
evi_dir = os.path.join(data_dir, "evi")

In [20]:
res = [process_collection(lc_id, evi_dir) for lc_id in lc_ids]

In [21]:
evi = pd.concat(res, axis=0)

In [140]:
# evi.to_csv(os.path.join(out_dir, "evi.csv"))

## NDSI

In [22]:
ndsi_dir = os.path.join(data_dir, "ndsi")
res = [process_collection(lc_id, ndsi_dir) for lc_id in lc_ids]

In [23]:
ndsi = pd.concat(res, axis=0)
# ndsi.to_csv(os.path.join(out_dir, "ndsi.csv"))

## NDWI

In [24]:
ndwi_dir = os.path.join(data_dir, "ndwi")
res = [process_collection(lc_id, ndwi_dir) for lc_id in lc_ids]

In [25]:
ndwi = pd.concat(res, axis=0)
# ndwi.to_csv(os.path.join(out_dir, "ndwi.csv"))

## CHIRPS

In [27]:
chirps_dir = os.path.join(data_dir, "chirps")

In [28]:
rain_all = pd.read_csv(os.path.join(chirps_dir, "chirps.csv"))
rain_sum = pd.read_csv(os.path.join(chirps_dir, "chirps_sum.csv"))

In [30]:
def rename_func(col):
    """Return ``col`` with ``_sum`` appended if it contains "precipitation", else unchanged."""
    return col + "_sum" if "precipitation" in col else col

In [31]:
rain_sum.rename(rename_func, axis=1, inplace=True)

In [32]:
# Attach the precipitation columns of rain_sum to rain_all.
# NOTE: DataFrame.join aligns on the index, and both frames were read without
# an index column, so rows are matched by position in the file.
rain_all = rain_all.join(rain_sum.filter(like="precipitation"), how="outer")

In [33]:
# Reshape the combined CHIRPS table from wide to long format.
df = rain_all.drop(columns=".geo")  # drop() returns a new frame; rain_all is untouched
var_name = "precipitation"


def rename_func(col):
    """Move the leading ``_``-separated token of a precipitation column behind ``__``."""
    if var_name not in col:
        return col
    p = col.split("_")
    return f"{p[1]}_{p[2]}_{p[3]}__{p[0]}"


df = df.rename(columns=rename_func).rename(columns=str.lower)
var = "precipitation_sum"
# The usual statistics plus the "sum" columns joined in from chirps_sum.csv.
stats = temp_stats + ["sum"]
stubs = [f"{var}_{a}" for a in stats]
df_re = pd.wide_to_long(df, stubnames=stubs, i="wb_adm0_na", j="year", sep="__")

In [169]:
# df_re.to_csv(os.path.join(out_dir, "chirps.csv"))

## Lights

In [41]:
def process_data(files_dir, var_name):
    """Reshape every file in ``files_dir`` and combine the results column-wise.

    Each path matching ``<files_dir>/*`` is passed to ``process_file`` together
    with ``var_name``.  The resulting frames are concatenated along the column
    axis and repeated column names are reduced to their first occurrence.
    """
    frames = []
    for path in glob.glob(files_dir + "/*"):
        frames.append(process_file(path, var_name))

    combined = pd.concat(frames, axis=1)
    # Columns shared by several files appear once per file; keep the first copy.
    first_occurrence = ~combined.columns.duplicated()
    return combined.loc[:, first_occurrence].copy()

In [83]:
def process_file(file_path, var_name, stats=None):
    """Reshape one zonal-statistics CSV from wide to long format.

    The file name must split into exactly two ``_``-separated parts,
    ``<source>_<stat>.csv``.  Columns whose name contains ``var_name`` or
    ``"avg_rad"`` are split on ``_`` and renamed to
    ``<var_name>_<token 3>_<token 4>__<token 0>``, then every column name is
    lower-cased and ``pd.wide_to_long`` splits the trailing token off as
    ``year``.

    Parameters
    ----------
    file_path : str
        Path to the CSV file.  The first column is used as the index and a
        ``.geo`` column must be present (it is dropped).
    var_name : str
        Name used as the prefix of the output columns.  Any value is accepted.
        It should be lower-case, because column names are lower-cased after
        renaming while the stub names are built from ``var_name`` as given.
    stats : list of str, optional
        Suffixes appended to ``<var_name>_<stat>`` to build the stub names.
        Defaults to the notebook-level ``temp_stats`` list.

    Returns
    -------
    pandas.DataFrame
        Long-format frame indexed by ``wb_adm0_na`` and ``year``.

    Raises
    ------
    ValueError
        If the file name does not split into exactly two parts.
    """
    if stats is None:
        # Fall back to the notebook-level list so existing calls behave as before.
        stats = temp_stats

    df = pd.read_csv(file_path, index_col=0)
    df.drop(columns=".geo", inplace=True)

    file = os.path.basename(file_path)
    parts = file.replace(".csv", "").split("_")
    if len(parts) != 2:
        raise ValueError(
            f"expected a file name like '<source>_<stat>.csv', got {file!r}"
        )
    temp_stat = parts[1]
    var = "_".join([var_name, temp_stat])
    stubs = ["_".join([var, a]) for a in stats]

    def rename_func(col):
        # Only value columns are renamed; id columns pass through untouched.
        if var_name not in col and "avg_rad" not in col:
            return col
        p = col.split("_")
        # Same pattern for every variable; the old per-variable branches left
        # the new name undefined for anything but "lights" or "temperature".
        return f"{var_name}_{p[3]}_{p[4]}__{p[0]}"

    df.rename(rename_func, axis=1, inplace=True)
    df.rename(str.lower, axis=1, inplace=True)

    return pd.wide_to_long(df, stubnames=stubs, i="wb_adm0_na", j="year", sep="__")

In [84]:
dmps_dir = os.path.join(data_dir, "dmps")

In [85]:
dmps = process_data(dmps_dir, "lights")

In [86]:
# dmps.to_csv(os.path.join(out_dir, "dmps.csv"))

In [87]:
viirs_dir = os.path.join(data_dir, "viirs")

In [89]:
viirs = process_data(viirs_dir, "lights")

In [91]:
# viirs.to_csv(os.path.join(out_dir, "viirs.csv"))

## LST

In [94]:
lst_dir = os.path.join(data_dir, "lst")

In [95]:
lst = process_data(lst_dir, "temperature")

In [16]:
# lst.to_csv(os.path.join(out_dir, "temperature.csv"))

## Cropland

In [152]:
df = pd.read_csv(os.path.join(data_dir, "cropland", "cropland.csv"))

In [154]:
# Drop the geometry column and lower-case the headers, then melt the
# "cropland_<year>" columns into a single "cropland" column indexed by year.
df = df.drop(columns=".geo").rename(columns=str.lower)
df_re = pd.wide_to_long(df, stubnames=["cropland"], i="wb_adm0_na", j="year", sep="_")

In [156]:
df_re.to_csv(os.path.join(out_dir, "cropland.csv"))

## Impervious

In [157]:
df = pd.read_csv(os.path.join(data_dir, "impervious", "impervious.csv"))

In [160]:
# Drop the geometry column and lower-case the headers, then melt the
# "imperv_<year>" columns into a single "imperv" column indexed by year.
df = df.drop(columns=".geo").rename(columns=str.lower)
df_re = pd.wide_to_long(df, stubnames=["imperv"], i="wb_adm0_na", j="year", sep="_")

In [162]:
df_re.to_csv(os.path.join(out_dir, "impervious.csv"))