In [None]:
import pandas as pd
import geopandas as gpd
from pyproj import CRS
from shapely.geometry import Polygon
from shapely.geometry import Point
import rasterio as rio
from rasterio import features
from rasterio.windows import Window
import matplotlib.pyplot as plt
import numpy as np
import pprint as pp
import itertools as it
from tqdm import tqdm
import time

from leaf.deforestation import (
    area,
    to_lossyear_timeseries,
    closest_index,
)


In [None]:
# Load the two inputs used throughout the notebook:
#   asset - tabular asset records read from CSV
#   geo   - vector layer read from a GeoPackage (the file name suggests Hansen
#           GFC 2022 loss-year polygons for one tile - TODO confirm provenance)
asset = pd.read_csv('data/combined_asset_data.csv')
geo = gpd.read_file(
    'data/Hansen_GFC-2022-v1.10_lossyear_20S_060W_0_0_5000_5000.gpkg'
)

In [None]:
# Count the rows with a non-null `area` in every group and list the groups
# with the most observations first.
num_changes = (
    geo.groupby('group')['area']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .rename(columns={'area': 'num_changes'})
)
num_changes

In [None]:
# Reshape into a balanced panel: one row for every (group, lossyear) pair.

# All group ids crossed with all loss years 2001..2022.
group_ids = geo['group'].unique()
lossyears = range(2001, 2023)
index = pd.DataFrame(it.product(group_ids, lossyears), columns=['group', 'lossyear'])

# Attach the observed rows to the full grid; pairs without an observation keep
# missing values. Rows are ordered by group, then by year.
geo_mid = (
    index.merge(geo, on=['lossyear', 'group'], how='outer')
    .sort_values(['group', 'lossyear'])
    .reset_index()
    .drop(columns=['index'])
)

# Add the per-group observation count to every row.
geo_ts = geo_mid.merge(num_changes, on=['group'], how='outer')

# Spot-check a single group.
geo_ts.loc[geo_ts['group'] == 54153].head()

In [None]:
# Forward-fill `area` and `geometry` so every (group, lossyear) row carries the
# most recent observed value of its group.
#
# Both fills rely on the rows being ordered by group and then lossyear, with a
# non-missing value on each group's 2001 row: that anchor stops a plain forward
# fill from carrying values over from the previous group.

# Anchor for AREA: a group with no observation in 2001 starts at zero.
geo_ts.loc[(geo_ts['lossyear'] == 2001) & (geo_ts['area'].isna()), 'area'] = 0

## AREA (simple forward fill)
# Assign the filled column back explicitly. `geo_ts.area.fillna(..., inplace=True)`
# operates on a column accessor, which does not update the frame under pandas
# Copy-on-Write, and `fillna(method=...)` is deprecated in favour of `ffill()`.
geo_ts['area'] = geo_ts['area'].ffill()

## GEOMETRY (forward fill via a WKT round trip)

# Step 1: anchor for GEOMETRY - an empty polygon on 2001 rows without a geometry.
geo_ts.loc[(geo_ts['lossyear'] == 2001) & (geo_ts['geometry'].isna()), 'geometry'] = Polygon([])

# Step 2: serialise geometries to WKT strings (missing stays missing) and
# forward fill the strings; this is done on a temporary Series, so no auxiliary
# column is added to geo_ts.
geo_wkt = geo_ts['geometry'].apply(lambda geom: geom.wkt if not pd.isnull(geom) else None).ffill()

# Step 3: parse the filled strings back into geometries. WKT carries no CRS, so
# pass the CRS of `geo` (where these geometries came from) explicitly; otherwise
# the rebuilt column would have no CRS.
geo_ts['geometry'] = gpd.GeoSeries.from_wkt(geo_wkt, crs=geo.crs)

# check dataset
geo_ts.head(30)

In [None]:
# from deforestation.py
#
# NOTE(review): this cell does not run on a fresh kernel as written:
#   - `temp3` and `group_id` are not defined in any cell visible here;
#   - `proj_3857` is first assigned in a later cell of this notebook;
#   - `group_ids` is produced earlier by `geo.group.unique()`; if that returns a
#     NumPy array it has no `.to_numpy()` method - TODO confirm.
# Presumably this was pasted from the module for reference - confirm whether it
# should be removed or replaced by a call into `leaf.deforestation`.

# cumulative area: running sum of `area` within each `group_id` group

temp3['cum_area'] = proj_3857.groupby(group_id).area.cumsum()

# cumulative geometry: for each id, the running union of the geometries selected
# by `temp3.loc[i, 'geometry']` (assumes `temp3` is indexed so that `.loc[i]`
# selects all rows of one group, in order - TODO confirm)
for i in tqdm(group_ids.to_numpy()):
    temp3.loc[i, 'cum_geometry'] = list(it.accumulate(temp3.loc[i, 'geometry'], func=lambda x,y: x.union(y)))


In [None]:
# Assets whose country is Brazil.
brazil = asset.loc[asset['country'].eq('Brazil')]

# Brazilian assets strictly inside the box
# -60 < longitude < -58.8 and -21 < latitude < -20.
blip = brazil.loc[
    brazil['longitude'].gt(-60)
    & brazil['longitude'].lt(-58.8)
    & brazil['latitude'].gt(-21)
    & brazil['latitude'].lt(-20)
]

from typing import Tuple, Optional

def closest_(gdf: gpd.GeoDataFrame, lat: float, long: float, year: int, verbose: bool = False,
             point_crs: Optional[str] = "EPSG:4326") -> Tuple[float, int]:
    """Find the geometry of a given loss year that is closest to a location.

    Parameters
    ----------
    gdf : GeoDataFrame with a ``lossyear`` column; only rows whose ``lossyear``
        equals ``year`` are considered.
    lat, long : coordinates of the query location, expressed in ``point_crs``.
    year : loss year to filter on.
    verbose : currently unused; kept so existing calls keep working.
    point_crs : CRS of ``lat``/``long``. The point is reprojected into
        ``gdf.crs`` before measuring, so distances are in the units of
        ``gdf.crs``. Pass ``None`` (or use a ``gdf`` without a CRS) to skip the
        reprojection and use the coordinates as given.

    Returns
    -------
    (distance, index) where ``index`` is the index *label* in ``gdf`` of the
    closest row (use ``gdf.loc[index]``) and ``distance`` is its distance to
    the point.

    Raises
    ------
    ValueError
        If no row of ``gdf`` has ``lossyear == year``.
    """
    candidates = gdf[gdf['lossyear'] == year]
    if candidates.empty:
        raise ValueError(f"no geometries with lossyear == {year}")

    # shapely points are (x, y), i.e. (longitude, latitude) - not (lat, long).
    target = Point(long, lat)

    # Measure in the CRS of gdf: comparing raw degree coordinates against
    # projected geometries would produce meaningless distances.
    if point_crs is not None and gdf.crs is not None:
        target = gpd.GeoSeries([target], crs=point_crs).to_crs(gdf.crs).iloc[0]

    # TODO: geopandas.sindex.SpatialIndex.nearest
    distances = candidates.distance(target)

    # idxmin() returns an index label, so look the value up by label; a
    # positional lookup on the filtered series would pick the wrong row.
    index = distances.idxmin()
    return (distances.loc[index], index)

# Reproject the loss polygons before measuring distances.
# NOTE(review): the variable is named `proj_3857` but the target CRS is
# EPSG:3347 - confirm which projection is intended. EPSG:3347 appears to be a
# Lambert projection defined for Canada; verify it is suitable for distances
# in Brazil.
proj_3857 = geo.to_crs(epsg=3347)

# For every Brazilian asset, look up the closest polygon with loss year 2020.
# The result holds one closest_() return value per asset row.
# Single-point sanity check: closest_(proj_3857, -22, -55, 2020, False)
temp = brazil.apply(
    lambda row: closest_(proj_3857, row.latitude, row.longitude, 2020, False),
    axis=1,
)

In [None]:
# Overlay the Brazilian asset locations (red dots) on the 2020 loss polygons,
# coloured by group, and zoom to longitude -60..-50, latitude -22..-20.

# Asset locations as points, x = longitude and y = latitude, tagged with the
# CRS of `geo`.
geometry = list(map(Point, brazil["longitude"], brazil["latitude"]))
geodata = gpd.GeoDataFrame(geometry=geometry, crs=geo.crs)

# Polygons first (this creates the axes), then the points on the same axes.
base = geo.loc[geo['lossyear'] == 2020].plot(column='group', legend=True)
geodata.plot(ax=base, marker='o', color='red', markersize=5)

base.set_xlim(left=-60, right=-50)
base.set_ylim(bottom=-22, top=-20)
