In [None]:
import pandas as pd
#import geopandas as gpd
import rasterio as rio
from rasterio.plot import show
from rasterio.warp import transform
from rasterio.transform import (xy, rowcol)
from rasterio.windows import Window
import itertools as it
import rioxarray as rx
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import time
import timeit


In [None]:
# Hard-coded coordinate pairs; not referenced by the rest of this cell.
xs = np.array([3327493.433, 16177493.43])
ys = np.array([7389201.61, -580798.392])

with rio.open("data/TCL_DD_2022_20230407.tif") as src:

    # (x, y) map coordinates of pixel (row 0, col 0); latitude is N/S AKA y.
    pt = src.xy(0, 0)

    print(f'Number of bands: {src.count}')
    print(f'Image resolution: ({src.height}, {src.width})')
    print(f'Coordinate Reference System: ({src.crs})')

    fig, ax = plt.subplots()

    # matplotlib extent order is (left, right, bottom, top).
    bounds = src.bounds
    extent = [bounds.left, bounds.right, bounds.bottom, bounds.top]
    ax = show(src, extent=extent, ax=ax, cmap="pink")

    # https://epsg.io/4326
    # WGS 84 -- WGS84 - World Geodetic System 1984, used in GPS

    # 18.18517 55.56466
    # 29.766234, 69.421527 (latitude, longitude) pakistan
    # -15.28575, -55.94627 (latitude, longitude) brazil
    lat = -15.28575
    long = -55.94627

    # Re-project the query point from WGS 84 into the raster's CRS.
    # transform() takes and returns sequences: x = [x0], y = [y0].
    to_crs = src.crs
    from_crs = rio.crs.CRS.from_epsg(4326)
    x, y = transform(from_crs, to_crs, [long], [lat])

    # expect 1 - 5... but mention is made of Zero or Minor Loss
    # 1. CommodityDriven.Deforestation
    # 2. Shifting.Agriculture
    # 3. TreeFarm.ForestryOther
    # 4. Wildfire
    # 5. Urban

    # sample() expects an iterable of (x, y) pairs, so pair the transformed
    # sequences up instead of passing the two lists as a single "point".
    for val in src.sample(list(zip(x, y))):
        print(val)
    

In [None]:
# Keep xarray reprs compact: collapse the attrs and data sections by default.
display_options = dict(display_expand_attrs=False, display_expand_data=False)
xr.set_options(**display_options)

# Tutorial dataset used to try out the xarray API.
ds = xr.tutorial.load_dataset("air_temperature")



In [None]:
# Text summary of the tutorial dataset, followed by the default plot of its `air` variable.
print(ds)
ds["air"].plot()

In [None]:
# rasterio.features.dataset_features # works on raster value rather than mask...
# data.attrs["units"] = "metres/sec"
# rioxarray combines xarray and rasterio similar to how geopandas combines functionality from pandas and fiona
# and if you want to connect between geopandas and xarray, you can use geocube...

# Query point, in WGS 84 (EPSG:4326) degrees.
lat = -22.00027
long = -58.99658

# we can convert to and from pixel data with xy and rowcol respectively
# nearest neighbour in raster space would be the shortest distance in pixel space
# for certain values
# convert query from GPS to rowcol
# mask values not of interest... find rowcol of closest
# convert from rowcol to GPS

# shapely.ops.nearest_points(geom1, geom2)
# but we use rowcol values that can then be converted back to xy AKA GPS
# this way we would have 40000 x 40000 Point rather than pixel values
# distance calculation on projection may be distorted...
# use 'haversine' distance



# Manage the array returned by open_rasterio() itself: it is the object that
# owns the file handle. Using `.squeeze()` as the context manager would only
# "close" the derived copy and leave the file open after the block.
with rx.open_rasterio('data/Hansen_GFC-2022-v1.10_lossyear_20S_060W.tif') as raster:

    # Drop the length-1 band dimension so the array is indexed by (y, x) only.
    xda = raster.squeeze()

    #print(xda)

    #xda.rio.write_crs(3347, inplace=True)
    #print(xda.spatial_ref)

    #xda.rio.set_spatial_dims("lon", "lat", inplace=True)

    # band is lossyear
    # x is long
    # y is lat

    # Re-project the query point into the raster's CRS; transform() takes and
    # returns sequences, hence the [0] indexing further down.
    to_crs = xda.rio.crs
    from_crs = rio.crs.CRS.from_epsg(4326)
    x, y = transform(from_crs, to_crs, [long], [lat])

    # If you use "projected coordinate system", no problem. The distance that
    # you get is the distance on the map (not on the spherical earth). When
    # using "geographic coordinate system - GCS", the distance that you get
    # will be the shortest distance in 3D space.

    # This uses the 'haversine' formula to calculate the great-circle distance
    # between two points - that is, the shortest distance over the earth's surface
    #  - giving an 'as-the-crow-flies' distance between the points (ignoring any
    # hills they fly over, of course!).

    #xarray_distance.data = np.sqrt((lat - spec_lat)[:,None]**2 + ((lon - spec_lon)**2)

    # get value from grid: the pixel whose centre is nearest to the query point
    value = xda.sel(x=x[0], y=y[0], method="nearest")
    # ... is unspecified or infinite number of arguments!
    print(value)

    # An empty (tuple) index is a full scalar index into a zero dimensional array. x[()] returns a scalar
    # if x is zero dimensional and a view otherwise. On the other hand x[...] always returns a view.

    # When an ellipsis (...) is present but has no size (i.e. replaces zero :) the
    # result will still always be an array. A view if no advanced index is present, otherwise a copy.

    #value.values[()]





In [None]:
# We want to grab treecover2000 per asset/location, and pixels per lossyear
# The result is a simple DataFrame as we do not use shapely geometry...

# Example query point (WGS 84 degrees) and the half-width, in pixels, of the
# square window that is counted around each asset.
lat = -22.00027
long = -58.99658
offset = 16 # 16 * 2 + 1 == 33, odd number allows for single pixel centre, sides are close to 1km

# The term "kernel" in the context of convolutional neural networks (CNNs) comes from image processing 
# and mathematics, specifically from the field of signal processing. In image processing, a kernel is a 
# small matrix used for blurring, sharpening, edge detection, or other image processing operations.
# kernel -- a small matrix of weights --

# data/Hansen_GFC-2022-v1.10_treecover2000_10S_050W.tif 
# data/Hansen_GFC-2022-v1.10_treecover2000_10S_060W.tif 
# data/Hansen_GFC-2022-v1.10_treecover2000_20S_050W.tif 
# data/Hansen_GFC-2022-v1.10_treecover2000_20S_060W.tif

# data/Hansen_GFC-2022-v1.10_lossyear_10S_050W.tif 
# data/Hansen_GFC-2022-v1.10_lossyear_10S_060W.tif 
# data/Hansen_GFC-2022-v1.10_lossyear_20S_050W.tif 
# data/Hansen_GFC-2022-v1.10_lossyear_20S_060W.tif

assets = pd.read_csv('data/assets_for_deforestation.csv', sep='\t')
# Summary BEFORE de-duplication. A bare expression in the middle of a cell is
# evaluated and thrown away, so it has to be displayed explicitly.
display((assets.shape, assets.uid_gem.unique().size, assets.head()))
# TODO: should not be needed...
# Keep the first row per uid_gem and use it as the index.
assets = assets.drop_duplicates('uid_gem').set_index('uid_gem')
# Summary AFTER de-duplication (shown as the cell output).
assets.shape, assets.index.unique().size, assets.head()



In [None]:


# Distinct asset identifiers (uid_gem is the index of `assets`).
uid_gems = assets.index.unique()
# Loss years 2001..2022 inclusive.
lossyears = range(2001, 2023)

# One (uid_gem, lossyear) entry for every asset/year combination.
index = pd.MultiIndex.from_tuples(tuples=it.product(uid_gems, lossyears), names=('uid_gem', 'lossyear'))
# Open one lossyear tile and drop its length-1 dimensions; the array is left
# open on purpose because the following cells keep reading from it.
xda = rx.open_rasterio('data/Hansen_GFC-2022-v1.10_lossyear_20S_060W.tif').squeeze()


In [None]:

#print(xda)

# Re-project the asset positions (WGS 84 longitude/latitude) into the raster's
# CRS and convert them to pixel (row, col) indices of the tile.
to_crs = xda.rio.crs
from_crs = rio.crs.CRS.from_epsg(4326)
xs, ys = transform(from_crs, to_crs, assets.longitude, assets.latitude)
#print(xda.x.values)
rows, cols = rowcol(xda.rio.transform(), xs, ys)
assets['row'] = rows
assets['col'] = cols
# we may have coordinates beyond the extent of the DataArray...
# Negative indices fall off the top/left edge; indices >= the tile size fall off
# the bottom/right edge. Both have to be excluded to keep only local assets.
n_rows, n_cols = xda.sizes['y'], xda.sizes['x']
inside_tile = assets.row.between(0, n_rows - 1) & assets.col.between(0, n_cols - 1)
local_assets = assets[inside_tile].copy()
display(local_assets)
    
# Useful rio accessor methods:
#   isel_window(window: Window, pad: bool = False) -> Dataset | DataArray
#   slice_xy(minx: float, miny: float, maxx: float, maxy: float) -> Dataset | DataArray
#   xdsc = xds.rio.clip_box(...), also specifying another CRS

# rows, cols should be same shape and order as df...
#print(local_assets.row)
#print(local_assets.col)

def bar(r): # Optional[xarray.DataArray]
    """Return the square window of the global ``xda`` centred on one asset's pixel.

    ``r`` is a record exposing ``row`` and ``col`` pixel indices (for example a
    row of ``local_assets``). The window reaches ``offset`` pixels in every
    direction from (r.row, r.col), i.e. up to 2 * offset + 1 pixels per side,
    and is clipped at the edges of the raster.

    Returns the windowed DataArray, or None when the window contains no pixels.
    """
    def _span(centre):
        # Clamp both ends at 0: a negative slice bound counts from the far edge
        # of the axis, which would silently select the wrong (usually empty) range.
        return slice(max(centre - offset, 0), max(centre + offset + 1, 0))

    da = xda.isel(x=_span(r.col), y=_span(r.row))
    return da if da.size > 0 else None

def myfunc(row, col, xdarray, size):
    """Count pixel values, keyed by year, in a square window around one pixel.

    Parameters
    ----------
    row, col : int
        Zero-based pixel indices of the window centre.
    xdarray : object with ``isel(x=..., y=...)``
        Raster with dimensions named ``x`` and ``y``; the selection must expose
        ``size`` and ``data`` (as an xarray.DataArray does).
    size : int
        Half-width of the window in pixels. The window reaches ``size`` pixels
        in every direction from the centre (2 * size + 1 per side) and is
        clipped at the raster edges.

    Returns
    -------
    dict
        Maps ``pixel value + 2000`` to the number of window pixels holding that
        value. Empty when the window lies entirely outside the raster.
    """
    def _span(centre):
        # Clamp both ends at 0: a negative slice bound counts from the far edge
        # of the axis, which would silently select the wrong (usually empty) range.
        return slice(max(centre - size, 0), max(centre + size + 1, 0))

    window = xdarray.isel(x=_span(col), y=_span(row))
    if window.size == 0:
        return {}

    values, counts = np.unique(window.data, return_counts=True)
    if np.issubdtype(values.dtype, np.integer):
        # Widen first: adding 2000 to a narrow integer dtype such as uint8
        # overflows (and raises under NumPy 2 promotion rules).
        values = values.astype(np.int64)
    years = values + 2000
    # tolist() yields plain Python scalars for the keys and the counts.
    return dict(zip(years.tolist(), counts.tolist()))

#vfunc = np.vectorize(myfunc)

# Smoke test: counts, and their container type, for the window at pixel (1000, 1000).
sample_counts = myfunc(1000, 1000, xda, offset)
print(f'-> {sample_counts}')
print(f'-> {type(sample_counts)}')

#display(local_assets)

# Plain numpy arrays of the pixel indices, one entry per local asset.
np_row = local_assets.row.to_numpy()
np_col = local_assets.col.to_numpy()

# Show the container types first, then the shapes (row before col each time).
for pixel_index in (np_row, np_col):
    print(type(pixel_index))
for pixel_index in (np_row, np_col):
    print(pixel_index.shape)


In [None]:

#my_lambda = lambda r: xda.isel(x=slice(r[12]-offset-1, r[12]+offset), y=slice(r[11]-offset-1, r[11]+offset))
# important to specify 'otypes' to avoid 'only size-1 arrays can be converted to Python scalars'
# vectorize is just a for loop really so mostly syntactic sugar...
# excluded needs both positional and keyword members to be flexible... 
#del my_func

# otypes=[object]: every call returns a dict, so the results are stored in an
# object array. Declaring it up front also lets the wrapper accept empty
# row/col arrays and avoids the extra call numpy otherwise makes on the first
# element just to discover the output type.
# excluded: the raster and the window size are passed through unchanged
# instead of being broadcast element by element.
my_func = np.vectorize(myfunc, otypes=[object], excluded=['xdarray','size'], cache=False)


In [None]:

# Nota bene: np.vectorize is consistently a little quicker than apply... %timeit 
# One result per local asset, computed from its (row, col) pixel index.
result = my_func(row=np_row, col=np_col, xdarray=xda, size=offset)
#%timeit local_assets.apply(lambda r: myfunc(r.row,r.col,xda,offset), axis=1)

#display(result)
print(result.shape)
print(type(result.dtype))

# Wrap in a Series and show only the entries that are not missing.
poo = pd.Series(result)
display(poo.dropna())


In [None]:

# Attach each asset's result to its row, aligned on the uid_gem index.
region_counts = pd.Series(result, index=local_assets.index)
local_assets['region'] = region_counts

# Every column except the freshly added 'region'.
columns = local_assets.columns.drop('region')
print(columns)

# expand to columns i.e. to wide format... indexed by uid_gem
#display(local_assets[local_assets.index == 'L800190'])
#display(local_assets[local_assets.region.notna()])
#display(local_assets.tail())
#bar = local_assets['region'].apply(pd.Series)
#display(bar.tail())
region_as_columns = local_assets['region'].apply(pd.Series)
local_assets_lossyears = pd.concat([region_as_columns], axis=1)

# remove nodata before we shift to long format
# TODO: where does the colum 0 come from?
#local_assets.drop(['region'], inplace=True, axis=1)
display(local_assets_lossyears.tail())
#local_assets_lossyears.drop([2000, 0], inplace=True, axis=1)

# Cell output: shape and column labels of the wide table.
(local_assets_lossyears.shape, local_assets_lossyears.columns)
#local_assets_lossyears[local_assets_lossyears[2021].notna()].head(30)
#local_assets_lossyears[local_assets_lossyears.index == 'L800190']


In [None]:

# the id variables need to uniquely identify each row... 
# TODO: it does not like region as a dict
#foo = pd.wide_to_long(local_assets, stubnames=range(2001, 2023), i=columns, j='lossyear').reset_index()
# value_vars=range(2001, 2023) # takes all except id_vars when not given...
#display(columns)
#display(local_assets.columns)

# Every column of the wide table becomes one 'lossyear' value in the long table.
years = local_assets_lossyears.columns
display(years)
display(local_assets_lossyears)

# Wide -> long: one row per (asset, column); ignore_index=False keeps the
# uid_gem index on every row.
foo = local_assets_lossyears.melt(
    value_vars=years,
    var_name='lossyear',
    value_name='count',
    ignore_index=False,
)
display(foo.tail())
#display(local_assets[local_assets.index == 'L800190'])
#display(foo[foo.index == 'L800190'])


In [None]:
# One row per (uid_gem, lossyear) with the first non-null count of that pair.
bar = foo.groupby(['uid_gem', 'lossyear']).first()
# Align to the full asset x year grid; combinations without data become NaN.
bar = bar.reindex(index)
# Forward-fill inside each asset only. A plain ffill over the whole frame would
# carry the last count of one asset into the missing years of the next asset.
# NOTE(review): a missing year presumably means "no loss pixels"; confirm
# whether fillna(0) is what is actually wanted here instead of a forward fill.
bar = bar.groupby(level='uid_gem').ffill()
#bar.loc['L800190', ]
bar = bar.reset_index()
#display(bar[bar.uid_gem == 'L800190'])

# pivot on uid_gem and lossyear: one row per asset, one column per year
far = bar.pivot(index='uid_gem', columns='lossyear', values='count')
display(far.columns)
display(local_assets.columns)
# combine far with local_assets based on index...
local_assets_with_lossyears = pd.merge(local_assets, far, validate='one_to_one', left_on='uid_gem', right_on='uid_gem')

display(local_assets_with_lossyears)

local_assets_with_lossyears.to_csv('data/geotiff-sample.csv')

# Index contains duplicate entries, cannot reshape
#foo.pivot(index='uid_gem', columns='start_year_first')


#len(uid_gem), len(lossyears)


In [None]:
# Show the repr of the lossyear DataArray as the cell output.
xda