In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression

# Open PGD datasets

In [None]:
# Each PGD file is tagged with a label that becomes its "type" coordinate.
pgd_files = {
    "noTEB": "BE40a_l_noTEB_PGD.fa.nc",
    "original": "BE40a_l_PGD.fa.nc",
    "no_urban": "BE40a_l_no_urban_PGD.fa.nc",
}

# Open lazily (dask chunks) and label every dataset before stacking them.
ds_list = [
    xr.open_dataset(path, engine="netcdf4", chunks="auto").assign_coords(
        {"type": label}
    )
    for label, path in pgd_files.items()
]

# Stack the three PGDs along a new "type" dimension.
ds = xr.concat(ds_list, dim="type")
ds

## Calculate covers/tiles dataset

In [None]:
# Missing values are treated as a zero fraction.
ds_filled = ds.fillna(0)

# Candidate variable names for cover numbers 1..573; only those present in the
# dataset are kept, each tagged with its cover number.
cover_varnames = {number: f"SFX.COVER{number:03d}____" for number in range(1, 574)}
da_list = [
    ds_filled[name].rename("COVER").assign_coords({"cover": number})
    for number, name in cover_varnames.items()
    if name in ds_filled
]

# One DataArray with a "cover" dimension, wrapped in a dataset.
da = xr.concat(da_list, dim="cover")
ds_covers = da.to_dataset()
ds_covers

In [None]:
# Missing values are treated as a zero fraction.
ds_filled = ds.fillna(0)

# Tile label -> name of the PGD variable holding that tile's fraction.
tile_variables = {
    "sea": "SFX.FRAC_SEA____",
    "water": "SFX.FRAC_WATER__",
    "town": "SFX.FRAC_TOWN___",
    "nature": "SFX.FRAC_NATURE_",
}

da_list = [
    ds_filled[variable].rename("TILE").assign_coords({"tile": tile})
    for tile, variable in tile_variables.items()
]

# One DataArray with a "tile" dimension, wrapped in a dataset.
da = xr.concat(da_list, dim="tile")
ds_tiles = da.to_dataset()
ds_tiles

## Plot sum of covers and tiles

In [None]:
# Sum of all cover fractions at each grid point, per PGD type.
ds_covers_sum = ds_covers["COVER"].sum(dim="cover")

# Keep only the points whose total is below 1 (with a 1e-6 tolerance).
below_one = ds_covers_sum < 1.0 - 1e-6
ds_covers_sum_masked = ds_covers_sum.where(below_one)

panel_options = dict(col="type", aspect=4 / 3, size=4.8, vmin=0, vmax=1)
ds_covers_sum_masked.plot(**panel_options)
plt.show()

# Smallest total found in each PGD type.
ds_covers_sum_masked.min(dim=["x", "y"]).values

We have plotted the sum of all covers for the three types of PGDs. We only plotted the points where the sum was smaller than $1 - 10^{-6}$. The minimum value of the sum is 0.16667.

In [None]:
# Difference of the cover totals between consecutive entries along "type".
covers_sum_diff = ds_covers_sum.diff(dim="type")
covers_sum_diff.plot(col="type", aspect=4 / 3, size=4.8)
plt.show()

Next, we plotted all points where the sum of all covers differs between the types. The left plot compares original and noTEB, the right plot compares original and no_urban. The differences are limited to about $10^{-7}$. So, the sum of all covers is virtually identical across the types. The fact that the sum is smaller than 1 in some points is unrelated to the LPS tool.

In [None]:
# Sum of all tile fractions at each grid point, per PGD type.
ds_tiles_sum = ds_tiles["TILE"].sum(dim="tile")

# Keep only the points whose total is below 1 (with a 1e-6 tolerance).
below_one = ds_tiles_sum < 1.0 - 1e-6
ds_tiles_sum_masked = ds_tiles_sum.where(below_one)

panel_options = dict(col="type", aspect=4 / 3, size=4.8, vmin=0, vmax=1)
ds_tiles_sum_masked.plot(**panel_options)
plt.show()

# Smallest total found in each PGD type.
ds_tiles_sum_masked.min(dim=["x", "y"]).values

The sum of the tile fractions, on the other hand, is equal to one everywhere and for every type.

# Investigate town fraction
## Plot town fraction

In [None]:
# Town tile fraction for each PGD type; the colour scale is capped at 0.1.
town_tile = ds_tiles["TILE"].sel(tile="town")
town_tile.plot(col="type", aspect=4 / 3, size=4.8, vmin=0, vmax=0.1)
plt.show()

At which points is the town fraction still higher than zero in no_urban, but lower than in original?

In [None]:
# Town fraction straight from the PGD files (not NaN-filled).
town_frac = ds["SFX.FRAC_TOWN___"]
town_original = town_frac.sel(type="original")
town_no_urban = town_frac.sel(type="no_urban")

# Points where no_urban still has some town fraction, but less than original.
reduced_in_no_urban = (town_original - town_no_urban) > 0
town_left_in_no_urban = town_no_urban > 0
mask = reduced_in_no_urban & town_left_in_no_urban
town_frac_masked = town_frac.where(mask)

contour_levels = np.arange(0, 0.11, 0.01)
town_frac_masked.plot(
    col="type", levels=contour_levels, extend="max", aspect=4 / 3, size=4.8
)
plt.show()

We now calculate the difference in covers between the original and no_urban PGDs. The difference is calculated as no_urban - original (`diff` along `type` subtracts the earlier entry, original, from the later one, no_urban). So negative values indicate covers that are reduced (removed) compared to the original file.

In [None]:
ds_filled = ds.fillna(0)

# Difference along "type" for the pair (original, no_urban), reduced to a
# single slice relabelled "difference".
pair = ds_filled.sel(type=["original", "no_urban"])
ds_diff = pair.diff(dim="type").assign_coords(type=["difference"]).isel(type=0)

# Report min/max of the difference for every cover that changes by more than 1e-9.
# (A map per cover can be drawn with
#  ds_diff[varname].plot(levels=[-1.001, -0.5, -0.2, -0.1, -0.05, 0.05, 0.1, 0.2, 0.5, 1.001]).)
for cover_number in range(1, 574):
    varname = f"SFX.COVER{cover_number:03d}____"
    if varname not in ds_diff:
        continue
    cover_diff = ds_diff[varname]
    max_diff = np.max(np.abs(cover_diff)).values
    if max_diff > 1e-9:
        min_diff = cover_diff.min().values
        max_diff = cover_diff.max().values
        print(
            f"COVER {cover_number:03d}\nmin = {min_diff:.2f} ; max = {max_diff:.2f}"
        )

# FIND COVERS THAT ARE SUPPOSEDLY URBAN BUT NOT REALLY
First we plot the town fraction for all the points which have the same town fraction between original and no_urban.

In [None]:
town_frac = ds_tiles["TILE"].sel(tile="town")
town_frac_original = town_frac.sel(type="original")
town_frac_no_urban = town_frac.sel(type="no_urban")
town_frac_original_points = town_frac_original.stack(points=["x", "y"])

# Select points whose town fraction is non-zero in original (above 1e-6) and
# identical, within 1e-6, between original and no_urban.
mask = (np.abs(town_frac_original - town_frac_no_urban) < 1e-6) & (
    town_frac_original > 1e-6
)

town_frac_masked = town_frac.where(mask)
town_frac_masked.plot(
    col="type", aspect=4 / 3, size=4.8, vmin=0, vmax=0.1, extend="max"
)

## Calculate town coefficients for each cover
Next, we calculate the town coefficient for each cover with the following algorithm. First we look for all points where a cover is 100 %. With this value we can easily determine the cover-specific town tile fraction. Next, we loop over all points with only one cover with an unknown coefficient. Based on previously calculated coefficients, we can calculate the coefficient for this cover too. We continue until everything is calculated, or nothing can be calculated anymore.

In [None]:
def _stack_points(field):
    """Flatten the horizontal grid (x, y) into a single "points" dimension."""
    return field.stack(points=["x", "y"])


# Only use grid points where the covers sum to 1 (within 1e-6) in every PGD type.
ds_covers_sum = ds_covers["COVER"].sum(dim="cover")
mask = (ds_covers_sum > 1.0 - 1e-6).all(dim="type")
mask.plot()

# Original PGD: cover fractions and town fraction, full and masked.
ds_covers_original = ds_covers["COVER"].sel(type="original")
town_frac_original = town_frac.sel(type="original")
ds_covers_masked_original = ds_covers_original.where(mask)
town_frac_masked_original = town_frac_original.where(mask)

# Same fields with the grid flattened to a list of points.
town_frac_masked_original_points = _stack_points(town_frac_masked_original)
town_frac_original_points = _stack_points(town_frac_original)
ds_covers_masked_original_points = _stack_points(ds_covers_masked_original)
ds_covers_original_points = _stack_points(ds_covers_original)

In [None]:
# True where a single cover fills the whole grid point.
cond = (ds_covers_masked_original_points == 1).load()

# Drop the masked-out (NaN) points and load the remainder into memory.
ds_covers_masked_original_points_dropped = (
    ds_covers_masked_original_points.dropna(dim="points").load()
)
town_frac_masked_original_points_dropped = (
    town_frac_masked_original_points.dropna(dim="points").load()
)

# Covers that reach 100 % in at least one point.
covers_to_check = cond.any(dim="points")

# Town coefficient per cover; -1 marks "not determined yet".
cover_coord = ds_covers_masked_original_points_dropped.cover
da_coeff = xr.DataArray(
    data=np.full(len(cover_coord), -1.0),
    dims="cover",
    coords={"cover": cover_coord},
)

for check_idx, check in enumerate(covers_to_check):
    cover = check.cover
    if not check.values:
        continue
    # At the point where this cover's fraction is largest, take the town
    # fraction found there as the cover's coefficient.
    X = ds_covers_masked_original_points_dropped.sel(cover=cover)
    y = town_frac_masked_original_points_dropped
    da_coeff[check_idx] = y.values[np.argmax(X.values)]
    print(f"Cover {cover:03d}")

In [None]:
covers_points = ds_covers_masked_original_points.dropna(dim="points").load()
town_frac_points = town_frac_masked_original_points.dropna(dim="points").load()

# True where a cover is present (fraction > 0) in a point.
nonzero_covers_points = covers_points > 0


def _refresh_solver_state():
    """Recompute the solver bookkeeping from the current ``da_coeff``.

    A coefficient above -0.01 counts as determined (-1 is the "unknown" marker).

    Returns
    -------
    specified_mask : boolean DataArray over "cover", True for determined covers.
    unspecified : array of cover numbers whose coefficient is still unknown.
    candidate_points : points containing exactly one present cover with an
        unknown coefficient, i.e. points from which one new coefficient follows.
    """
    specified_mask = da_coeff > -0.01
    unspecified = (
        da_coeff["cover"].where(da_coeff < -0.01).dropna(dim="cover").astype(int).values
    )
    print(f"Number of unspecified covers = {len(unspecified)}")

    unknown_per_point = (nonzero_covers_points & ~specified_mask).sum(dim="cover")
    candidate_points = covers_points.points.where(unknown_per_point == 1).dropna(
        dim="points"
    )
    print(f"Number of points to look into = {len(candidate_points)}")
    return specified_mask, unspecified, candidate_points


specified_covers_mask, non_specified_covers, points_to_look_into = (
    _refresh_solver_state()
)

# Each pass solves one cover from the first candidate point, then refreshes the
# bookkeeping. Stops when all covers are known or no point can be solved.
while len(non_specified_covers) and len(points_to_look_into):
    point = points_to_look_into[0]
    covers_point = covers_points.sel(points=point)
    nonzero_covers_point = nonzero_covers_points.sel(points=point)

    present_covers = covers_point.where(nonzero_covers_point).dropna(dim="cover")
    print(f"Cover fractions in point : {present_covers.values}")
    print(f"Covers in point : {present_covers.cover.values}")

    # Exactly one present cover is still unknown here. .item() extracts it
    # without relying on NumPy's deprecated size-1-array-to-scalar conversion.
    unknown_cover = covers_points.cover.where(
        nonzero_covers_point & ~specified_covers_mask
    ).dropna("cover")
    new_cover = int(unknown_cover.item())
    print(f"New cover = {new_cover}")

    # Town fraction already explained by the known covers in this point.
    town_frac_sum = (da_coeff.where(da_coeff > -0.01) * covers_point).sum().values
    town_frac_point = town_frac_points.sel(points=point).values
    cover_frac = covers_point.sel(cover=new_cover).values

    # The remainder is attributed to the unknown cover.
    new_coeff = (town_frac_point - town_frac_sum) / cover_frac
    if new_coeff < 0.001:
        # Tiny or negative results are clamped to an exact zero.
        new_coeff = 0
    print(f"New coefficient = {new_coeff}")
    da_coeff.loc[dict(cover=new_cover)] = new_coeff
    print("---------------------------------------------")

    specified_covers_mask, non_specified_covers, points_to_look_into = (
        _refresh_solver_state()
    )

We round the coefficients we found to 2 digits (as ECOCLIMAP does). First, we check the difference between the rounded values and the raw values:

In [None]:
# Histogram of the error introduced by rounding the coefficients to 2 digits.
rounding_error = da_coeff.round(2) - da_coeff
rounding_error.plot.hist()

We plot the coefficients

In [None]:
# Cover numbers for which a coefficient is stored.
# NOTE(review): this shows the "cover" coordinate, which rounding does not
# affect; if the rounded coefficients were meant, use `.values` - confirm.
coeff_rounded = da_coeff.round(2)
coeff_rounded.cover.values

In [None]:
# Rounded town coefficient against cover number.
coeff_rounded = da_coeff.round(2)
coeff_rounded.plot.scatter(x="cover")

We compute the town fraction ourselves and compare with town fraction from PGD.

In [None]:
# Rebuild the town fraction as the coefficient-weighted sum of cover fractions.
# NOTE(review): coefficients are rounded to 3 digits here, whereas the CSV
# export at the end of the notebook rounds to 2 - confirm which is intended.
coeff_rounded = da_coeff.round(3)
town_frac_myself = coeff_rounded * ds_covers

# Residual with respect to the town fraction stored in the PGD, per type.
town_frac_residual = town_frac_myself.sum("cover") - town_frac
town_frac_residual.COVER.plot(col="type", aspect=4 / 3, size=4.8)

## Linear regression

In [None]:
# Inputs for the regression: town fraction and cover fractions of the original PGD.
town_frac = ds_tiles["TILE"].sel(tile="town")
town_frac_original = town_frac.sel(type="original")
ds_covers_original = ds_covers["COVER"].sel(type="original")

# No point filtering for the regression: a constant truthy mask keeps every
# grid point. (A mask computed here was previously overwritten by this
# constant before use, so it never had any effect.)
mask = 1

ds_covers_masked_original = ds_covers_original.where(mask)
town_frac_masked_original = town_frac_original.where(mask)

# Flatten the horizontal grid (x, y) into a single "points" dimension.
town_frac_masked_original_points = town_frac_masked_original.stack(points=["x", "y"])
town_frac_original_points = town_frac_original.stack(points=["x", "y"])

ds_covers_masked_original_points = ds_covers_masked_original.stack(points=["x", "y"])
ds_covers_original_points = ds_covers_original.stack(points=["x", "y"])

In [None]:
# Shape of the regression target (town fraction at the valid points). Computed
# from the stacked array directly, so the cell does not depend on a `y` left
# over from an earlier cell when the notebook is run top to bottom.
town_frac_masked_original_points.dropna(dim="points").shape

In [None]:
# Design matrix: one row per valid point, one column per cover.
covers_valid = ds_covers_masked_original_points.dropna(dim="points")
X = covers_valid.astype(float).values.T

# Target: town fraction at the same points.
town_valid = town_frac_masked_original_points.dropna(dim="points")
y = town_valid.astype(float).values

# Fit without an intercept.
model = LinearRegression(fit_intercept=False)
model.fit(X, y)
coefficients = model.coef_

cover_numbers = ds_covers.cover.values
plt.scatter(x=cover_numbers, y=coefficients)

In [None]:
# Do the regression and the iterative algorithm agree to 3 decimals for every cover?
regression_rounded = coefficients.round(3)
algorithm_rounded = da_coeff.round(3)
(regression_rounded == algorithm_rounded).all().values

Linear regression values and algorithm values are the same!

# Differences between noTEB and original

In [None]:
ds_covers_filled = ds_covers.fillna(0)

# Difference along "type" for the pair (original, noTEB), reduced to a single
# slice relabelled "difference"; only the COVER variable is kept.
pair = ds_covers_filled.sel(type=["original", "noTEB"])
pair_diff = pair.diff(dim="type").assign_coords(type=["difference"])
ds_diff = pair_diff.isel(type=0).COVER

In [None]:
# For every cover, print the largest absolute difference between noTEB and
# original; covers that differ by more than 1e-9 also get a min/max report.
counter = 0
for cover in ds_diff.cover:
    # Take the cover number from the loop variable rather than from a value
    # left over from an earlier cell.
    cover_number = int(cover)
    # Select by label: ds_diff is a DataArray here, so ds_diff[cover] would be
    # positional indexing with the cover number.
    cover_diff = ds_diff.sel(cover=cover)
    max_abs_diff = np.max(np.abs(cover_diff)).values
    if max_abs_diff > 1e-9:
        min_diff = cover_diff.min().values
        max_diff = cover_diff.max().values
        print(f"COVER {cover_number:03d}\nmin = {min_diff:.2f} ; max = {max_diff:.2f}")
    print(max_abs_diff)
    counter += 1
print(counter)

Covers between original and noTEB are identical!

# Save to file

In [None]:
# Export the coefficients, rounded to 2 digits, as a CSV table.
town_coefficients = da_coeff.round(2).rename("Town_coefficients")
df = town_coefficients.to_dataframe()
df.to_csv("ECOCLIMAP_cover_town_coefficients.csv")