In [1]:
from copy import deepcopy

import numpy as np

from embeddings.common.log import logger
from embeddings.dataset.emission_field_transforms import CenterCropTransform
from embeddings.dataset.tno_dataset import TnoDataset
from embeddings.dataset.tno_dataset_collection import TnoDatasetCollection
from embeddings.models.common.metrics import mse
from embeddings.models.vae.vae import VariationalAutoEncoder


In [2]:
# Build the dataset collection. Per the log output of this cell, construction
# loads the 2015 and 2018 TNO by-city CSV files and reports which cities ended
# up in the test, validation and training sets.
dataset_collection = TnoDatasetCollection()

[INFO] > Loading TNO data from '/home/must1d/Repositories/inventory-embeddings/data/TNO-GHGco-1km/TNO_by_city/TNO_GHGco_2015_highres_by_city.csv'
[INFO] > Loading TNO data from '/home/must1d/Repositories/inventory-embeddings/data/TNO-GHGco-1km/TNO_by_city/TNO_GHGco_2018_highres_by_city.csv'
[INFO] > Test Set has 13 unique cites!                          
	Gdynia, Siegen, Innsbruck, Duisburg, Großzschocher, Reims, Szczecin, Magdeburg, Dresden, Hamburg, Aachen, Prague, Győr
[INFO] > Validation Set has 15 unique cites!
	Linz, Wałbrzych, Bielefeld, Kassel, Erfurt, Ingolstadt, Koblenz, Nijmegen, Frankfurt am Main, Southampton, Brussels, Zielona Góra, Kalisz, Angers, Bydgoszcz
[INFO] > Training Set has 74 unique cites!
	Freiburg, Basel, Hannover, Orléans, Koszalin, Rotterdam, Brno, Bremerhaven, Dijon, Kingston upon Hull, Brugge, Lille, Amiens, Münster, Pilsen, Reading, Metz, Sheffield, Gorzów Wielkopolski, Antwerpen, Leicester, Groningen, Salzburg, Odense, Rouen, Le Havre, Besançon, Kiel, Wr

# Mean of `co2_ff_field` per dataset split (training, validation, test)

In [3]:
def _compute_mean(tno_dataset: TnoDataset) -> float:
    means = np.zeros(len(tno_dataset.city_emission_fields))
    for i, city in enumerate(tno_dataset.city_emission_fields):
        means[i] = city.co2_ff_field.mean()
    return float(means.mean())

In [4]:
# Log the mean of each split; the tab padding inside the labels keeps the
# printed values aligned in one column.
for label, split in (
    ("Mean of training data:\t\t", dataset_collection.training_data),
    ("Mean of validation data:\t", dataset_collection.validation_data),
    ("Mean of test data:\t\t", dataset_collection.test_data),
):
    logger.info(f"{label}{_compute_mean(split)}")


[INFO] > Mean of training data:		0.045735363120123676
[INFO] > Mean of validation data:	0.04517446150369493
[INFO] > Mean of test data:		0.04488871208793886


# Reconstruction MSE per City

In [5]:
# Identifier passed to VariationalAutoEncoder.load below.
# NOTE(review): "2048" presumably encodes a model size (e.g. latent dimension) — confirm.
MODEL_NAME = "2048"

# Load the VAE registered under MODEL_NAME; _compute_mse_per_city uses it to
# reconstruct each emission field.
vae = VariationalAutoEncoder.load(MODEL_NAME)

In [6]:
# Transform built from the collection's CROPPED_WIDTH and CROPPED_HEIGHT
# constants (passed in that order); _compute_mse_per_city applies it to every
# field before reconstruction.
# NOTE(review): presumably this matches the input size the VAE expects — confirm.
crop_transform = CenterCropTransform(
    TnoDatasetCollection.CROPPED_WIDTH,
    TnoDatasetCollection.CROPPED_HEIGHT
)

def _compute_mse_per_city(tno_dataset: TnoDataset) -> dict[str, list[float]]:
    """Group the VAE reconstruction MSE of every emission field by city name.

    Relies on the module-level ``crop_transform`` and ``vae`` objects. Each field
    is deep-copied, cropped, reconstructed by the VAE, and compared with its own
    cropped tensor via ``mse``.

    Args:
        tno_dataset: Dataset whose ``city_emission_fields`` are evaluated.

    Returns:
        A mapping from city name to a list of MSE values. A city that occurs
        several times in the dataset gets one value per occurrence, in dataset
        order.
    """
    mse_by_city: dict[str, list[float]] = {}

    for emission_field in tno_dataset.city_emission_fields:
        # The transform receives a deep copy, never the dataset's own object.
        cropped_field = crop_transform(deepcopy(emission_field))
        source_tensor = cropped_field.co2_ff_tensor
        reconstruction = vae.reconstruct(source_tensor)
        error = float(mse(source_tensor, reconstruction))

        # Start a new list the first time a city is seen, then extend it.
        mse_by_city.setdefault(emission_field.city_name, []).append(error)

    return mse_by_city

In [8]:
# Splits to evaluate, reported in this order.
datasets = {
    "Test Data": dataset_collection.test_data,
    "Validation Data": dataset_collection.validation_data,
    "Training Data": dataset_collection.training_data,
}

for name, data in datasets.items():
    logger.info(f"-------- {name} -------")
    result = _compute_mse_per_city(data)

    # One log line per city, listing all of that city's MSE values.
    for city, mse_values in result.items():
        joined_values = ", ".join(map(str, mse_values))
        logger.info(f"\t{city}: {joined_values}")

    # Split-level mean over every individual MSE value, i.e. not a mean of
    # per-city means.
    all_mse_values = [v for city_values in result.values() for v in city_values]
    logger.info(f"Mean: {float(np.mean(all_mse_values))}")

[INFO] > -------- Test Data -------
[INFO] > 	Aachen: 0.03240668401122093, 0.03709529712796211
[INFO] > 	Dresden: 0.027762269601225853, 0.03455411270260811
[INFO] > 	Duisburg: 0.06913617253303528, 0.08489953726530075
[INFO] > 	Gdynia: 0.025606004521250725, 0.02409745194017887
[INFO] > 	Großzschocher: 0.032614342868328094, 0.042206522077322006
[INFO] > 	Győr: 0.0059650493785738945, 0.0038708068896085024
[INFO] > 	Hamburg: 0.07413586974143982, 0.08771705627441406
[INFO] > 	Innsbruck: 0.014841662719845772, 0.019731558859348297
[INFO] > 	Magdeburg: 0.0206943079829216, 0.02799762226641178
[INFO] > 	Prague: 0.02880382165312767, 0.03665880858898163
[INFO] > 	Reims: 0.016303617507219315, 0.01250146422535181
[INFO] > 	Siegen: 0.021285777911543846, 0.027654340490698814
[INFO] > 	Szczecin: 0.02171463519334793, 0.02124669775366783
[INFO] > Mean: 0.03275005738788213
[INFO] > -------- Validation Data -------
[INFO] > 	Angers: 0.020146142691373825, 0.020119627937674522
[INFO] > 	Bielefeld: 0.02838194