In [8]:
from copy import deepcopy

import numpy as np

from src.common.log import logger
from src.dataset.emission_field_transforms import CenterCropTransform
from src.dataset.tno_dataset import TnoDataset
from src.dataset.tno_dataset_collection import TnoDatasetCollection
from src.models.common.metrics import mse
from src.models.vae.vae import VariationalAutoEncoder

In [9]:
# Build the TNO dataset collection. The cells below read its `training_data`,
# `validation_data` and `test_data` splits.
dataset_collection = TnoDatasetCollection()

[INFO] > Loading TNO data from '/home/must1d/Repositories/inventory-embeddings/data/TNO-GHGco-1km/TNO_by_city/TNO_GHGco_2015_highres_by_city.csv'
[INFO] > Loading TNO data from '/home/must1d/Repositories/inventory-embeddings/data/TNO-GHGco-1km/TNO_by_city/TNO_GHGco_2018_highres_by_city.csv'
[INFO] > Test Set has 13 unique cites!                          
	Reims, Hamburg, Duisburg, Gdynia, Magdeburg, Szczecin, Győr, Prague, Aachen, Dresden, Innsbruck, Großzschocher, Siegen
[INFO] > Validation Set has 15 unique cites!
	Ingolstadt, Linz, Kalisz, Koblenz, Bydgoszcz, Angers, Kassel, Nijmegen, Southampton, Brussels, Wałbrzych, Bielefeld, Frankfurt am Main, Erfurt, Zielona Góra
[INFO] > Training Set has 74 unique cites!
	Bremerhaven, Pilsen, Rouen, Wrocław, Metz, Amsterdam, Le Havre, Saint Peters, Augsburg, London, Brighton, Antwerpen, Groningen, Chemnitz, Mannheim, Orléans, Basel, Braunschweig, Köln, Legnica, Milton Keynes, Ulm, Trier, Nancy, Regensburg, Besançon, Freiburg, Leicester, Readin

# Mean of data

In [10]:
def _compute_mean(tno_dataset: TnoDataset) -> float:
    """Average the per-city means of ``co2_ff_field`` over a dataset.

    The mean of every entry in ``tno_dataset.city_emission_fields`` is taken
    first; the returned value is the unweighted mean of those per-city means.

    Args:
        tno_dataset: Dataset whose ``city_emission_fields`` are averaged.

    Returns:
        The mean of the per-city means as a built-in ``float``.
    """
    # Collect the per-city means in a float64 array before averaging.
    per_city_means = np.array(
        [emission_field.co2_ff_field.mean() for emission_field in tno_dataset.city_emission_fields],
        dtype=np.float64,
    )
    return float(per_city_means.mean())

In [11]:
# Log the mean of each split; the tabs embedded in the labels keep the values aligned.
for label, split in (
    ("Mean of training data:\t\t", dataset_collection.training_data),
    ("Mean of validation data:\t", dataset_collection.validation_data),
    ("Mean of test data:\t\t", dataset_collection.test_data),
):
    logger.info(f"{label}{_compute_mean(split)}")

[INFO] > Mean of training data:		0.045735363120123676
[INFO] > Mean of validation data:	0.04517446150369493
[INFO] > Mean of test data:		0.04488871208793886


# Reconstruction MSE per City

In [12]:
# Identifier handed to `VariationalAutoEncoder.load` to pick the stored model.
# NOTE(review): "2048" presumably refers to the latent dimension — confirm.
MODEL_NAME = "2048"

# Load the VAE whose reconstructions are evaluated in the cells below.
vae = VariationalAutoEncoder.load(MODEL_NAME)

In [13]:
# Center-crop transform configured with the crop-size constants of
# TnoDatasetCollection; `_compute_mse_per_city` reads it as a global and applies
# it to every city before reconstruction.
crop_transform = CenterCropTransform(
    TnoDatasetCollection.CROPPED_WIDTH,
    TnoDatasetCollection.CROPPED_HEIGHT
)

def _compute_mse_per_city(tno_dataset: TnoDataset) -> dict[str, list[float]]:
    """Compute the VAE reconstruction MSE of every emission field, grouped by city.

    Each entry of ``tno_dataset.city_emission_fields`` is deep-copied, passed
    through the notebook-level ``crop_transform``, reconstructed by the
    notebook-level ``vae`` and compared with its cropped input using ``mse``.

    Args:
        tno_dataset: Dataset whose ``city_emission_fields`` are evaluated.

    Returns:
        A dict mapping each city name to the list of MSE values obtained for
        that city, in dataset iteration order.
    """
    mse_by_city: dict[str, list[float]] = {}
    for emission_field in tno_dataset.city_emission_fields:
        # The transform receives a copy, never the object stored in the dataset.
        cropped_field = crop_transform(deepcopy(emission_field))
        model_input = cropped_field.co2_ff_tensor
        reconstruction = vae.reconstruct(model_input)
        reconstruction_error = float(mse(model_input, reconstruction))
        # A city can occur more than once; gather all of its values in one list.
        mse_by_city.setdefault(emission_field.city_name, []).append(reconstruction_error)

    return mse_by_city

In [14]:
# Splits to evaluate, keyed by the heading used in the log output.
# Insertion order determines the order of the report.
datasets = {
    "Test Data": dataset_collection.test_data,
    "Validation Data": dataset_collection.validation_data,
    "Training Data": dataset_collection.training_data,
}

for name, data in datasets.items():
    logger.info(f"-------- {name} -------")
    result = _compute_mse_per_city(data)

    # One log line per city, listing every MSE value recorded for it.
    for city_name, city_mse_values in result.items():
        logger.info(f"\t{city_name}: {', '.join(map(str, city_mse_values))}")

    # Pool the values of all cities so that each individual MSE value carries
    # the same weight in the reported mean.
    pooled_mse_values = [
        mse_value
        for city_mse_values in result.values()
        for mse_value in city_mse_values
    ]
    logger.info(f"Mean: {float(np.array(pooled_mse_values).mean())}")

[INFO] > -------- Test Data -------
[INFO] > 	Aachen: 0.03450087830424309, 0.0366925485432148
[INFO] > 	Dresden: 0.027638111263513565, 0.03563503921031952
[INFO] > 	Duisburg: 0.07061716169118881, 0.08480164408683777
[INFO] > 	Gdynia: 0.023331526666879654, 0.026205124333500862
[INFO] > 	Großzschocher: 0.0322684571146965, 0.03886016830801964
[INFO] > 	Győr: 0.005651071202009916, 0.00368107738904655
[INFO] > 	Hamburg: 0.06658349186182022, 0.08469772338867188
[INFO] > 	Innsbruck: 0.016359740868210793, 0.02484234981238842
[INFO] > 	Magdeburg: 0.020373499020934105, 0.027417179197072983
[INFO] > 	Prague: 0.03272707015275955, 0.03778918460011482
[INFO] > 	Reims: 0.01786777563393116, 0.013396251946687698
[INFO] > 	Siegen: 0.020703358575701714, 0.023225633427500725
[INFO] > 	Szczecin: 0.021198328584432602, 0.021334443241357803
[INFO] > Mean: 0.032630724554809816
[INFO] > -------- Validation Data -------
[INFO] > 	Angers: 0.021621691063046455, 0.01821845769882202
[INFO] > 	Bielefeld: 0.0289408639