In [None]:
# Correlation indicator of construction date with total energy usage
# Percentage energy usage versus produced

import pandas as pd
import numpy as np
import random
from smart import Smart

# Load the integrated energy usage table from disk.
real_energy_usages = pd.read_csv("../EUR/Integrated/energy_usages.csv")

smart = Smart()

# Show how many rows each building name contributes to the table.
building_counts = real_energy_usages["building_name"].value_counts()
print(building_counts)

# Map building IDs to building names. TODO: use real mappings.
id_to_name = dict([
    ("0599100000309497", "Polak"),
    ("0599100000319364", "Sanders en PG"),
    ("0599100000332174", "Bayle"),
    ("0599100000332175", "Mandeville"),
    ("0599100000332176", "Van der Goot"),
])

In [None]:
def simulateEnergyUsage(period_type="year", period_start=1922, period_duration=100, building_energy_mu=None):
    """Generate synthetic per-period energy readings with a linear trend.

    Every reading is drawn from a normal distribution whose mean starts at
    ``building_energy_mu`` and changes by a fixed amount per period.

    Parameters
    ----------
    period_type : str
        Key under which the period index is stored in each reading
        (for example "year" or "month").
    period_start : int
        First period index (inclusive).
    period_duration : int
        Number of consecutive periods to generate.
    building_energy_mu : float, optional
        Mean of the first period. When omitted, a random integer in [5, 30]
        is drawn, the standard deviation is 2 and the mean grows by
        ``30 / period_duration`` per period. Otherwise the standard deviation
        is ``building_energy_mu / 30`` and the mean grows by
        ``building_energy_mu / (15 * period_duration)`` per period.

    Returns
    -------
    list of dict
        One ``{period_type: index, "energy": value}`` entry per period.
        Empty when ``period_duration`` is not positive.
    """
    # A zero duration used to fail with ZeroDivisionError while negative
    # durations silently produced an empty list; treat both the same way.
    if period_duration <= 0:
        return []

    building_energy_increase_constant = 1 / period_duration

    # Identity comparison: `== None` can be overridden by array-like values.
    if building_energy_mu is None:
        building_energy_mu = random.randint(5, 30)
        building_energy_sigma = 2
        building_energy_increase_constant *= 30
    else:
        building_energy_sigma = building_energy_mu / 30
        building_energy_increase_constant *= building_energy_mu / 15

    return [
        {
            period_type: period,
            "energy": np.random.normal(
                building_energy_mu + (period - period_start) * building_energy_increase_constant,
                building_energy_sigma,
            ),
        }
        for period in range(period_start, period_start + period_duration)
    ]


In [None]:
# Settings read by later cells: data_type becomes part of the exported file
# name, sum_col is the column looked up on each row of real data.
# Previously tried: sum_col = "elec_consumed(kWh)"
data_type, sum_col = "combined", "frac_produced_consumed"

# Earlier variants of the call below:
#   real_data = smart.monthlySums(2020, data_type=data_type, sum_col=sum_col)
#   real_data = smart.monthlyCombinedSums(2020)
# NOTE(review): the data is requested with sum_col="kWh" while the sum_col
# variable above holds "frac_produced_consumed" — confirm that the returned
# frame really contains the column later read through sum_col.
real_data = smart.monthlySums(
    2020,
    data_type="energy",
    sum_col="kWh",
)

# Lift the row limit so the complete table is printed.
pd.set_option("display.max_rows", None)  # or 1000
print(real_data)

In [None]:
from dataclasses import dataclass
import json


# Load the GeoJSON feature collection with the buildings to process.
# Alternative source: '../objects/BAG_WFS_build_4326.geojson'
# The context manager closes the file even when parsing fails, and the
# encoding is explicit because GeoJSON is UTF-8 regardless of the OS locale.
with open('../objects/fakeBuildingNamesData_4326.geojson', encoding="utf-8") as f:
    data = json.load(f)

# Period layout used for both the real and the simulated readings.
period_type = "month"
period_start = 1
period_duration = 12

# First feature; not used further in the visible cells.
first = data['features'][0]


# real_data = smart.monthlySums(2020, data_type=data_type, sum_col=sum_col)
# real_average = real_data["kWh"].mean()
# print()

# Names of the buildings for which real measurements exist. Computed once:
# it does not change while iterating over the features.
available_buildings = set(real_data["building_name"].unique())

# One record per GeoJSON feature: identity, construction year, geometry and a
# list of {period_type: ..., "energy": ...} readings.
fakes = []
for building in data["features"]:
    properties = building["properties"]
    fake = {
        "id": properties["pandID"],
        # The commented-out BAG source presumably stores this under
        # "bouwjaar" instead of "year" — confirm before switching sources.
        "year": properties["year"],
        "name": properties["buildingName"],
        "geometry": building["geometry"],
    }

    if fake["name"] in available_buildings:
        # Real measurements are matched on the building name.
        # (An ID-based lookup via id_to_name[fake["id"]] was used before.)
        current_building = fake["name"]
        building_energy = real_data[real_data["building_name"] == current_building]
        # NOTE(review): sum_col comes from an earlier cell, where real_data is
        # requested with sum_col="kWh" — confirm this column exists on the rows.
        usage = [
            {period_type: row[period_type], "energy": row[sum_col]}
            for _, row in building_energy.iterrows()
        ]
    else:
        # No real data for this building: fall back to simulated readings.
        # With building_energy_mu=0 both the mean and the spread of the
        # simulation are 0; real_average was an alternative considered here.
        usage = simulateEnergyUsage(
            period_type=period_type,
            period_start=period_start,
            period_duration=period_duration,
            building_energy_mu=0,
        )

    fake["usage"] = usage
    fakes.append(fake)

# Normalization of data: min-max scale construction years and energy readings
# to the range [0, 1].
def _min_max_scale(value, lower, upper):
    """Map value linearly so that lower -> 0 and upper -> 1 (0.0 if lower == upper)."""
    span = upper - lower
    # A zero span means all values are identical; avoid dividing by zero.
    return (value - lower) / span if span else 0.0


# Flatten every reading of every building into one list to find the extremes.
energies = [reading["energy"] for fake in fakes for reading in fake["usage"]]
max_energy = max(energies)
min_energy = min(energies)

construction_years = [fake["year"] for fake in fakes]
max_year = max(construction_years)
min_year = min(construction_years)
print(min_year, max_year)

# Rescale in place. The previous formula divided by the maximum instead of
# the (max - min) span, so values did not cover [0, 1] and an all-zero data
# set raised ZeroDivisionError.
for fake in fakes:
    fake["year"] = _min_max_scale(fake["year"], min_year, max_year)
    for reading in fake["usage"]:
        reading["energy"] = _min_max_scale(reading["energy"], min_energy, max_energy)
print(min_energy, max_energy)
# print(fakes[4])

# Consider changing the format to one longer array instead of different dictionaries

In [None]:
# Write the assembled records to a JSON file whose name embeds data_type.
output_path = f"../objects/sensitive/fakeAndReal{data_type.capitalize()}Data.json"
with open(output_path, "w") as json_file:
    json.dump(fakes, json_file)