In [1]:
# %% Cell 1: Dynamically load transform_ned.py
import importlib.util
from pathlib import Path

# Locate the project root: the nearest directory, starting at the current
# working directory and walking upwards, that contains a 'src' folder.
# The search runs over the finite list of ancestors rather than looping on
# `.parent`: the parent of the filesystem root is the root itself, so an
# open-ended loop would never terminate when no 'src' folder exists.
start_dir = Path().resolve()
NB_PATH = next(
    (folder for folder in (start_dir, *start_dir.parents) if (folder / 'src').is_dir()),
    None,
)
if NB_PATH is None:
    raise FileNotFoundError(
        f"Geen map 'src' gevonden in {start_dir} of een bovenliggende map!"
    )
PROJECT_ROOT = NB_PATH
print("Project-root:", PROJECT_ROOT)

# Build the path to the module and fail early if it is not a regular file
# (a directory with that name would pass a plain existence check).
module_path = PROJECT_ROOT / 'src' / 'data_processing' / 'transform_ned.py'
if not module_path.is_file():
    raise FileNotFoundError(f"Kon {module_path} niet vinden!")

# Load the module straight from that file, without touching sys.path.
spec = importlib.util.spec_from_file_location('transform_ned', str(module_path))
if spec is None or spec.loader is None:
    # spec_from_file_location returns None when no loader can be determined.
    raise ImportError(f"Kon geen import-spec maken voor {module_path}")
tn_mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tn_mod)

# Pull out the cleaning function used by the cells below.
clean_ned_obs = tn_mod.clean_ned_obs
print("✅ clean_ned_obs geladen uit:", module_path)

Project-root: /Users/redouan/ENEXIS
✅ clean_ned_obs geladen uit: /Users/redouan/ENEXIS/src/data_processing/transform_ned.py


In [3]:
# %% Cell 2: Build a sample DataFrame with the raw_ned_obs columns
import pandas as pd
from IPython.display import display

# Three sample records. Every value is supplied as a string, except the
# capacity of the third record, which is None.
data = [
    {
        "capacity": capacity,
        "volume": volume,
        "percentage": percentage,
        "validfrom": valid_from,
        "@id": record_id,
        "lastupdate": last_update,
    }
    for capacity, volume, percentage, valid_from, record_id, last_update in (
        ("100", "200", "50.5", "2022-01-01T00:00:00+00:00",
         "/v1/utilizations/1", "2024-01-01T12:00:00+00:00"),
        ("150", "300", "75.0", "2022-01-02T01:00:00+00:00",
         "/v1/utilizations/2", "2024-01-02T13:00:00+00:00"),
        (None, "400", "25.25", "2022-01-03T02:00:00+00:00",
         "/v1/utilizations/3", "2024-01-03T14:00:00+00:00"),
    )
]

raw_df = pd.DataFrame(data)

# Show the frame with rich rendering, then its column dtypes as plain text.
print("Voorbeeld raw_ned_obs:")
display(raw_df)
print("Kolomtypes raw_df:", raw_df.dtypes, sep="\n")

Voorbeeld raw_ned_obs:


Unnamed: 0,capacity,volume,percentage,validfrom,@id,lastupdate
0,100.0,200,50.5,2022-01-01T00:00:00+00:00,/v1/utilizations/1,2024-01-01T12:00:00+00:00
1,150.0,300,75.0,2022-01-02T01:00:00+00:00,/v1/utilizations/2,2024-01-02T13:00:00+00:00
2,,400,25.25,2022-01-03T02:00:00+00:00,/v1/utilizations/3,2024-01-03T14:00:00+00:00


Kolomtypes raw_df:
capacity      object
volume        object
percentage    object
validfrom     object
@id           object
lastupdate    object
dtype: object


In [4]:
# %% Cell 3: Transform with clean_ned_obs and inspect the result
clean_df = clean_ned_obs(raw_df)

# Render the cleaned frame, followed by its column dtypes as plain text.
print("\nResultaat clean_ned_obs:")
display(clean_df)
print("Kolomtypes cleaned_df:", clean_df.dtypes, sep="\n")


Resultaat clean_ned_obs:


Unnamed: 0,ned.capacity,ned.volume,ned.percentage,ned.validfrom
0,100.0,200,50.5,2022-01-01 00:00:00+00:00
1,150.0,300,75.0,2022-01-02 01:00:00+00:00
2,,400,25.25,2022-01-03 02:00:00+00:00


Kolomtypes cleaned_df:
ned.capacity                    Int64
ned.volume                      Int64
ned.percentage                float64
ned.validfrom     datetime64[ns, UTC]
dtype: object


In [5]:
# %% Cell 4: Automated checks on the cleaned frame
import pandas as pd
import pandas.api.types as ptypes

# Every output column must carry the "ned." prefix.
assert all(col.startswith("ned.") for col in clean_df.columns), "Prefix ontbreekt!"

# Numeric columns: integer dtype for capacity/volume, float for percentage.
assert ptypes.is_integer_dtype(clean_df["ned.capacity"].dtype),   "capacity moet Int64 zijn"
assert ptypes.is_integer_dtype(clean_df["ned.volume"].dtype),     "volume moet Int64 zijn"
assert ptypes.is_float_dtype(clean_df["ned.percentage"].dtype),   "percentage moet float zijn"

# The timestamp must be timezone-aware AND in UTC. A plain
# is_datetime64_any_dtype check would also accept tz-naive or non-UTC columns,
# which contradicts the requirement stated in the assertion message.
validfrom_dtype = clean_df["ned.validfrom"].dtype
assert (
    isinstance(validfrom_dtype, pd.DatetimeTZDtype)
    and str(validfrom_dtype.tz) == "UTC"
), "validfrom moet datetime UTC zijn"
print("\n✅ clean_ned_obs function werkt correct!")


✅ clean_ned_obs function werkt correct!
