In [1]:
# cell 1: imports, project root en module-path instellen
import os, sys
from datetime import datetime, date, timezone
import pandas as pd
# Root of the ENEXIS repository.
# Set the ENEXIS_PROJECT_ROOT environment variable to point at your own
# checkout; when it is unset or empty the original hard-coded path is used,
# so existing setups keep working unchanged.
PROJECT_ROOT = os.environ.get('ENEXIS_PROJECT_ROOT') or '/Users/redouan/ENEXIS'
print("📁 PROJECT_ROOT is:", PROJECT_ROOT)

# Put the repository's src folder first on sys.path so that the
# data_ingestion package below can be imported.
SRC_FOLDER = os.path.join(PROJECT_ROOT, 'src')
sys.path.insert(0, SRC_FOLDER)

# Helpers used by the rest of this notebook, all from data_ingestion.NED
from data_ingestion.NED import (
    load_config, get_connection, table_exists,
    get_last_timestamp, fetch_records,
    create_table_from_df, remove_duplicates
)

print("✅ Module data_ingestion.NED geïmporteerd!")

📁 PROJECT_ROOT is: /Users/redouan/ENEXIS
✅ Module data_ingestion.NED geïmporteerd!


In [2]:
# cell 2: load the API configuration
config_path = os.path.join(
    PROJECT_ROOT, 'workspaces', 'sandeep', 'config', 'api-call.json'
)
print("🔍 Config bestand:", config_path)

# Stop here with a readable message when the config file is missing
assert os.path.exists(config_path), f"Config niet gevonden op {config_path}"

config = load_config(config_path)

# Show which settings are available under the 'ned' section
ned_keys = list(config['ned'].keys())
print("Config keys:", ned_keys)

🔍 Config bestand: /Users/redouan/ENEXIS/workspaces/sandeep/config/api-call.json
Config keys: ['ned_api_endpoint', 'demo-ned-api-key', 'ned_download_dir']


In [3]:
# cell 3: open the database connections
data_dir = os.path.join(PROJECT_ROOT, 'src', 'data')
warp_db = os.path.join(data_dir, 'WARP.db')
logs_db = os.path.join(data_dir, 'logs.db')

# Make sure the parent folder of each database file exists before connecting
for db_path in (warp_db, logs_db):
    os.makedirs(os.path.dirname(db_path), exist_ok=True)

conn_data = get_connection(warp_db)
conn_log = get_connection(logs_db)

print(f"🗄️ Data DB: {warp_db}")
print(f"🗄️ Log  DB: {logs_db}")

# List the tables recorded in sqlite_master for the data DB
table_rows = conn_data.execute("SELECT name FROM sqlite_master WHERE type='table'")
existing_tables = [row[0] for row in table_rows]
print("  - Bestaande tabellen in data DB:", existing_tables)

🗄️ Data DB: /Users/redouan/ENEXIS/src/data/WARP.db
🗄️ Log  DB: /Users/redouan/ENEXIS/src/data/logs.db
  - Bestaande tabellen in data DB: ['raw_weather_obs', 'raw_weather_preds', 'raw_ned_obs']


In [4]:
# cell 4: look up the most recent timestamp that is already stored
# Timestamp column name: taken from 'ned_timestamp_column' in the config,
# defaulting to 'validfrom', and normalised to lower case.
ts_col_setting = config.get('ned_timestamp_column', 'validfrom')
TS_COL = ts_col_setting.lower()

# get_last_timestamp is called with the connection only. An earlier version
# of this cell also passed the table name and TS_COL, which was the wrong
# number of arguments.
LAST_TS = get_last_timestamp(conn_data)
print("⏱️  Laatste timestamp in 'raw_ned_obs':", LAST_TS)

⏱️  Laatste timestamp in 'raw_ned_obs': 2025-05-03T21:00:00+00:00


In [5]:
# cell 5: determine the start and end date of the fetch window
if not LAST_TS:
    # Nothing stored yet: fall back to the configured (or default) start date
    start_date = config.get('default_start_date', '2022-01-01')
    print("⭕ Db leeg, fallback start_date:", start_date)
else:
    # Continue from the last timestamp found in the database
    start_date = LAST_TS
    print("🟢 Gebruik vorige timestamp als start_date")

# The window ends at today's date in ISO format (YYYY-MM-DD)
today = date.today()
end_date = today.isoformat()
print("🛑 end_date:", end_date)

🟢 Gebruik vorige timestamp als start_date
🛑 end_date: 2025-05-04


In [6]:
# cell X: print the source of the fetch_records function that was actually imported
import inspect

fetch_records_source = inspect.getsource(fetch_records)
print(fetch_records_source)

def fetch_records(endpoint, headers, start_date, end_date, gen_type):
    """Paginate through the NED API and collect all records for één gen_type."""
    all_recs = []
    params = {
        'point': '0',
        'type': str(gen_type),
        'granularity': '5',
        'granularitytimezone': '1',
        'classification': '2',
        'activity': '1',
        'validfrom[after]': start_date,
        'validfrom[strictly_before]': end_date,
        'page': 1
    }
    # Eerste request om de laatste pagina te bepalen
    resp = requests.get(endpoint, params=params, headers=headers)
    resp.raise_for_status()
    data = resp.json()
    last_url = data.get('hydra:view', {}).get('hydra:last')
    if not last_url:
        raise RuntimeError('Could not determine last page from API response')
    last_page = int(last_url.split('page=')[-1])

    # Loop door alle pagina's
    for page in range(1, last_page + 1):
        params['page'] = page
        resp = requests.get(endpoint, params=params

In [7]:
# cell 6: fetch the records for every generation type
ned_cfg = config['ned']
endpoint = ned_cfg['ned_api_endpoint']
api_key = ned_cfg['demo-ned-api-key']

# Request headers passed to every fetch_records call
headers = {
    'X-AUTH-TOKEN': api_key,
    'Content-Type': 'application/json'
}

# Types to download: 'ned_types' from the config, or just type 2 by default
gen_types = config.get('ned_types', [2])

all_recs = []
for gen_type in gen_types:
    print(f"\n➡️  Ophalen gen_type={gen_type} …")
    # fetch_records comes from data_ingestion.NED; it receives the endpoint,
    # headers, date window and the generation type
    recs = fetch_records(endpoint, headers, start_date, end_date, gen_type)
    print(f"   📦  Aantal records ontvangen voor type {gen_type}: {len(recs)}")
    all_recs += recs

print(f"\n📝 Totaal ontvangen records: {len(all_recs)}")


➡️  Ophalen gen_type=2 …


HTTPError: 403 Client Error: Forbidden for url: https://api.ned.nl/v1/utilizations?point=0&type=2&granularity=5&granularitytimezone=1&classification=2&activity=1&validfrom%5Bafter%5D=2025-05-03T21%3A00%3A00%2B00%3A00&validfrom%5Bstrictly_before%5D=2025-05-04&page=1

In [None]:
# cell 7: load the records into a DataFrame and write them to the database
if not all_recs:
    print("⚠️  Geen nieuwe data om te schrijven")
else:
    table_name = 'raw_ned_obs'
    count_query = "SELECT COUNT(*) FROM raw_ned_obs"

    df = pd.DataFrame(all_recs)
    print("▶️  DataFrame shape:", df.shape)
    # Use lower-case column names throughout
    df.columns = [col.lower() for col in df.columns]

    # First run: the table is created before any rows are appended
    if table_exists(conn_data, table_name):
        print("✅ Tabel 'raw_ned_obs' bestaat al, ga direct appenden")
    else:
        print("📋 Tabel 'raw_ned_obs' bestaat nog niet, maak aan…")
        create_table_from_df(conn_data, df, table_name)

    df.to_sql(table_name, conn_data, if_exists='append', index=False)
    print("✅ Data geschreven naar 'raw_ned_obs'")

    # Count the rows before and after remove_duplicates to show its effect
    before = conn_data.execute(count_query).fetchone()[0]
    remove_duplicates(conn_data, table_name)
    after = conn_data.execute(count_query).fetchone()[0]
    print(f"🧹 Duplicaten verwijderd: vóór={before}, ná={after}")

In [None]:
# cell 8: wrap up by closing both database connections
for open_conn in (conn_data, conn_log):
    open_conn.close()
print("🔒 Verbindingen gesloten, klaar!")