Import required dependencies

In [1]:
import os
import json
import requests
import pandas as pd
import pickle           # Save and load data


# Setting Pandas options.
pd.set_option("display.max_rows", 50) # How to display all rows from data frame using pandas. Setting value to None to show all rows.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_info_columns", 100)
pd.set_option("display.max_info_rows", 1000000)
pd.set_option("display.precision", 2)
#pd.set_option("styler.format.precision", 2)

In [2]:
# Import functions from Pieter's utils package.
# from utils_pieter import f_describe, f_info, f_get_account_name

load config files

In [3]:
# Load API Key and download directory from config file
CONFIG_FILE = "../config/api-call.json"

def load_config(config_path):
    """Load configuration from a JSON file."""
    with open(config_path, "r") as file:
        return json.load(file)



API configurations

In [4]:
# Load config values
config = load_config(CONFIG_FILE)
print(config)
API_ENDPOINT= config["ned"]["ned_api_endpoint"]
API_KEY = config["ned"]["demo-ned-api-key"]
DOWNLOAD_DIR = config["settings"]["download_dir"]

# https://api.ned.nl/v1/utilizations?point=0&type=2&granularity=3&granularitytimezone=1&classification=2&activity=1&validfrom[strictly_before]=2020-11-17&validfrom[after]=2020-11-16


# Ensure the download directory exists
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# API details (Replace with actual API endpoint)
API_URL = API_ENDPOINT
#HEADERS = {"Authorization": f"X-AUTH-TOKEN {API_KEY}"}

# Headers with X-AUTH-TOKEN
api_headers = {
    "X-AUTH-TOKEN": API_KEY,  # Replace with your actual token
    "Content-Type": "application/json"  # Optional, if sending JSON
}


api_params={
    "point": "11",
    "type": "2",
    "granularity": "4",
    "granularitytimezone": "1",
    "classification": "2",
    "activity": "1",
    "validfrom[strictly_before]": "2025-02-22",
    "validfrom[after]": "2025-02-01"
    }


{'KNMI': {'api_endpoint': 'https://api.dataplatform.knmi.nl/open-data/v1/datasets/knmi14_globale_straling/versions/3.2/files', 'api_key': 'nctBcxmIU5Ox5CxqzBuiUxLDoDC4yNabewk63UGW', 'notification_key': 'eyJvcmciOiI1ZTU1NGUxOTI3NGE5NjAwMDEyYTNlYjEiLCJpZCI6IjM3ODk5YjE4NjM5ZDQ0ZmRhYjMzYjdhMjBlZDE3Mjg3IiwiaCI6Im11cm11cjEyOCJ9'}, 'ned': {'ned_api_endpoint': 'https://api.ned.nl/v1/utilizations', 'demo-ned-api-key': '01dd5b246eb73805193aa271dfe3ada2589d20f60838d091077155ad4658ac84', 'ned_download_dir': '../../src/data/weather/'}, 'settings': {'download_dir': 'downloads'}}


make API call

In [5]:
"""Download a data file from an API and save it in the specified directory."""
response = requests.get(API_URL, headers=api_headers, params=api_params)

# Convert JSON to Pandas DataFrame
    
if response.status_code == 200:
    # JSON in bytes format
    json_bytes = response.content

    # Convert bytes → string → dictionary
    json_dict = json.loads(json_bytes.decode("utf-8"))

    print(json_dict)

    # Convert to DataFrame
    df = pd.DataFrame.from_dict(json_dict, orient="index")

    # Convert the list of utilizations into a DataFrame
    json_data = pd.DataFrame(json_dict["hydra:member"])

    # Display the DataFrame as a table
    # print("\nConverted JSON Payload to Table Format:\n")
    # print(json_data.to_string(index=False))

           
    #file_path = os.path.join(DOWNLOAD_DIR, "downloaded_data.json")  # Change extension as needed
    #with open(file_path, "wb") as file:
    #    file.write(response.content)
    #print(f"File downloaded successfully: {file_path}")
else:
    print(f"Error: {response.status_code} - {response.text}")
    

{'@context': '/v1/contexts/Utilization', '@id': '/v1/utilizations', '@type': 'hydra:Collection', 'hydra:totalItems': 2016, 'hydra:member': [{'@id': '/v1/utilizations/68092559673', '@type': 'Utilization', 'id': 68092559673, 'point': '/v1/points/11', 'type': '/v1/types/2', 'granularity': '/v1/granularities/4', 'granularitytimezone': '/v1/granularity_time_zones/0', 'activity': '/v1/activities/1', 'classification': '/v1/classifications/2', 'capacity': 0, 'volume': 0, 'percentage': 0, 'emission': 0, 'emissionfactor': 0, 'validfrom': '2025-01-31T23:00:00+00:00', 'validto': '2025-01-31T23:15:00+00:00', 'lastupdate': '2025-02-02T23:52:43+00:00'}, {'@id': '/v1/utilizations/68092621463', '@type': 'Utilization', 'id': 68092621463, 'point': '/v1/points/11', 'type': '/v1/types/2', 'granularity': '/v1/granularities/4', 'granularitytimezone': '/v1/granularity_time_zones/0', 'activity': '/v1/activities/1', 'classification': '/v1/classifications/2', 'capacity': 0, 'volume': 0, 'percentage': 0, 'emissio

Function to write JSON data in table format

In [6]:
def json_to_table(json_data):
    #Converts JSON utilization data to a Pandas DataFrame for table display.
    # Args: json_data: The JSON data (either a string or a Python dictionary).
    # Returns: A Pandas DataFrame or None if there's an error.
    
    try:
        # If the input is a JSON string, parse it
        if isinstance(json_data, str):
            data = json.loads(json_data)
        elif isinstance(json_data, dict): # if it is already a dictionary
            data = json_data
        else:
            print("Invalid input: Please provide a JSON string or dictionary.")
            return None

        # Extract the 'hydra:member' array which contains the utilization data
        utilization_data = data.get('hydra:member', [])  # Handle missing key

        # Create a list of dictionaries, where each dictionary represents a row
        rows = []
        for item in utilization_data:
            # Select the fields you want to include in the table
            row = {
                'id': item.get('id'),
                'capacity': item.get('capacity'),
                'volume': item.get('volume'),
                'percentage': item.get('percentage'),
                'emission': item.get('emission'),
                'emissionfactor': item.get('emissionfactor'),
                '@id': item.get('@id'),
                '@type': item.get('@type'),
                'point': item.get('point'), # Include point for more context
                'type': item.get('type'), # Include type for more context
                'granularity': item.get('granularity'), # Include granularity for more context
                'granularitytimezone': item.get('granularitytimezone'), # Include granularity for more context
                'activity': item.get('activity'), # Include granularity for more context
                'activity': item.get('activity'), # Include granularity for more context
                'classification': item.get('classification'),
                'validfrom': item.get('validfrom'),
                'validto': item.get('validto'),
                'lastupdate': item.get('lastupdate')
            }
            rows.append(row)

        # Create a Pandas DataFrame from the list of dictionaries
        df = pd.DataFrame(rows)
        
        # print("Number of rows : ", df.shape[0])
        # print(df.head(3))

        return df

    except json.JSONDecodeError:
        print("Error: Invalid JSON format.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

Descriptive analytics using Pieter package

In [7]:
# f_describe(df, 10)
df_orig_num    = json_data.select_dtypes(include='number')
l_df_num_names = df_orig_num.columns.tolist()

print(l_df_num_names)
print(f"\nNumber of numerical variables: {len(l_df_num_names)}")

df_orig_cat    = json_data.select_dtypes(include='object')
l_df_cat_names = list(df_orig_cat.columns)

print(f"\nNumber of categorical variables: {len(l_df_cat_names)}")
print(l_df_cat_names)

formatted_df = json_to_table(json_dict)

# Convert 'capacity' to numeric, handling errors by setting non-numeric values to NaN
formatted_df['capacity'] = pd.to_numeric(formatted_df['capacity'], errors='coerce')

# Filter, excluding rows where capacity is NaN
filtered_df = formatted_df[formatted_df['capacity'] > 0].dropna(subset=['capacity'])

print(filtered_df.shape[0])
print(filtered_df)

#if formatted_df is not None:
#    # Print the DataFrame (table format)
#    print(formatted_df.head(3))



['id', 'capacity', 'volume', 'percentage', 'emission', 'emissionfactor']

Number of numerical variables: 6

Number of categorical variables: 11
['@id', '@type', 'point', 'type', 'granularity', 'granularitytimezone', 'activity', 'classification', 'validfrom', 'validto', 'lastupdate']
51
              id  capacity  volume  percentage  emission  emissionfactor  \
34   68098114999     19424    4856    4.71e-03         0               0   
35   68098311743     77624   19406    1.88e-02         0               0   
36   68098387836    237664   59416    5.76e-02         0               0   
37   68098444455    441508  110377    1.07e-01         0               0   
38   68098483360    716812  179203    1.74e-01         0               0   
..           ...       ...     ...         ...       ...             ...   
139  68110617274   1712960  428240    4.15e-01         0               0   
140  68110685317   2011672  502918    4.88e-01         0               0   
141  68110742640   2287460  5

Save as pickel file for data

In [8]:
# Create dictionary 'dc_ned_json_data_1' with objects that will be used in the next exercises.
dc_ned_json_data_1 = {
   'df_orig': json_data    
}

# Save dc_exercise_1_2_3 as 'dc_ned_json_data_1.pkl'
with open('../data/dc-ned-json-data-1.pkl', 'wb') as pickle_file:
    pickle.dump(dc_ned_json_data_1, pickle_file)