In [8]:
# setup darts
!pip install -r requirements.txt

Collecting darts@ git+https://github.com/unit8co/darts.git@master (from -r requirements.txt (line 1))
  Cloning https://github.com/unit8co/darts.git (to revision master) to /private/var/folders/wl/dxt16jls7jz1_252v4f209780000gp/T/pip-install-a89nbfk8/darts_991b31b8ff7347158bc30df341fb31a2
  Running command git clone --filter=blob:none --quiet https://github.com/unit8co/darts.git /private/var/folders/wl/dxt16jls7jz1_252v4f209780000gp/T/pip-install-a89nbfk8/darts_991b31b8ff7347158bc30df341fb31a2
  Resolved https://github.com/unit8co/darts.git to commit a0cc279fe020dc62e991ef936bc934eb7d0051d2
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [9]:
# Download data from data source to local
import io
import os
import zipfile
import requests

import pandas as pd

In [10]:
# Local directory layout: one sub-folder per task under the data root.
data_dir = "data"
anom_dir = os.path.join(data_dir, "anomaly_detection")
fc_dir = os.path.join(data_dir, "forecasting")
for dir_path in [data_dir, anom_dir, fc_dir]:
    # exist_ok=True makes the cell safe to re-run and avoids the
    # check-then-create race of `if not exists: mkdir`; makedirs also creates
    # any missing parent directory.
    os.makedirs(dir_path, exist_ok=True)

In [11]:
# URL of the zip file
zip_url = "https://my.hidrive.com/api/sharelink/download?id=lmCmAjUP"

file_path = os.path.join(anom_dir, "svdb.zip")
if not os.path.exists(file_path):
    # Send a GET request to download the zip file. The timeout bounds how long
    # we wait for the server to respond, so the cell cannot hang forever.
    response = requests.get(zip_url, timeout=60)

    # Stop here on a failed request: otherwise no file is written and the
    # extraction step below fails with an unrelated FileNotFoundError.
    if response.status_code != 200:
        raise ValueError(
            f"Failed to download {zip_url} (HTTP status {response.status_code})."
        )

    # Save the zip file to the local drive
    with open(file_path, "wb") as file:
        file.write(response.content)
    print("Zip file downloaded successfully.")
else:
    print("Zip file already downloaded.")

# Extract the zip file (runs on every execution, also when the archive was
# already present locally)
with zipfile.ZipFile(file_path, 'r') as zip_ref:
    zip_ref.extractall(anom_dir)
print("Zip file extracted successfully.")

Zip file already downloaded.
Zip file extracted successfully.


In [12]:
def download_weather_data():
    """Download the yearly weather CSV files and reshape them into a wide table.

    One CSV per year (2015 through 2022) is read from data.stadt-zuerich.ch
    and the files are concatenated. Only rows of the station
    "Zch_Stampfenbachstrasse" are kept, and the long-format records are
    pivoted so that every "<Parameter> [<Einheit>]" combination becomes its
    own column holding the "Wert" values.

    Returns:
        pd.DataFrame: one row per "Datum" timestamp, indexed by timezone-naive
        UTC timestamps and restricted to the range
        2015-01-01 00:00 .. 2022-08-31 00:00 (both bounds inclusive).
    """
    # download the csv files from the url
    base_url = "https://data.stadt-zuerich.ch/dataset/ugz_meteodaten_stundenmittelwerte/download/"
    filenames = [f"ugz_ogd_meteo_h1_{year}.csv" for year in range(2015, 2023)]
    df = pd.concat([pd.read_csv(base_url + fname) for fname in filenames])
    # retain only one weather station and label each measurement with its
    # unit; `.assign` builds a new frame instead of writing a column into a
    # filtered slice, which would trigger pandas' SettingWithCopyWarning
    df = df.loc[df["Standort"] == "Zch_Stampfenbachstrasse"].assign(
        param_name=lambda d: d["Parameter"] + " [" + d["Einheit"] + "]"
    )
    # pivot the df to get all measurements as columns
    df = df.pivot(index="Datum", columns="param_name", values="Wert")
    # convert the time index to UTC, then drop the timezone information so the
    # index is timezone-naive
    df.index = pd.to_datetime(df.index, utc=True).tz_localize(None)
    # extract the required time range (both bounds inclusive)
    start, end = pd.Timestamp("2015-01-01"), pd.Timestamp("2022-08-31")
    return df.loc[(start <= df.index) & (df.index <= end)]

In [13]:
def download_energy_data():
    """Download the electricity data, add weather features and cache the result.

    The raw CSV is fetched into ``fc_dir`` (unless it is already there), its
    index is converted to timezone-naive UTC timestamps and cut to
    2015-01-01 .. 2022-08-31. The frame returned by ``download_weather_data()``
    is then joined column-wise, gaps are filled by interpolation, the values
    are rounded, and the result is written next to the raw file with a
    ``_proc.csv`` suffix. The raw CSV is deleted afterwards.

    If the processed file already exists, nothing is downloaded or recomputed.

    Returns:
        None. The result is the processed CSV file on disk.

    Raises:
        ValueError: if the download request does not return HTTP status 200.
    """
    # URL of the CSV file
    energy_url = "https://data.stadt-zuerich.ch/dataset/ewz_stromabgabe_netzebenen_stadt_zuerich/download/ewz_stromabgabe_netzebenen_stadt_zuerich.csv"

    file_path = os.path.join(fc_dir, energy_url.split("/")[-1])
    processed_path = file_path.replace(".csv", "_proc.csv")

    # the processed file acts as a cache: skip all work when it is present
    if os.path.exists(processed_path):
        print("File already downloaded.")
        return

    if not os.path.exists(file_path):
        # Send a GET request to download the CSV file; the timeout bounds how
        # long we wait for the server to respond instead of hanging forever
        response = requests.get(energy_url, timeout=60)

        # Check if the request was successful
        if response.status_code != 200:
            raise ValueError("Failed to download.")

        # Save the CSV file to the local drive
        with open(file_path, "wb") as file:
            file.write(response.content)
        print("File downloaded successfully.")

    print("Processing file..")
    df = pd.read_csv(file_path, index_col=0)
    # convert time index to timezone-naive UTC timestamps
    df.index = pd.DatetimeIndex(pd.to_datetime(df.index, utc=True)).tz_localize(None)
    # extract pre-determined period (both bounds inclusive)
    df = df.loc[(pd.Timestamp("2015-01-01") <= df.index) & (df.index <= pd.Timestamp("2022-08-31"))]
    # download and preprocess the weather information
    df_weather = download_weather_data()
    # add weather data as additional features (aligned on the time index)
    df = pd.concat([df, df_weather], axis=1)
    # fill the gaps left by the join; note that interpolate() is applied to
    # every column of the combined frame, not only to the weather columns
    df = df.interpolate()
    # raining duration is given in minutes -> we divide by 4 from hourly to quarter-hourly records
    df["RainDur [min]"] = df["RainDur [min]"] / 4

    # round Electricity cols to 4 decimals, other columns to 2 decimals
    cols_precise = ["Value_NE5", "Value_NE7"]
    df = df.round(decimals={col: (4 if col in cols_precise else 2) for col in df.columns})

    # export the dataset and drop the raw download, which is no longer needed
    df.index.name = "Timestamp"
    df.to_csv(processed_path)
    os.remove(file_path)
    print("File successfully processed.")

In [14]:
download_energy_data()

File already downloaded.
