Notebook for merging time series data with its labels (1 if the row's timestamp is listed as an anomaly, 0 otherwise).

In [16]:
import pandas as pd
import json 
import os

In [17]:
# Paths: the folder that holds the input files, and the JSON label file inside it.
data_folder = "../data/"
label_file = os.path.join(data_folder, "speed_7578_label.json")

In [18]:
# Read the labels from the JSON file.
# JSON is UTF-8 by specification, so decode explicitly rather than relying on
# the platform's locale encoding.
with open(label_file, 'r', encoding="utf-8") as f:
    labels = json.load(f)

In [19]:
# Function that merges one CSV file with its labels from the JSON label mapping.
def process_csv(filename, label_map=None, folder=None):
    """Add a 0/1 ``label`` column to a CSV time series and save a labeled copy.

    Args:
        filename: Name of the CSV file inside ``folder``. It is also the key
            looked up in ``label_map``.
        label_map: Mapping from file name to an iterable of anomaly
            timestamps. Defaults to the notebook-level ``labels``.
        folder: Directory to read the CSV from and write the labeled copy to.
            Defaults to the notebook-level ``data_folder``.

    Returns:
        The loaded DataFrame with ``timestamp`` parsed to datetime and an
        integer ``label`` column (1 where the timestamp is listed in the
        label mapping, 0 otherwise).

    Raises:
        ValueError: If the CSV has no ``timestamp`` column.

    Side effects:
        Writes ``<name>_labeled<ext>`` next to the input and prints the path.
        A file name missing from ``label_map`` is not an error: every row is
        labeled 0.
    """
    # Fall back to the notebook globals so existing one-argument calls still work.
    if label_map is None:
        label_map = labels
    if folder is None:
        folder = data_folder

    # Read the CSV file first.
    csv_path = os.path.join(folder, filename)
    df = pd.read_csv(csv_path)

    # Make sure the frame has a timestamp column before parsing it.
    if 'timestamp' not in df.columns:
        raise ValueError(f"File {filename} tidak memiliki kolom 'timestamp'.")
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Anomaly timestamps for this file, converted to datetime so they compare
    # equal to the parsed column values.
    anomaly_timestamps = [pd.to_datetime(ts) for ts in label_map.get(filename, [])]

    # Add the label column.
    df['label'] = df['timestamp'].isin(anomaly_timestamps).astype(int)

    # Build the output name from the real extension. A plain
    # str.replace(".csv", ...) leaves names such as "x.CSV" or "x.txt"
    # unchanged, which would make the output path equal the input path and
    # overwrite the source file.
    stem, ext = os.path.splitext(filename)
    output_path = os.path.join(folder, f"{stem}_labeled{ext}")
    df.to_csv(output_path, index=False)
    print(f"Saved {filename} into {output_path}")

    return df

In [20]:
# Apply to speed_7578.csv: process_csv writes the labeled copy to disk and returns the labeled frame.
df_labeled = process_csv("speed_7578.csv")

Saved speed_7578.csv into ../data/speed_7578_labeled.csv
