In [1]:
import pandas as pd
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.cm import get_cmap
import plotly.graph_objects as go

ROOT = Path.cwd().parent
sys.path.append(str(ROOT))
from configs.path_config import RAW_DATA_DIR

C:\Users\hpl886\Documents\Exjobb


In [2]:
def find_files(target, start_time="20090605000000", end_time="20210611160000",
               base_dir=None, raw_data_dir=None):
    """Find which timestamp subfolders in a time window contain ``target``.

    The ordered list of subfolder names is read from
    ``<base_dir>/configs/txt_configs/timestamps.txt`` (lines that are not
    purely digits are ignored). Every subfolder from ``start_time`` to
    ``end_time`` (both inclusive) is then checked for
    ``<raw_data_dir>/<subfolder>/<target>``.

    Args:
        target (str): The name of the target file to search for.
        start_time (str): Name of the first subfolder of the window. Must
            appear verbatim in the timestamps file.
        end_time (str): Name of the last subfolder of the window. Must appear
            verbatim in the timestamps file.
        base_dir (Path | str | None): Directory holding ``configs/``.
            Defaults to the notebook-level ``ROOT``.
        raw_data_dir (Path | str | None): Directory holding the timestamp
            subfolders. Defaults to the notebook-level ``RAW_DATA_DIR``.

    Returns:
        tuple: ``(time, df, subfolder_relative_paths)`` where

        * ``time`` (list[str]) holds the subfolder names in which ``target``
          was found,
        * ``df`` (DataFrame) has columns ``Time_index`` (position of the
          subfolder inside the window) and ``Time`` (subfolder name), one row
          per hit,
        * ``subfolder_relative_paths`` (list[Path]) holds every subfolder of
          the window, whether or not it contains ``target``.

        If ``start_time`` or ``end_time`` is not listed in the timestamps
        file, a message is printed and ``([], <empty DataFrame>, [])`` is
        returned.
    """
    # Only fall back to the notebook globals when no override is given.
    base_dir = ROOT if base_dir is None else Path(base_dir)
    raw_data_dir = RAW_DATA_DIR if raw_data_dir is None else Path(raw_data_dir)

    # Path to timestamps file
    file_path = base_dir / "configs" / "txt_configs" / "timestamps.txt"

    # Read the subfolder names
    with file_path.open("r") as file:
        subfolders = [line.strip() for line in file if line.strip().isdigit()]

    columns = ["Time_index", "Time"]

    try:
        # Ensure timestamps are in the list
        start_index = subfolders.index(start_time)
        end_index = subfolders.index(end_time)
    except ValueError:
        print("One or both of the specified subfolders were not found.")
        # Return empty results so callers can unpack the usual triple.
        return [], pd.DataFrame(columns=columns), []

    # Get the relevant subfolders (end bound is inclusive)
    subfolder_relative_paths = [
        Path(folder) for folder in subfolders[start_index : end_index + 1]
    ]

    print(len(subfolder_relative_paths), "subfolders found for the specified start and end times.")

    # Collect every hit first and build the DataFrame once.
    records = [
        (idx, subfolder.name)  # subfolder.name = timestamp string
        for idx, subfolder in enumerate(subfolder_relative_paths)
        if (raw_data_dir / subfolder / target).exists()
    ]
    df = pd.DataFrame(records, columns=columns)

    if df.empty:
        print(f"No files found in the specified subfolders for {target}.")
    else:
        print(df)

    return df["Time"].tolist(), df, subfolder_relative_paths

In [3]:
def find_warnings(target, loops, target_alarm):
    """Build a per-timestamp table of which loops raised ``target_alarm``.

    For every subfolder returned by ``find_files(target)`` that contains
    ``target``, the file is read (latin-1) and, for each loop, checked for the
    substring ``"<target_alarm>;<loop>"``.

    Args:
        target (str): Name of the file to scan inside each subfolder.
        loops (list[str]): Loop names to look for; each becomes a column.
        target_alarm (str): Alarm identifier that must precede ``;<loop>``.

    Returns:
        DataFrame: Columns ``Time`` (datetime parsed from the subfolder name
        with format ``%Y%m%d%H%M%S``) followed by one 0/1 column per loop.
        Only rows where at least one loop matched are kept. The result is an
        independent copy, so callers may modify it freely.
    """
    loops = list(loops)

    _, _, subfolder_relative_paths = find_files(target)

    # The search strings do not depend on the file, so build them once.
    needles = {loop: target_alarm + ';' + loop for loop in loops}

    # Collect plain dict rows and build the DataFrame once; concatenating
    # inside the loop re-copies the whole frame on every iteration.
    records = []
    for subfolder in subfolder_relative_paths:
        path = RAW_DATA_DIR / subfolder / target
        if not path.exists():
            continue
        with open(path, 'r', encoding='latin1') as file:
            content = file.read()
        row = {"Time": subfolder.name}
        for loop, needle in needles.items():
            row[loop] = int(needle in content)
        records.append(row)

    df_warnings = pd.DataFrame(records, columns=["Time"] + loops)
    df_warnings['Time'] = pd.to_datetime(df_warnings['Time'], format='%Y%m%d%H%M%S')

    # Keep only timestamps where at least one loop reported the alarm.
    has_warning = df_warnings[loops].sum(axis=1) > 0

    # .copy() detaches the result from the unfiltered frame so that later
    # column assignments by callers do not operate on a slice.
    return df_warnings.loc[has_warning].copy()

In [None]:
from matplotlib.cm import get_cmap
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def plot_warnings_heatmap(df_warnings, path, target_alarm):
    """Plot a heatmap of specific warnings over time for a specified set of loops.

    The figure has one row per loop column and one column per timestamp, and
    is written to ``path`` as a PDF. ``df_warnings`` itself is not modified.

    Args:
        df_warnings (DataFrame): Must contain a 'Time' column (datetimes or
            strings in ``%Y%m%d%H%M%S`` format); every other column is cast to
            int and drawn as one heatmap row.
        path (Path): Path to save the heatmap.
        target_alarm (str): The target alarm to plot (used in the title).

    Raises:
        ValueError: If ``df_warnings`` has no rows.
    """
    if df_warnings.empty:
        raise ValueError("df_warnings is empty; there are no warnings to plot.")

    # Derive a new frame instead of assigning into the caller's DataFrame.
    # Transposed so that loops are rows and timestamps are columns.
    df_warnings_data = (
        df_warnings
        .assign(Time=pd.to_datetime(df_warnings['Time'], format='%Y%m%d%H%M%S'))
        .set_index('Time')
        .astype(int)
        .T
    )

    # Two-colour map: the first tab10 colour for 0, white for 1.
    # plt.get_cmap replaces the deprecated matplotlib.cm.get_cmap.
    cmap = [plt.get_cmap('tab10')(0), 'white']  # Adjust as needed

    # Plot heatmap
    fig, ax = plt.subplots(figsize=(15, 3))
    sns.heatmap(
        df_warnings_data,
        cmap=cmap,
        # Pin the colour range so 0 and 1 always map to the same colours,
        # even when the data happens to contain only one of the two values.
        vmin=0,
        vmax=1,
        linecolor='grey',
        linewidths=0.5,
        cbar=False,
        ax=ax,
    )

    # Format labels with custom font sizes
    ax.set_title(f'{target_alarm} Warnings Over Time', fontsize=18)

    # X-tick labels: heatmap cell i spans [i, i + 1], so its centre is i + 0.5.
    n_columns = df_warnings_data.shape[1]
    ax.set_xticks([i + 0.5 for i in range(n_columns)])
    ax.set_xticklabels(
        pd.to_datetime(df_warnings_data.columns).strftime('%Y-%m-%d'),
        rotation=90,
        fontsize=12
    )

    # Y-tick labels
    ax.tick_params(axis='y', labelsize=12)

    fig.tight_layout()
    fig.savefig(path, format='pdf', bbox_inches='tight')
    # Close this specific figure so repeated calls do not accumulate figures.
    plt.close(fig)


# Usage: scan every 'BodyEmailWRN.txt' for the chosen alarm on the listed
# loops and save the resulting heatmap as a PDF under output/figures/warnings.
target_alarm = 'T3a'
loops = [
    'S-B_Close_Comp',
    'S-C_Close_Comp',
    'S-D_Close_Comp',
    'S-E_Close_Comp',
    'S-F_Close_Comp',
]

# Make sure the output folder exists before anything is written to it.
save_dir = ROOT.joinpath('output', 'figures', 'warnings')
save_dir.mkdir(parents=True, exist_ok=True)
save_path = save_dir.joinpath(f'{target_alarm}_warnings_heatmap.pdf')

df_warnings = find_warnings('BodyEmailWRN.txt', loops, target_alarm)
plot_warnings_heatmap(df_warnings, save_path, target_alarm)

26339 subfolders found for the specified start and end times.
     Time_index            Time
0          1256  20091231080000
1          1257  20091231120000
2          1258  20091231160000
3          1259  20091231200000
4          1260  20100101000000
..          ...             ...
161       25443  20210113120000
162       25459  20210116040000
163       25492  20210121160000
164       25494  20210122000000
165       25610  20210210080000

[166 rows x 2 columns]


  tab10 = get_cmap('tab10')
