In [None]:
import pandas as pd
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import seaborn as sns

# Project root is taken as the parent of the current working directory
# (assumes the notebook is launched from a direct subfolder of the project — TODO confirm).
ROOT = Path.cwd().parent
# Put the project root on the import path so the `configs` package below can be resolved.
sys.path.append(str(ROOT))
from configs.path_config import RAW_DATA_DIR

In [None]:
def find_files(target, start_time="20090605000000", end_time="20210611160000",
               timestamps_file=None, raw_data_dir=None):
    """Locate `target` in the timestamp-named subfolders between two timestamps.

    The list of candidate subfolders is read from a timestamps text file (one
    name per line; lines that are not purely digits are ignored). The slice
    from `start_time` to `end_time` (both inclusive) is then probed for a file
    called `target` under the raw-data directory.

    Args:
        target (str): Name of the file to look for inside each subfolder.
        start_time (str): First subfolder name of the window (inclusive).
        end_time (str): Last subfolder name of the window (inclusive).
        timestamps_file (str | Path | None): File listing the subfolder names.
            Defaults to ``ROOT / "configs" / "txt_configs" / "timestamps.txt"``.
        raw_data_dir (str | Path | None): Directory containing the subfolders.
            Defaults to ``RAW_DATA_DIR``.

    Returns:
        tuple: ``(time, df, subfolder_relative_paths)`` where
            time (list[str]): Subfolder names in which `target` exists.
            df (DataFrame): Columns ``Time_index`` (position of the subfolder
                inside the selected window) and ``Time`` (subfolder name), one
                row per found file.
            subfolder_relative_paths (list[Path]): Every subfolder in the
                window, whether or not it contains `target`.
        If `start_time` or `end_time` is not listed in the timestamps file a
        message is printed and ``([], <empty DataFrame>, [])`` is returned.
    """
    # Resolve defaults at call time so the notebook-level constants stay the
    # single source of truth when no override is given.
    if timestamps_file is None:
        timestamps_file = ROOT / "configs" / "txt_configs" / "timestamps.txt"
    if raw_data_dir is None:
        raw_data_dir = RAW_DATA_DIR
    timestamps_file = Path(timestamps_file)
    raw_data_dir = Path(raw_data_dir)

    # Read the subfolder names, skipping blank or non-numeric lines
    with timestamps_file.open("r") as file:
        subfolders = [line.strip() for line in file if line.strip().isdigit()]

    try:
        start_index = subfolders.index(start_time)
        end_index = subfolders.index(end_time)
    except ValueError:
        # Return empty results instead of falling through to unbound variables.
        print("One or both of the specified subfolders were not found.")
        return [], pd.DataFrame(columns=["Time_index", "Time"]), []

    subfolder_relative_paths = [
        Path(folder) for folder in subfolders[start_index : end_index + 1]
    ]
    print(len(subfolder_relative_paths), "subfolders found for the specified start and end times.")

    # Collect all hits first and build the DataFrame once (no row-by-row growth)
    records = [
        (idx, subfolder.name)
        for idx, subfolder in enumerate(subfolder_relative_paths)
        if (raw_data_dir / subfolder / target).exists()
    ]
    df = pd.DataFrame(records, columns=["Time_index", "Time"])

    if df.empty:
        print(f"No files found in the specified subfolders for {target}.")
    else:
        print(df)

    return df["Time"].tolist(), df, subfolder_relative_paths

In [None]:
def find_warnings(target, loops, target_alarm):
    """Flag, per timestamp subfolder, which loops raised `target_alarm`.

    Every subfolder returned by `find_files(target)` that contains `target` is
    read (latin1-decoded) and searched for the literal text
    ``"<target_alarm>;<loop>"`` for each entry of `loops`.

    Args:
        target (str): Name of the file to read inside each subfolder.
        loops (list[str]): Loop identifiers to search for; they become the
            indicator columns of the result.
        target_alarm (str): Alarm code that must precede ``;<loop>`` in the file.

    Returns:
        DataFrame: Column ``Time`` (datetime parsed from the subfolder name with
        format ``%Y%m%d%H%M%S``) plus one 0/1 column per loop. Only rows where
        at least one loop matched are kept. The result is an independent copy.
    """
    loops = list(loops)
    _, _, subfolder_relative_paths = find_files(target)

    # Gather one record per file and build the frame once; concatenating inside
    # the loop is quadratic and warns when the accumulator is still empty.
    records = []
    for subfolder in subfolder_relative_paths:
        path = RAW_DATA_DIR / subfolder / target
        if not path.exists():
            continue
        with open(path, 'r', encoding='latin1') as file:
            content = file.read()
        row = {"Time": subfolder.name}
        for loop in loops:
            row[loop] = int(f"{target_alarm};{loop}" in content)
        records.append(row)

    df_warnings = pd.DataFrame(records, columns=["Time"] + loops)
    df_warnings['Time'] = pd.to_datetime(df_warnings['Time'], format='%Y%m%d%H%M%S')

    # Keep only timestamps where at least one loop reported the alarm. The copy
    # detaches the result so callers can modify it without pandas warnings.
    has_warning = df_warnings[loops].sum(axis=1) > 0
    return df_warnings[has_warning].copy()

In [None]:
def plot_warnings_heatmap(df_warnings, target_alarm=None):
    """Plot a heatmap of warnings over time, one row per loop.

    Args:
        df_warnings (DataFrame): A ``Time`` column (datetime, or strings in
            ``%Y%m%d%H%M%S`` format) plus one 0/1 (or boolean) column per loop.
            The frame is not modified.
        target_alarm (str | None): Alarm name shown in the title. When omitted,
            a notebook-level ``target_alarm`` variable is used if one exists
            (this keeps the older single-argument call working).

    Returns:
        None. The figure is shown with ``plt.show()``. If `df_warnings` is
        empty a message is printed and nothing is drawn.
    """
    if target_alarm is None:
        # Backward compatibility: the title used to read this global directly.
        target_alarm = globals().get("target_alarm")

    if df_warnings.empty:
        # seaborn cannot draw a heatmap from an empty frame.
        print("No warnings to plot.")
        return

    # Work on a derived frame so the caller's DataFrame is left untouched.
    warnings_by_time = (
        df_warnings
        .assign(Time=pd.to_datetime(df_warnings['Time'], format='%Y%m%d%H%M%S'))
        .set_index('Time')
        .astype(int)
    )

    # Transpose so time runs along the x-axis and loops along the y-axis
    heatmap_data = warnings_by_time.T

    fig, ax = plt.subplots(figsize=(15, 10))
    sns.heatmap(heatmap_data, cmap='Blues', linecolor='grey', linewidths=0.5, cbar=False, ax=ax)

    title = f'{target_alarm} Warnings Over Time' if target_alarm else 'Warnings Over Time'
    ax.set_title(title)
    ax.set_xlabel('Time')
    ax.set_ylabel('Loop')

    # Heatmap cell i spans [i, i + 1], so labels belong at the cell centres.
    tick_labels = pd.to_datetime(heatmap_data.columns).strftime('%Y-%m-%d %H:%M:%S')
    ax.set_xticks([position + 0.5 for position in range(len(tick_labels))])
    ax.set_xticklabels(tick_labels, rotation=70)

    fig.tight_layout()
    plt.show()


# Alarm code to look for; a hit is the literal text "<target_alarm>;<loop>".
target_alarm = 'T4'

# Loop identifiers to check; each one becomes a row of the heatmap.
loops = [
    'S-B_Close_Comp',
    'S-C_Close_Comp',
    'S-D_Close_Comp',
    'S-E_Close_Comp',
    'S-F_Close_Comp',
]

# Scan every 'BodyEmailWRN.txt' in the time window, then plot the matches.
df_warnings = find_warnings('BodyEmailWRN.txt', loops, target_alarm)
plot_warnings_heatmap(df_warnings)