In [None]:
import pandas as pd
from pathlib import Path
import sys

# Parent of the current working directory.
# NOTE(review): presumably the project root, i.e. the notebook is launched from
# a sub-folder one level below it - confirm against the repository layout.
ROOT = Path.cwd().parent

# Make packages under ROOT importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate sys.path entry.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))
from configs.path_config import RAW_DATA_DIR

In [None]:
# path = ROOT / 'output' / 'txt' / 'boolean_matrix.txt'
# df = pd.read_csv(path, sep='\t', header=None)
# dates = df.columns[df.loc['Bob'] == 1]

In [None]:
from pathlib import Path
import pandas as pd

# --- Configuration -----------------------------------------------------------
base_dir = ROOT  # ROOT should be a Path object
start_time = "20090605000000"   # first timestamp folder to include (inclusive)
end_time = "20210611160000"     # last timestamp folder to include (inclusive)
target = 'BodyEmailWRN.txt'     # file that must exist inside a timestamp folder

# Path to timestamps file
file_path = base_dir / "configs" / "txt_configs" / "timestamps.txt"

# Read the subfolder names; only purely numeric lines are kept.
with file_path.open("r") as file:
    subfolders = [line.strip() for line in file if line.strip().isdigit()]

# Start from an empty frame so that `df` and `time` are always defined below,
# even when one of the requested timestamps is missing from the list.
df = pd.DataFrame(columns=["Time_index", "Time"])

try:
    # Only the two lookups can legitimately raise ValueError, so only they
    # are guarded; anything else that goes wrong should surface normally.
    start_index = subfolders.index(start_time)
    end_index = subfolders.index(end_time)
except ValueError:
    print("One or both of the specified subfolders were not found.")
else:
    # Get the relevant subfolders (inclusive slice between the two timestamps)
    subfolders_between = subfolders[start_index : end_index + 1]
    subfolder_relative_paths = [Path(folder) for folder in subfolders_between]

    print(len(subfolder_relative_paths), "subfolders found for the specified start and end times.")

    # Collect one record per folder that actually contains the target file.
    # Time_index is the folder's position within the selected range, so it can
    # have gaps where folders lack the target file.
    records = [
        {"Time_index": idx, "Time": subfolder.name}  # subfolder.name = timestamp string
        for idx, subfolder in enumerate(subfolder_relative_paths)
        if (RAW_DATA_DIR / subfolder / target).exists()
    ]

    # Build the frame once instead of growing it row by row.
    df = pd.DataFrame(records, columns=["Time_index", "Time"])

    print(df)

# Timestamp strings of the folders that contain the target file; empty when
# the lookup above failed. Consumed by the following cell.
time = df['Time'].tolist()

In [None]:
# Rebuild the folder list from the timestamps that are known to contain `target`.
subfolder_relative_paths = [Path(folder) for folder in time]

print(len(subfolder_relative_paths), "subfolders found for the specified start and end times.")

# Loop identifiers searched for in each file; one output column per loop.
loops = ['S-B_Close_Comp',
         'S-C_Close_Comp',
         'S-D_Close_Comp',
         'S-E_Close_Comp',
         'S-F_Close_Comp']

# Alarm code; a hit is the literal substring "<target_alarm>;<loop>" in the file.
target_alarm = 'T4'

# Accumulate plain dict rows and build the DataFrame once afterwards.
# Calling pd.concat inside the loop is quadratic and yields object dtype.
warning_rows = []

for subfolder in subfolder_relative_paths:
    path = RAW_DATA_DIR / subfolder / target
    if not path.exists():
        continue

    # Read the whole file, then release the handle before doing any matching.
    with open(path, 'r', encoding='latin1') as file:
        content = file.read()

    row = {"Time": subfolder.name}
    for loop in loops:
        # 1 if this alarm/loop combination appears anywhere in the file, else 0
        row[loop] = int(f"{target_alarm};{loop}" in content)
    warning_rows.append(row)

# Passing `columns=` keeps the expected schema even when no rows were collected.
df_warnings = pd.DataFrame(warning_rows, columns=["Time"] + loops)

df_warnings['Time'] = pd.to_datetime(df_warnings['Time'], format='%Y%m%d%H%M%S')

# Keep only timestamps with at least one warning. The .copy() makes the result
# an independent frame so later column assignments do not raise
# SettingWithCopyWarning.
df_warnings = df_warnings[df_warnings[loops].sum(axis=1) > 0].copy()

df_warnings

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Build the heatmap matrix (rows = timestamps, columns = loops) without
# mutating df_warnings. to_datetime leaves an already-converted column as is.
df_warnings_data = (
    df_warnings
    .assign(Time=pd.to_datetime(df_warnings['Time'], format='%Y%m%d%H%M%S'))
    .set_index('Time')
    .astype(int)  # integer 0/1 values for the heatmap
)

if df_warnings_data.empty:
    # sns.heatmap cannot draw a zero-size matrix; report instead of crashing.
    print(f"No {target_alarm} warnings to plot.")
else:
    # Plot the heatmap
    plt.figure(figsize=(5, 15))
    sns.heatmap(df_warnings_data, cmap='Blues', linecolor='grey', linewidths=0.5, cbar=False)

    plt.title(f'{target_alarm} Warnings Over Time')
    plt.xlabel('Loop')
    plt.ylabel('Time')

    # Heatmap cell i spans [i, i + 1] on the axis, so the label belongs at the
    # cell centre i + 0.5. Integer positions would put it on the cell border.
    row_centers = [i + 0.5 for i in range(len(df_warnings_data.index))]
    row_labels = pd.to_datetime(df_warnings_data.index).strftime('%Y-%m-%d %H:%M:%S')
    plt.yticks(ticks=row_centers, labels=row_labels, rotation=0)

    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Build the heatmap matrix without mutating df_warnings, then transpose it so
# that loops run along the y axis and timestamps along the x axis.
# to_datetime leaves an already-converted column as is.
df_warnings_data = (
    df_warnings
    .assign(Time=pd.to_datetime(df_warnings['Time'], format='%Y%m%d%H%M%S'))
    .set_index('Time')
    .astype(int)  # integer 0/1 values for the heatmap
    .T
)

if df_warnings_data.empty:
    # sns.heatmap cannot draw a zero-size matrix; report instead of crashing.
    print(f"No {target_alarm} warnings to plot.")
else:
    # Plot the heatmap (wide figure for the transposed layout)
    plt.figure(figsize=(20, 5))
    sns.heatmap(df_warnings_data, cmap='Blues', linecolor='grey', linewidths=0.5, cbar=False)

    plt.title(f'{target_alarm} Warnings Over Time')
    plt.xlabel('Time')
    plt.ylabel('Loop')

    # Heatmap cell i spans [i, i + 1] on the axis, so the label belongs at the
    # cell centre i + 0.5. Integer positions would put it on the cell border.
    column_centers = [i + 0.5 for i in range(len(df_warnings_data.columns))]
    column_labels = pd.to_datetime(df_warnings_data.columns).strftime('%Y-%m-%d %H:%M:%S')
    plt.xticks(ticks=column_centers, labels=column_labels, rotation=70)

    plt.tight_layout()
    plt.show()