In [2]:
# IMPORTS
import pandas as pd
import numpy as np
import re
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# READ-IN CLEAN ARC DATAFRAME
# parse_dates=["Time"] makes pandas convert the "Time" column to Timestamp values on load.
filename = 'MEBL3_All_Arc_Counts_Clean'
arc_count_df = pd.read_csv(filename, parse_dates=["Time"])    
print(f"{filename} : file read into a pandas dataframe.")

MEBL3_All_Arc_Counts_Clean : file read into a pandas dataframe.


In [4]:
# Show the loaded frame; the notebook renders a truncated preview (first/last rows, elided columns).
arc_count_df

Unnamed: 0,Time,B2 Beam Blanker,B2 Bias,B2 Extractor,B2 Filament,B2 Gun Lens,B2 Main Lens,B2 Suppressor,B3 Beam Blanker,B3 Bias,...,D3 Gun Lens,D3 Main Lens,D3 Suppressor,D4 Beam Blanker,D4 Bias,D4 Extractor,D4 Filament,D4 Gun Lens,D4 Main Lens,D4 Suppressor
0,2024-06-14 01:55:12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,,,,,,,,,,
1,2024-06-14 01:55:13,,,,,,,,,,...,,,,,,,,,,
2,2024-06-14 01:55:14,,,,,,,,,,...,,,,,,,,,,
3,2024-06-14 01:55:15,,,,,,,,0.0,0.0,...,,,,,,,,,,
4,2024-06-14 01:55:16,,,,,,,,,,...,,,0.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1060403,2024-06-27 23:59:54,,,,,,,,,,...,,,,,,,,,,
1060404,2024-06-27 23:59:55,,,,,,,,,,...,,,,,,,,,,
1060405,2024-06-27 23:59:56,,,,,,,,,,...,,,,,,,,,,
1060406,2024-06-27 23:59:57,,,,,,,,,,...,,,,,,,,,,


In [5]:
# Sanity check: after parse_dates the first "Time" value should be a pandas Timestamp.
print(type(arc_count_df['Time'][0]))

<class 'pandas._libs.tslibs.timestamps.Timestamp'>


# FUNCTIONS TO EXTRACT A SPECIFIED SUBSET OF THE DATAFRAME BASED ON INPUT PARAMETERS
### arc_window() calls time_window_filter(), sys_col_identifier(), col_comp_identifier()

In [6]:
def sys_col_identifier(sys_col, window_df):
    '''Identifies the columns of window_df to keep based on input to the sys_col variable
            ex: if B2 inputted, keep all B2 measurements, if [C2, C3, C4] inputted, keep all C2 + C3 + C4 measurements

    A dataframe column belongs to a system column when the first two characters
    of its name (the "B2" in "B2 Bias") equal one of the requested identifiers.

    Parameters
    ----------
    sys_col : str or iterable of str
        A single identifier ("B2") or several (["C2", "C3", "C4"]). Lists,
        tuples and sets are all treated as collections of identifiers.
    window_df : pandas.DataFrame
        Frame whose column names are inspected. It is not modified.

    Returns
    -------
    list of str
        Matching column names, in dataframe order. Empty when nothing matches.
        The "Time" column is never returned, wherever it sits in the frame.
    '''
    # A bare string is ONE identifier, not a sequence of characters; every other
    # iterable is a collection of identifiers; anything else is wrapped as-is.
    if isinstance(sys_col, str) or not hasattr(sys_col, '__iter__'):
        wanted_sys_cols = [sys_col]
    else:
        wanted_sys_cols = list(sys_col)

    sys_col_identified_keep_vars = []
    for df_column in window_df.columns:
        # Only string labels can carry a system-column prefix; "Time" is the
        # row key rather than a measurement, so it is skipped by name.
        if not isinstance(df_column, str) or df_column == "Time":
            continue
        variable_sys_col = df_column[:2]   # system column measured by this dataframe column
        if variable_sys_col in wanted_sys_cols:
            sys_col_identified_keep_vars.append(df_column)
    return sys_col_identified_keep_vars

In [7]:
def col_comp_identifier(col_comp, window_df):
    '''Identifies the columns of window_df to keep based on input to the col_comp variable
            ex: if Bias inputted, keep all Bias measurements, if [Bias, Suppressor, Extractor] inputted, keep all Bias + Suppressor + Extractor measurements

    A measurement column is recognised by the pattern "<system column> <component>"
    (e.g. "B2 Bias"); the text after the system column and the space is the component.

    Parameters
    ----------
    col_comp : str or iterable of str
        A single component ("Bias") or several (["Bias", "Suppressor"]). Lists,
        tuples and sets are all treated as collections of components.
    window_df : pandas.DataFrame
        Frame whose column names are inspected. It is not modified.

    Returns
    -------
    list of str
        Matching column names, in dataframe order. Empty when nothing matches.
        Columns that do not follow the measurement pattern (such as "Time")
        are ignored rather than reported as errors.
    '''
    # A bare string is ONE component, not a sequence of characters; every other
    # iterable is a collection of components; anything else is wrapped as-is.
    if isinstance(col_comp, str) or not hasattr(col_comp, '__iter__'):
        wanted_col_comps = [col_comp]
    else:
        wanted_col_comps = list(col_comp)

    # Compiled once and reused for every column instead of per iteration.
    variable_pattern = re.compile(r'([B-D]{1}[2-4]{1}) (.*)')

    col_comp_identified_keep_vars = []
    for df_column in window_df.columns:
        if not isinstance(df_column, str):
            continue
        variable_pattern_match = variable_pattern.search(df_column)
        if variable_pattern_match is None:   # not a "<system> <component>" column
            continue
        variable_col_comp = variable_pattern_match.group(2)
        if variable_col_comp in wanted_col_comps:
            col_comp_identified_keep_vars.append(df_column)
    return col_comp_identified_keep_vars

In [8]:
def time_window_filter(timestamp, window_df, window_seconds = 5):
    '''Return the rows of window_df whose Time lies within window_seconds seconds of timestamp.

    timestamp is passed through pd.to_datetime first. The comparison is on the
    absolute difference and is inclusive, so rows exactly window_seconds before
    or after timestamp are kept. The returned frame has no 'Time_diff' column.

    NOTE: the helper 'Time_diff' column is assigned onto window_df itself and is
    still present on that frame when this function returns. Pass a copy if the
    caller's frame must stay unchanged.
    '''
    centre = pd.to_datetime(timestamp)
    # distance of every row from the requested instant (written onto window_df)
    window_df['Time_diff'] = (window_df['Time'] - centre).abs()
    half_width = pd.Timedelta(seconds=window_seconds)
    inside_window = window_df['Time_diff'] <= half_width
    return window_df.loc[inside_window].drop(columns=['Time_diff'])

In [35]:
def arc_window(timestamp, dataframe, sys_col = None, col_comp = None, window_seconds = None):
    '''Grabs the time frame surrounding an arc event, optionally restricted to chosen columns.

        Calls time_window_filter() for the rows and sys_col_identifier() /
        col_comp_identifier() for the columns.

        inputs:
            timestamp: time of the arc event (row axis filter); rows are kept when Time is
                within window_seconds seconds either side of it
            dataframe: frame with a "Time" column plus measurement columns; it is not modified
            sys_col: system column(s) to keep, e.g. "B2" or ["C2", "C3"]; None keeps every system column
            col_comp: column component(s) to keep, e.g. "Bias" or ["Bias", "Suppressor"];
                None keeps every component
            window_seconds: seconds kept on each side of timestamp; None falls back to
                time_window_filter()'s own default
        returns: dataframe with "Time" first, followed by the kept measurement columns in sorted order
    '''
    # time_window_filter() may add a helper column to the frame it is handed. A shallow
    # copy shares the data (cheap) but has its own column set, so the caller's
    # dataframe never gains that column.
    window_df = dataframe.copy(deep=False)

    # row axis filter
    if window_seconds is not None:
        window_df2 = time_window_filter(timestamp, window_df, window_seconds)
    else:
        window_df2 = time_window_filter(timestamp, window_df)

    # column axis filter
    # Every non-time column is a candidate; a filter left as None keeps all of them.
    # (Selecting by name avoids depending on where, or whether, a helper column sits.)
    candidate_vars = [col for col in window_df2.columns if col not in ("Time", "Time_diff")]
    if sys_col is not None:
        sys_col_keep_vars = sys_col_identifier(sys_col, window_df2)
    else:
        sys_col_keep_vars = candidate_vars
    if col_comp is not None:
        col_comp_keep_vars = col_comp_identifier(col_comp, window_df2)
    else:
        col_comp_keep_vars = candidate_vars

    # keep only columns that pass BOTH filters, in a stable (sorted) order after Time
    keep_vars = ["Time"] + sorted(set(sys_col_keep_vars) & set(col_comp_keep_vars))
    return window_df2[keep_vars]

In [36]:
# Example: rows around the given timestamp (window_seconds left at its default),
# keeping only the Suppressor measurement of each system column.
#sys_col = ["B2", "B3", "B4"]
col_comp=['Suppressor']
df = arc_window("2024-06-17 13:05:38", arc_count_df, col_comp = col_comp)
df

Unnamed: 0,Time,B2 Suppressor,B3 Suppressor,B4 Suppressor,C2 Suppressor,C3 Suppressor,C4 Suppressor,D2 Suppressor,D3 Suppressor,D4 Suppressor
291836,2024-06-17 13:05:33,,,,,,,,,
291837,2024-06-17 13:05:34,,,,,,,,,
291838,2024-06-17 13:05:35,,,,,,,,,
291839,2024-06-17 13:05:36,,,,,,,,,
291840,2024-06-17 13:05:37,,,,,,,,,
291841,2024-06-17 13:05:38,,,,,,,,,
291842,2024-06-17 13:05:39,,,,,,,,,
291843,2024-06-17 13:05:40,,,,,,,,,
291844,2024-06-17 13:05:41,,,,,,,,,
291845,2024-06-17 13:05:42,,,,,,,,,


In [None]:
# time_window_filter() assigns its helper 'Time_diff' column directly onto the frame it
# receives (the drop inside it only rebinds a local name), so the column can linger on
# arc_count_df. errors="ignore" keeps this cleanup safe to re-run when the column is absent.
arc_count_df = arc_count_df.drop(columns="Time_diff", errors="ignore")