In [None]:
import re
import pandas as pd
import numpy as np


In [None]:
# Elog this export presumably comes from (TODO confirm):
# https://elog.belle2.org/elog/VTX+Upgrade+Testbeam+2025/
file = 'elog_text.csv'
df = pd.read_csv(
    file,
    sep=';',
    encoding='utf-8',
    encoding_errors='ignore',  # undecodable bytes are silently skipped
)

# Guarantee that every column filled in by the parsing step exists.
# Columns already present in the CSV are left untouched; missing ones are
# created with a placeholder (NaN for numeric fields, None for FrontEnd).
for column, default in (
    ('Run_Number', np.nan),
    ('Energy', np.nan),
    ('FrontEnd', None),
    ('Bias', np.nan),
    ('NTC', np.nan),
):
    if column not in df.columns:
        df[column] = default

In [None]:
# Pattern for the run-log text. re.DOTALL lets `.` span line breaks, so the
# text can be matched as-is: deleting the newlines first would glue the last
# token of one line to the first token of the next (e.g. a FrontEnd name
# followed by the next line's first word, or a run number followed by digits).
regex = re.compile(r'''
        automatic\ log\ for\ run\ (?P<run_number>\d+).*?      # Run number
        Energy:.*?(?P<energy>\d+\.?\d*).*?                    # Energy (float)
        FrontEnd:\s*(?P<frontend>\w+).*?                      # FrontEnd (word)
        Bias:.*?(?P<bias>-?\d+\.?\d*).*?                      # Bias (signed float)
        NTC:.*?(?P<ntc>-?\d+\.?\d*)C                          # NTC (signed float, ends with C)
    ''', re.VERBOSE | re.DOTALL)


def parse_run_entry(text):
    """Extract the run parameters from the text of one log entry.

    Parameters
    ----------
    text : object
        Content of the "Text" column; it is converted with ``str`` so a
        missing value (NaN) simply fails to match.

    Returns
    -------
    dict or None
        Mapping from output column name to parsed value (``Run_Number`` as
        int, ``Energy``/``Bias``/``NTC`` as float, ``FrontEnd`` as str), or
        ``None`` when the text does not match ``regex``.
    """
    match = regex.search(str(text))
    if match is None:
        return None
    return {
        'Run_Number': int(match.group('run_number')),
        'Energy': float(match.group('energy')),
        'FrontEnd': match.group('frontend'),
        'Bias': float(match.group('bias')),
        'NTC': float(match.group('ntc')),
    }


# Fail loudly instead of printing one error per row and writing an empty CSV.
# Note: this cell removes "Text" from df, so it must be preceded by the
# loading cell each time it is run.
missing_columns = {'Text', 'Type'} - set(df.columns)
if missing_columns:
    raise KeyError(f"Missing required column(s): {sorted(missing_columns)}")

rows_to_drop = []
for index, entry_type, text in zip(df.index, df['Type'], df['Text']):
    # Only entries of type 'Run' are parsed; everything else is discarded.
    values = parse_run_entry(text) if entry_type == 'Run' else None
    if values is None:
        rows_to_drop.append(index)
        continue

    # Add extracted values to the dataframe
    for column, value in values.items():
        df.loc[index, column] = value

# Keep only the rows that describe a run and drop the raw "Text" column for
# clarity. Every remaining row has a run number, so the column can be stored
# as an integer (otherwise the CSV would contain values such as "123.0").
df = (
    df
    .drop(index=rows_to_drop)
    .drop(columns=['Text'])
    .assign(Run_Number=lambda frame: frame['Run_Number'].astype('int64'))
)

df.to_csv('run_log2.csv', index=False)  # Save the cleaned data to a CSV file

# Last expression of the cell, so the cleaned table is actually displayed.
df