In [1]:
import pandas as pd
import os

In [2]:
def get_csvs(root_dir=None):
    """Return the paths of all ``.csv`` files under a directory tree.

    The walk covers ``root_dir`` itself and every directory below it.

    Parameters
    ----------
    root_dir : str or path-like, optional
        Directory to search. Defaults to the current working directory,
        which is what the notebook relied on before this parameter existed.

    Returns
    -------
    list of str
        One path per file whose name ends with ``".csv"`` (case-sensitive),
        each built by joining the walked directory with the file name.
        The list is sorted so the result does not depend on the order in
        which the filesystem happens to enumerate entries.
    """
    if root_dir is None:
        root_dir = os.getcwd()

    csvs = []
    for root, _dirs, files in os.walk(root_dir):
        csvs.extend(
            os.path.join(root, name) for name in files if name.endswith(".csv")
        )

    # os.walk makes no ordering promise; sort for reproducible output.
    return sorted(csvs)

In [3]:
def summarize(compounds_list):
    """Build one summary table from every CSV returned by ``get_csvs()``.

    Parameters
    ----------
    compounds_list : list of str
        Compound names to report; each becomes a column after the
        timestamp columns.

    Returns
    -------
    pandas.DataFrame
        One row per CSV file, holding the record produced by
        ``get_areas`` for that file. Columns are ``Timestamp``, ``Year``,
        ``Month``, ``Day``, ``Hour``, ``Minute``, ``Second`` followed by the
        requested compound names. When no CSV is found the frame is empty
        but still carries these columns.
    """
    cols = ['Timestamp', 'Year', 'Month', 'Day', 'Hour', 'Minute', 'Second'] + list(compounds_list)

    # Collect all records first and build the frame once; appending rows to
    # a DataFrame one at a time re-allocates on every insert.
    records = [get_areas(csv_path, compounds_list) for csv_path in get_csvs()]

    return pd.DataFrame(records, columns=cols)

In [4]:
def get_areas(filename, compounds_list):
    """Return the timestamp fields and peak areas for one result file.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file. The path must contain the text
        ``'WL_MicroGC-'``; the 19 characters that follow its first
        occurrence are taken as the timestamp, read as a 4-character year
        and 2-character month, day, hour, minute and second, each separated
        from the next by a single character.
    compounds_list : list of str
        Compound names to look up in the file's ``Name`` column.

    Returns
    -------
    list
        ``[timestamp, year, month, day, hour, minute, second]`` (all
        strings sliced from the path) followed by one area per entry of
        ``compounds_list``, in the same order. Each area is the first
        matching ``Area`` value as ``float32``, or ``0`` when the compound
        is not present in the file.

    Raises
    ------
    ValueError
        If the path does not contain ``'WL_MicroGC-'``.
    """
    # Get date and time from filename. Work on a single string copy so that
    # path-like objects are searched and sliced consistently.
    intro_string = 'WL_MicroGC-'
    path_str = str(filename)
    marker_pos = path_str.find(intro_string)
    if marker_pos == -1:
        # find() signals "not found" with -1; slicing from that offset would
        # silently yield meaningless timestamp fields.
        raise ValueError(
            "Cannot extract timestamp: %r not found in %r" % (intro_string, path_str)
        )
    start_pos = marker_pos + len(intro_string)

    timestamp = path_str[start_pos:start_pos + 19]
    year = path_str[start_pos:start_pos + 4]
    month = path_str[start_pos + 5:start_pos + 7]
    day = path_str[start_pos + 8:start_pos + 10]
    hr = path_str[start_pos + 11:start_pos + 13]
    minute = path_str[start_pos + 14:start_pos + 16]
    second = path_str[start_pos + 17:start_pos + 19]

    # Open the file (column names are on row 9, zero-based) and keep only
    # the rows for the named compounds.
    df1 = pd.read_csv(filename, header=9)
    df1 = df1[df1['Name'].isin(compounds_list)]

    # Generate a new record to add to the dataframe
    areas = []
    for compound in compounds_list:
        matches = df1.loc[df1['Name'] == compound, 'Area']
        if len(matches):
            # Only the first matching row is reported.
            areas.append(matches.to_numpy(dtype='float32')[0])
        else:
            areas.append(0)

    return [timestamp, year, month, day, hr, minute, second] + areas

In [5]:
# Compounds whose peak areas are collected into the summary table
compounds_list = [
    'Hydrogen',
    'Oxygen',
    'Nitrogen',
    'Carbon Dioxide',
    'Carbon Monoxide',
]

In [6]:
# Build the summary table for the compounds listed above
df = summarize(compounds_list=compounds_list)

In [8]:
# Save the summary table as an Excel workbook (relative path)
df.to_excel(excel_writer='GC Summary.xlsx')