In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import datetime

# Notebook-wide display and plotting configuration.
# pd.set_option('display.width', 1000)
pd.set_option('display.expand_frame_repr', False)  # keep wide frames on one line instead of wrapping
plt.rcParams.update({
    "font.family": "serif",
    "font.size": 12,
    "text.usetex": True,  # NOTE(review): presumably needs a LaTeX installation at render time - confirm
})

In [2]:
# Record the NumPy version and the time of this run.
print(np.__version__, datetime.datetime.now(), sep='\n')

2.2.0
2024-12-19 13:17:35.738434


In [3]:
def load_data(path):
    """Read every Excel workbook located directly under ``path``.

    Only files ending in ``.xlsx`` are read (the scan is not recursive).
    Excel owner/lock files, whose names start with ``~$``, are skipped
    because they are not valid workbooks and would make ``read_excel`` fail.

    Parameters
    ----------
    path : str
        Directory to scan.

    Returns
    -------
    fnames : list of str
        Workbook names without the ``.xlsx`` extension, sorted so the
        result does not depend on the order ``os.listdir`` happens to use.
    all_files : dict
        Maps each name in ``fnames`` to the value returned by
        ``pd.read_excel(..., sheet_name=None)``, i.e. a dict of
        ``{sheet name: DataFrame}`` for that workbook.
    """
    fnames = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(path)
        if entry.endswith('.xlsx') and not entry.startswith('~$')
    )

    all_files = {
        name: pd.read_excel(os.path.join(path, f'{name}.xlsx'), sheet_name=None, engine='openpyxl')
        for name in fnames
    }

    return fnames, all_files

In [4]:
# Directory whose .xlsx workbooks are loaded for the single-directory checks below.
path = '../input_sheets/1227'
# fnames: workbook names without extension; all_files: {workbook name: {sheet name: DataFrame}}
fnames, all_files = load_data(path)

In [5]:
def func_detail(df, fname, sheet_name,func_name, output_dir, actuall_run = False):
    """Extract the rows of one sheet that call ``func_name``, with named arguments.

    Sheet layout relied upon by this function:

    * the first column holds a ``'y'`` / ``'n'`` flag,
    * the column named ``'SampleStartLayoutAssembler'`` (expected to be the
      second column) holds function names,
    * the row whose second-column value is ``'Sample<func_name>'`` holds the
      argument names of ``func_name`` in the following columns.

    Parameters
    ----------
    df : pandas.DataFrame
        A single sheet.
    fname, sheet_name : str
        Workbook and sheet the frame came from; copied into the ``file`` and
        ``sheet`` columns of the result and used in log messages.
    func_name : str
        Function to look for in the ``'SampleStartLayoutAssembler'`` column.
    output_dir : str
        Directory in which ``log.txt`` is appended to when a sheet is skipped.
    actuall_run : bool, default False
        If True keep only rows flagged ``'y'``; otherwise keep rows flagged
        ``'y'`` or ``'n'``.

    Returns
    -------
    pandas.DataFrame or None
        One row per matching call, with columns ``function``, the argument
        names, ``file`` and ``sheet``. ``None`` when the sheet has no columns,
        does not mention ``func_name``, or lacks the ``'Sample<func_name>'``
        header row (the first and last cases are written to ``log.txt``).
    """
    func_col = 'SampleStartLayoutAssembler'

    cols = df.columns
    if len(cols) == 0:
        with open(f'{output_dir}/log.txt', 'a') as f:
            f.write(f'{fname}: {sheet_name} - empty file\n\n')
        return None

    if (func_col not in cols) or (func_name not in df[func_col].unique()):
        return None

    flag_col = cols[0]
    wanted_flags = ['y'] if actuall_run else ['y', 'n']
    filt = df[flag_col].isin(wanted_flags) & (df[func_col] == func_name)
    # The flag column has served its purpose; drop it from the result.
    df_func = df[filt].drop(columns=flag_col)

    if func_name == 'StartLayoutAssembler':
        # The sheet's own column labels already name these arguments.
        arg_names = df_func.columns
    else:
        # Read the header row WITHOUT the flag column: if the header row itself
        # carries a flag, including it would shift every argument name by one.
        header_rows = df.loc[df[cols[1]] == f'Sample{func_name}', cols[1:]]
        header_rows = header_rows.dropna(axis='columns', how='all')
        if header_rows.empty:
            # No header row: argument names are unknown, so skip the sheet and
            # log it instead of failing with an IndexError.
            with open(f'{output_dir}/log.txt', 'a') as f:
                f.write(f'{fname}: {sheet_name} - no Sample{func_name} header row\n\n')
            return None
        arg_names = header_rows.values[0]

    # Keep only as many leading columns as there are argument names.
    df_func = df_func.iloc[:, :len(arg_names)].copy()
    df_func.columns = arg_names
    df_func = df_func.rename(columns={df_func.columns[0]: 'function'})
    df_func['file'] = fname
    df_func['sheet'] = sheet_name
    return df_func


In [6]:
# Spot check on one sheet: rows flagged 'y' that call xy_canon, with their argument names as columns.
func_detail(all_files['canon_1227_d3t7_jm5_siv_2x']['xy4'],'canon_1227_d3t7_jm5_siv_2x', 'xy4', 'xy_canon', path, actuall_run=True)

Unnamed: 0,function,cv.cvid,lpp.lpp,xy_cds.points,xy_lengths.points,xy_distance.points,stepping.list,shift.float,varname.declare,file,sheet
27,xy_canon,cv,SIV_mask.drawing,(1.6 1.6),(25 25),(28.4 28.4),((5)),,,canon_1227_d3t7_jm5_siv_2x,xy4
31,xy_canon,cv,SIV_mask.drawing,(1.8 1.8),(25 25),(28.2 28.2),((5)),,,canon_1227_d3t7_jm5_siv_2x,xy4
35,xy_canon,cv,SIV_mask.drawing,(2 2),(25 25),(28 28),((5)),,,canon_1227_d3t7_jm5_siv_2x,xy4


In [7]:
# func_detail(all_files['canon_1227_d3t7_jm5_siv_2x']['xy4'],'canon_1227_d3t7_jm5_siv_2x', 'xy4', 'create_instance', path, actuall_run=True)

In [8]:
# func_detail(all_files['canon_1227_d3t7_jm5_siv_2x']['xy4'],'canon_1227_d3t7_jm5_siv_2x', 'xy4', 'StartLayoutAssembler', path, actuall_run=True)

In [20]:
def func_with_args(input_path,actuall_run,func_name):
    """Collect every call to ``func_name`` across all workbooks in a directory.

    Loads all workbooks under ``input_path`` with ``load_data``, runs
    ``func_detail`` on each sheet and stacks the per-sheet results.

    Parameters
    ----------
    input_path : str
        Directory containing the workbooks; also passed to ``func_detail``
        as the directory for its ``log.txt``.
    actuall_run : bool
        Forwarded to ``func_detail`` (True keeps only rows flagged ``'y'``).
    func_name : str
        Function whose calls are collected.

    Returns
    -------
    pandas.DataFrame
        All matching rows with a fresh 0..n-1 index and without columns that
        are entirely NaN. An empty DataFrame when no sheet uses ``func_name``.
    """
    fnames, all_files = load_data(input_path)

    list_of_dfs = []
    for fname in fnames:
        for sheet_name, df_sheet in all_files[fname].items():  # one sheet of one workbook
            df_func = func_detail(df_sheet, fname, sheet_name, func_name, input_path, actuall_run=actuall_run)
            if df_func is not None:
                # Drop all-NaN columns per sheet before stacking.
                list_of_dfs.append(df_func.dropna(axis='columns', how='all'))

    if not list_of_dfs:
        # pd.concat raises ValueError on an empty list; a directory that never
        # uses func_name should yield an empty table, not abort a batch run.
        return pd.DataFrame()

    return (
        pd.concat(list_of_dfs)
        .reset_index(drop=True)
        .dropna(axis='columns', how='all')
    )

In [13]:
# All xy_canon calls flagged 'y' in every workbook under `path`.
df = func_with_args(path, True, func_name='xy_canon')

In [14]:
# Save to the current working directory; the last four characters of `path` tag the file name.
df.to_csv(f'{os.getcwd()}/xy_canon_{path[-4:]}.csv', index=False)

In [15]:
# Input directories passed to main() in the cells below.
paths = ['../input_sheets/1222', '../input_sheets/1227', '../input_sheets/1276', '../input_sheets/1278', '../input_sheets/1280', '../input_sheets/1282']

In [16]:
def main(input_paths, func_name):
    """Export one CSV of ``func_name`` calls per input directory.

    Files are written to ``<cwd>/<func_name>_analysis/`` (created if needed)
    and named ``<func_name>_<tag>.csv``, where ``<tag>`` is the last four
    characters of the input directory's name.

    Parameters
    ----------
    input_paths : iterable of str
        Directories to process; each is passed to ``func_with_args`` with
        ``actuall_run=True``.
    func_name : str
        Function whose calls are exported.
    """
    output_path = os.path.join(os.getcwd(), f'{func_name}_analysis')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(output_path, exist_ok=True)

    for input_path in input_paths:
        df = func_with_args(input_path, True, func_name)
        # Normalise first so a trailing slash cannot put '/' into the file name.
        tag = os.path.basename(os.path.normpath(input_path))[-4:]
        df.to_csv(os.path.join(output_path, f'{func_name}_{tag}.csv'), index=False)

In [17]:
# Write one xy_canon CSV per directory in `paths`.
main(paths, 'xy_canon')

In [21]:
# Write one StartLayoutAssembler CSV per directory in `paths`.
main(paths, 'StartLayoutAssembler')

In [22]:
# Write one create_instance CSV per directory in `paths`.
main(paths, 'create_instance')