In [25]:
import pandas as pd
import numpy as np

## Observations for 10 Trials

In [40]:
# load
all_sheets = pd.read_excel(
    '/project/data/compiled_results/Compiled Field Results  for DSI - 2023 Bulk 10 Trial Data.xlsx',
    sheet_name=None,
    skiprows=2)
mass = all_sheets['All Mass Residuals by Prod TAB']
mass.head()

# rename
new_names = {
    'Facility Name': 'facility_name',
    'Trial Stage': 'trial_stage',
    'Bag Set': 'bag_set',
    'Bag Number': 'bag_number',
}
mass = mass.rename(columns=new_names)
mass.head()

Unnamed: 0,facility_name,trial_stage,bag_set,bag_number,N,O,Q,V,B,D,...,K,K1,K2,K3,N.1,O.1,P,Q.1,S,V.1
0,Facility 1 ( Windrow),First Removal,A (blue),10,,,,,0.059,0.026,...,,0.986,,0.546,,,,,,
1,Facility 1 ( Windrow),First Removal,A (blue),6,,,,,0.022,0.175,...,,0.696,0.007,0.572,,,,,,
2,Facility 1 ( Windrow),First Removal,A (blue),7,,,,,0.018,0.013,...,,0.933,0.023,0.313,,,,,,
3,Facility 1 ( Windrow),First Removal,A (blue),8,,,,,0.22,0.424,...,,0.909,0.0,0.412,,,,,,
4,Facility 1 ( Windrow),First Removal,A (blue),9,,,,,0.028,0.015,...,,0.928,0.05,0.65,,,,,,


In [41]:
mass['facility_name'].unique()

array(['Facility 1 ( Windrow)', 'Facility 2 (CASP)', 'Facility 5 (EASP)',
       'Facility 6 (CASP)', 'Facility 7 (CASP)', 'Facility 8 (ASP)',
       'Facility 9 (EASP)', 'Facility 10 (Windrow)', 'Facility 3 (EASP)',
       'Facility 4 (In-Vessel)'], dtype=object)

In [42]:
# Create 'bag_ID'
mass['bag_ID'] = mass['bag_set'].astype(str) + '-' + mass['bag_number'].astype(str)
mass.drop(['bag_set', 'bag_number'], axis=1, inplace=True)

# Melt
melted_mass = pd.melt(mass,
                     id_vars=['facility_name', 'trial_stage', 'bag_ID'],
                     var_name='item_ID',
                     value_name='mass_resid')
melted_mass = melted_mass.dropna(subset=['mass_resid'])
melted_mass['item_ID'] = melted_mass['item_ID'].apply(lambda x: x.split('.')[0])

# Adjust 'bag_ID' and 'facility_ID'

melted_mass['bag_ID'] = melted_mass['bag_ID'].str.replace(r"\s*\([^)]*\)", "", regex=True)
facility_to_ID = {'Facility 1 ( Windrow)': '1',
                  'Facility 2 (CASP)': '2',
                  'Facility 3 (EASP)': '3',
                  'Facility 4 (In-Vessel)': '4',
                  'Facility 5 (EASP)': '5',
                  'Facility 6 (CASP)': '6',
                  'Facility 7 (CASP)': '7',
                  'Facility 8 (ASP)': '8',
                  'Facility 9 (EASP)': '9',
                  'Facility 10 (Windrow)': '10'
}

melted_mass['facility_ID'] = melted_mass['facility_name'].map(facility_to_ID)
melted_mass.drop(['facility_name'], axis=1, inplace=True)

# Get mass residual as a percentage
melted_mass['mass_resid_%'] = (melted_mass['mass_resid'] * 100).round(2)
melted_mass.head()

Unnamed: 0,trial_stage,bag_ID,item_ID,mass_resid,facility_ID,mass_resid_%
5,First Removal,B-10,N,0.922608,1,92.26
6,First Removal,B-6,N,0.742723,1,74.27
7,First Removal,B-7,N,0.831994,1,83.2
8,First Removal,B-8,N,0.992169,1,99.22
9,First Removal,B-9,N,0.79041,1,79.04


In [43]:
# Repeat for sa
sa = all_sheets['All SA ImagJ Resids by Prod TAB']
sa.head()

# Clean
sa['bag_ID'] = sa['Bag Set'].astype(str) + '-' + sa['Bag Number'].astype(str)
sa.drop(['Bag Set', 'Bag Number'], axis=1, inplace=True)

# Melt
melted_sa = pd.melt(sa, id_vars=['Facility Name', 'Trial Stage', 'bag_ID'],
                    var_name='item_ID',
                    value_name='sa_resid')
melted_sa = melted_sa.dropna(subset=['sa_resid'])

melted_sa['bag_ID'] = melted_sa['bag_ID'].str.replace(r"\s*\([^)]*\)", "", regex=True)
melted_sa['facility_ID'] = melted_sa['Facility Name'].map(facility_to_ID)
melted_sa.drop(['Facility Name'], axis=1, inplace=True)

melted_sa['item_ID'] = melted_sa['item_ID'].apply(lambda x: x.split('.')[0])
melted_sa['sa_resid_%'] = (melted_sa['sa_resid'] * 100).round(2)
melted_sa.rename(columns={'Trial Stage': 'trial_stage'}, inplace=True)

# Merge
merged_df = pd.merge(melted_mass,
                     melted_sa,
                     on=['facility_ID', 'trial_stage', 'bag_ID', 'item_ID'],
                     how='inner',
                     suffixes=('_mass', '_sa'))
merged_df['bag_ID'] = merged_df['bag_ID'].replace({"-": str("NaN")})

new_column_order = ['facility_ID','bag_ID', 'item_ID','trial_stage', 'mass_resid_%', 'sa_resid_%']
reordered_df = merged_df[new_column_order]
reordered_df.head()
reordered_df.to_csv('/project/data/compiled_results/observations.csv', index=False)

## CASP004-01 Trial

In [50]:
casp4 = pd.read_csv('/project/data/CASP004-01/masterfile.csv')
casp4.head()

Unnamed: 0,org_ID,trial_ID,bag_color,bag_set,bag_ID,bag_content,bag_type,bag_placement,trial_stage,product_name,...,fragment_size,photo_available,weight1,weight2,weight3,mean_weight_final_g,notes,%_not_decomposed,%_decomposed,outlier_alert
0,44547.0,44547-01-21,Green,A,A-1,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
1,44547.0,44547-01-21,Green,A,A-2,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
2,44547.0,44547-01-21,Green,A,A-3,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
3,44547.0,44547-01-21,Green,A,A-4,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
4,44547.0,44547-01-21,Green,A,A-5,Baseline,Standard,Bottom depth,Start,12 oz Hot cup / Soup bowl,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False


In [51]:
# Force format
casp4 = casp4[['org_ID', 'bag_ID',
               'product_name', 'trial_stage',
               'product_weight_init_g', 'mean_weight_final_g']]
casp4['org_ID'] = '7'
casp4.rename(columns={'org_ID': 'facility_ID'}, inplace=True)

def calculate_mass_resid_percent(row):
    return round((row['mean_weight_final_g'] / row['product_weight_init_g']) * 100, 2)
casp4['mass_resid_%'] = casp4.apply(calculate_mass_resid_percent, axis=1)
casp4['mass_resid_%'] = casp4.apply(calculate_mass_resid_percent, axis=1)

casp4.drop(columns=['product_weight_init_g', 'mean_weight_final_g'], inplace=True)
casp4['sa_resid_%'] = float('nan')
casp4.head()

Unnamed: 0,facility_ID,bag_ID,product_name,trial_stage,mass_resid_%,sa_resid_%
0,7,A-1,12 oz Hot cup / Soup bowl,Start,100.0,
1,7,A-2,12 oz Hot cup / Soup bowl,Start,100.0,
2,7,A-3,12 oz Hot cup / Soup bowl,Start,100.0,
3,7,A-4,12 oz Hot cup / Soup bowl,Start,100.0,
4,7,A-5,12 oz Hot cup / Soup bowl,Start,100.0,


In [52]:
casp4['trial_stage'].unique()
casp4 = casp4[casp4['trial_stage'] != 'Interval']
casp4['trial_stage'].replace({'Start': float('nan'),
                              'End': 'Second Removal'}, inplace=True)

casp4.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  casp4['trial_stage'].replace({'Start': float('nan'),


Unnamed: 0,facility_ID,bag_ID,product_name,trial_stage,mass_resid_%,sa_resid_%
0,7,A-1,12 oz Hot cup / Soup bowl,,100.0,
1,7,A-2,12 oz Hot cup / Soup bowl,,100.0,
2,7,A-3,12 oz Hot cup / Soup bowl,,100.0,
3,7,A-4,12 oz Hot cup / Soup bowl,,100.0,
4,7,A-5,12 oz Hot cup / Soup bowl,,100.0,


In [53]:
casp4['product_name'].unique()
name_to_ID =  {'12 oz Hot cup / Soup bowl': 'A74',
               'Hot cup lid' : 'A7',
               '16 oz PLA cold cup': 'A36',
               'Cutlery': 'A23',
               'PLA-lined fibre bowl, white ': 'Q',
               'Uncoated paper food tray ': 'A25', 
               'Lined paper food tray with lid': 'A21',
               'Kraft control': 'A42',
               'Fiber Clamshell, Lined 9x9x3 SKU TO-SC-U9L-LF': 'A38',
               '16oz NoTree Paper Hot Cup SKU CU-SU-16': 'A1',
               '14oz PLA Cold Cup SKU CP-CS-14': 'A48',
               'PLA Lid: 32oz Burrito Bowl SKU BOL-CS-UBB': 'A78',
               'Fiber Cutlery, Spoon SP-FB-6-LF': 'A39',
               '3Gallon Food Scrap Bag BG-CS-3': 'A2',
               '8" Kraft straw ST-PA-8-K': 'A3',
               'TPLA Spoon SP-PS-6': 'A60',
               'Wrapper for TPLA Spoon SP-PS-I': 'A81',
               'Large brown bag': 'A57',
               'Small zippered clear colour bag': 'A53',
               'Printed small bag with brown background and logo': 'A52',
               'Large natural clear colour bag': 'A43'} 

casp4['item_ID'] = casp4['product_name'].map(name_to_ID)
casp4.drop(['product_name'], axis=1, inplace=True)
casp4 = casp4[['facility_ID', 'bag_ID', 'item_ID', 'trial_stage', 'mass_resid_%', 'sa_resid_%']]
casp4.head()

Unnamed: 0,facility_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%
0,7,A-1,A74,,100.0,
1,7,A-2,A74,,100.0,
2,7,A-3,A74,,100.0,
3,7,A-4,A74,,100.0,
4,7,A-5,A74,,100.0,


In [115]:
items = pd.read_csv('/project/data/items.csv')
items.head()

Unnamed: 0,item_id,item_id_10_trials,item_name,item_description,item_sku,brand,manufacturer,material_class_i,material_class_ii,material_class_iii,certification,kit,item_weight,weight_unit,item_dimensions,dimensions_unit,item_capacity,capacity_unit
0,A1,,PLA-lined Bamboo Hot Cup 16oz,16oz NoTree Paper Hot Cup SKU CU-SU-16,,World Centric™,World Centric™,Fiber,Lined Fiber,PLA-lined Bamboo Paper,BPI,Custom,13.523333,grams,5.5x2.5x3.5,inches,,milliliters
1,A2,,PBAT Bin Liner 3 gallon,3Gallon Food Scrap Bag BG-CS-3,,World Centric™,World Centric™,Biopolymer,Biopolymer Film/Bag,PBAT and corn starch,BPI,Custom,7.156667,grams,8.75x15x0.1,inches,,milliliters
2,A3,,"Kraft straw 8""",,,World Centric™,World Centric™,Fiber,Unlined Fiber,Paper,BPI,Custom,1.073333,grams,8x0.250.25,inches,,milliliters
3,A4,,Multi-laminate stand-up pounch with zipper,Alter Eco Quinoa Bag,,Alter Eco,Alter Eco,Biopolymer,Biopolymer Film/Bag,Not disclosed,,Custom,9.0,grams,,inches,,milliliters
4,K,K3,Bagasse Clamshell 9x9,Fiber Clamshell,,Not disclosed,Not disclosed,Fiber,Unlined Fiber,Unlined Sugarcane Fibre (bagasse) PFAS Free,Certified BPI and/or TUV,CLP,45.19,grams,,inches,,milliliters


In [117]:
# Concat 10 trials & CASP
reordered_df = pd.concat([reordered_df, casp4], ignore_index=True)
# reordered_df.to_csv('/project/data/compiled_results/observations.csv', index=False)


## 5 Trials

In [118]:
# load all files
five_trials = pd.read_excel(
    '/project/data/compiled_results/raw/Compiled Field Results - CFTP Gathered Data.xlsx',
    sheet_name=None,
    skiprows=0)
ad001 = five_trials['AD001-01']
wr001 = five_trials['WR001-01']
wr003 = five_trials['WR003-01']
casp001 = five_trials['CASP001-01']
casp003 = five_trials['CASP003-01']

In [120]:
# Process WR003-01 Trial 

wr003_standardized = wr003[['Trial ID', 'Trial Bag ID',
                            'Item ID', 'Final Residual Weight - wet']].copy()
wr003_standardized.rename(columns={
                            'Trial ID': 'facility_name',
                            'Trial Bag ID': 'bag_ID',
                            'Item ID': 'item_ID',
                            'Final Residual Weight - wet': 'mass_resid_%'
}, inplace=True)

# Force Format
wr003_standardized['facility_name'] = 'Windrow'
wr003_standardized['trial_stage'] = np.nan
wr003_standardized['sa_resid_%'] = np.nan

# Reorder
column_order = ['facility_name', 'bag_ID',
                'item_ID', 'trial_stage',
                'mass_resid_%', 'sa_resid_%']
wr003_standardized = wr003_standardized[column_order]
wr003_standardized.head()

Unnamed: 0,facility_name,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%
0,Windrow,Blue Zip Tie #1,1D,,4.76,
1,Windrow,Blue Zip Tie #1,1F,,17.76,
2,Windrow,Blue Zip Tie #1,1H,,0.59,
3,Windrow,Blue Zip Tie #1,1G,,12.7,
4,Windrow,Blue Zip Tie #1,1B,,7.86,


In [121]:
# Process CASP003-01 Trial
casp003_standardized = casp003[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Initial Item Weight - Aggregate',
                                'Final Residual Weight - wet - aggregate']].copy()

# Calculate mass_resid_%
def calculate_mass_resid(row):
    final_mass = row['Final Residual Weight - wet - aggregate']
    initial_mass = row['Initial Item Weight - Aggregate']
    return round((final_mass / initial_mass) * 100, 2)

casp003_standardized['mass_resid_%'] = casp003_standardized.apply(calculate_mass_resid, axis=1)
casp003_standardized.drop(['Final Residual Weight - wet - aggregate',
                           'Initial Item Weight - Aggregate'], axis=1, inplace=True)

# Force Format
casp003_standardized.rename(columns={'Trial ID': 'facility_name',
                                     'Trial Bag ID': 'bag_ID'}, inplace=True)
casp003_standardized['facility_name'] = 'CASP'
casp003_standardized['trial_stage'] = np.nan
casp003_standardized['sa_resid_%'] = np.nan

# Link names to IDs
name_to_ID = {'Fabrikal 16 oz PLA cold cup': 'A11',
               '2-ply Kraft Control 10"x5"': 'A75',
               'BÉSICS® 8 oz Soup Bowl' : 'A62',
               'Alter Eco Quinoa SUP 2018': 'A30',
               'BESICS 8oz Hot cup lid': 'K',
               'BÉSICS® Lined Paper Box with Lid': 'A7',
               'BÉSICS® Uncoated paper fry tray': 'Q',
               'BÉSICS® Spoon 6"': 'A8',
               'Elk Packaging SUP with white outer 2018': 'A32',
               'BÉSICS® Fibreware Clamshell 850mL': 'A65',
               'Straw': 'K2',
               'Vegware Fork ': 'A42',
               'Vegware Spoon': 'I'}
casp003_standardized['item_ID'] = casp003_standardized['Item Description Refined'].map(name_to_ID)
casp003_standardized.drop('Item Description Refined', axis=1, inplace=True)

# Reorder
column_order = ['facility_name', 'bag_ID', 'item_ID', 'trial_stage', 'mass_resid_%', 'sa_resid_%']
casp003_standardized = casp003_standardized[column_order]

casp003_standardized.head()

Unnamed: 0,facility_name,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%
0,CASP,A1,A11,,0.0,
1,CASP,A2,A11,,0.0,
2,CASP,A3,A11,,0.0,
3,CASP,A1,A75,,160.0,
4,CASP,A2,A75,,140.0,


In [122]:
# Process CASP001-01 Trial
casp001_standardized = casp001[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Residual Item Weight - Wet']].copy()
# Keeping wet weight only, otherwise not comparable to all other results

casp001_standardized.rename(columns={'Trial ID': 'facility_name',
                                     'Trial Bag ID': 'bag_ID',
                                     'Residual Item Weight - Wet': 'final_mass'}, inplace=True)
casp001_standardized['facility_name'] = 'CASP'
casp001_standardized['trial_stage'] = np.nan
casp001_standardized['sa_resid_%'] = np.nan

name_to_ID = {'BÉSICS® 12 oz Soup bowl': 'A1',
              'BÉSICS® Sleeve': 'A67',
              'BÉSICS® Wrap': 'A68',
              'CPLA Fork 6" - Stalk Market': 'A69',
              'D&W 32oz Square PLA Box': 'A71',
              'Ecotainer PLA-Lined Soup Bowl 12oz': 'A73',
              'Fabrikal PLA Cold Cup 20oz': 'A74',
              'Kraft Control 10"x5" 2-ply': 'A75',
              'BÉSICS® 8oz CPLA Hot cup lid': 'K',
              'BÉSICS® Fibreware Bowl 16oz': 'A64',
              'PLA Foam Tray': 'A77',
              'BÉSICS® Cellulose bag 5x7in': 'A63',
              'CPLA Knife 6" - Stalk Market': 'A70',
              'D&W PLA Lid 32oz': 'A72',
              'BÉSICS® Fibreware Clamshell 9x9': 'A66',
              'MPLA Spoon - NaturTec': 'A76', 
              'SPP Unlined Paper Tray (hot dog tray)': 'A78'}

casp001_standardized['item_ID'] = casp001_standardized['Item Description Refined'].map(name_to_ID)
casp001_standardized.drop('Item Description Refined', axis=1, inplace=True)

def calculate_mass_resid(casp001, items):
    merged_df = casp001.merge(items, left_on='item_ID', right_on='item_id')
    merged_df['mass_resid_%'] = round((merged_df['final_mass'] / merged_df['item_weight']) * 100, 2)
    return merged_df['mass_resid_%']

casp001_standardized['mass_resid_%'] = calculate_mass_resid(casp001_standardized,
                                                            items)

# Reorder
column_order = ['facility_name', 'bag_ID', 'item_ID', 'trial_stage', 'mass_resid_%', 'sa_resid_%']
casp001_standardized = casp001_standardized[column_order]
casp001_standardized.head()

Unnamed: 0,facility_name,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%
0,CASP,ST R1 H8,A1,,0.0,
1,CASP,ST R1 H8,A67,,6.64,
2,CASP,ST R1 H8,A68,,0.0,
3,CASP,ST R1 H8,A69,,0.0,
4,CASP,ST R1 H8,A71,,0.0,


In [123]:
# Preprocess Trial WR001-01

wr001_standardized = wr001[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Residual Item Weight - Wet']].copy()
# Keeping wet weight only, otherwise not comparable to all other results

wr001_standardized.rename(columns={'Trial ID': 'facility_name',
                                     'Trial Bag ID': 'bag_ID',
                                     'Residual Item Weight - Wet': 'final_mass'}, inplace=True)
wr001_standardized['facility_name'] = 'Windrow'
wr001_standardized['trial_stage'] = np.nan
wr001_standardized['sa_resid_%'] = np.nan

# name_to_ID
wr001_standardized['item_ID'] = wr001_standardized['Item Description Refined'].map(name_to_ID)
wr001_standardized.drop('Item Description Refined', axis=1, inplace=True)

# calculate mass_resid_%
wr001_standardized['mass_resid_%'] = calculate_mass_resid(wr001_standardized,
                                                            items)
# Reorder
column_order = ['facility_name', 'bag_ID', 'item_ID', 'trial_stage', 'mass_resid_%', 'sa_resid_%']
wr001_standardized = wr001_standardized[column_order]

wr001_standardized.head()

Unnamed: 0,facility_name,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%
0,Windrow,BL1,A1,,63.22,
1,Windrow,CL3,A1,,49.77,
2,Windrow,E2,A1,,59.16,
3,Windrow,E4,A1,,58.86,
4,Windrow,H1,A1,,,


In [124]:
ad001.columns = ad001.iloc[0]
ad001 = ad001.drop(ad001.index[0])
ad001.head()

Unnamed: 0,Trial ID,Trial Bag Set,Trial Bag ID,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry
1,AD001-01,Set A,AD T10 H7,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,6.68,6.3
2,AD001-01,Set A,AD T7 H3,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,0.0,0.0
3,AD001-01,Set A,AD T8 H1,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,5.29,5.29
4,AD001-01,Set A,AD T8 L3,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,LOW,1,4.18,3.0
5,AD001-01,Set B,AD T10 H8,BESICS Bowl,BÉSICS® 12 oz Soup bowl,HIGH,2,23.21,18.57


In [125]:
# Preprocess AD001-01 Trial

ad001_standardized = ad001[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Residual Item Weight - Wet']].copy()
# Keeping wet weight only, otherwise not comparable to all other results

ad001_standardized.rename(columns={'Trial ID': 'facility_name',
                                    'Trial Bag ID': 'bag_ID',
                                    'Residual Item Weight - Wet': 'final_mass'}, inplace=True)
ad001_standardized['facility_name'] = 'AD001'
ad001_standardized['trial_stage'] = np.nan
ad001_standardized['sa_resid_%'] = np.nan

# name_to_ID
ad001_standardized['item_ID'] = ad001_standardized['Item Description Refined'].map(name_to_ID)
ad001_standardized.drop('Item Description Refined', axis=1, inplace=True)

# # calculate mass_resid_%
ad001_standardized['final_mass'] = ad001_standardized['final_mass'].replace('See spoon', np.nan)
ad001_standardized['mass_resid_%'] = calculate_mass_resid(ad001_standardized,
                                                          items)
# Reorder
column_order = ['facility_name', 'bag_ID', 'item_ID', 'trial_stage', 'mass_resid_%', 'sa_resid_%']
ad001_standardized = ad001_standardized[column_order]

ad001_standardized.head()

  ad001_standardized['final_mass'] = ad001_standardized['final_mass'].replace('See spoon', np.nan)


Unnamed: 0,facility_name,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%
1,AD001,AD T10 H7,K,,0.0,
2,AD001,AD T7 H3,K,,11.71,
3,AD001,AD T8 H1,K,,9.25,
4,AD001,AD T8 L3,K,,171.63,
5,AD001,AD T10 H8,A1,,55.61,


In [126]:
# Concatenate the 5 trials

reordered_df = pd.concat([reordered_df, ad001_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, casp001_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, casp003_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, wr001_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, wr003_standardized], ignore_index=True)

reordered_df.to_csv('/project/data/compiled_results/processed/observations_compiled.csv', index=False)