In [63]:
import pandas as pd
import numpy as np

## Observations for 10 Trials

In [64]:
# load
all_sheets = pd.read_excel(
    '../data/compiled_results/raw/Compiled Field Results  for DSI - 2023 Bulk 10 Trial Data.xlsx',
    sheet_name=None,
    skiprows=2)
mass = all_sheets['All Mass Residuals by Prod TAB']
items = pd.read_csv('../data/finalized_datasets/items.csv')

# rename
new_names = {
    'Facility Name': 'facility_name',
    'Trial Stage': 'trial_stage',
    'Bag Set': 'bag_set',
    'Bag Number': 'bag_number',
}
mass = mass.rename(columns=new_names)
mass.head()

Unnamed: 0,facility_name,trial_stage,bag_set,bag_number,N,O,Q,V,B,D,...,K,K1,K2,K3,N.1,O.1,P,Q.1,S,V.1
0,Facility 1 ( Windrow),First Removal,A (blue),10,,,,,0.059,0.026,...,,0.986,,0.546,,,,,,
1,Facility 1 ( Windrow),First Removal,A (blue),6,,,,,0.022,0.175,...,,0.696,0.007,0.572,,,,,,
2,Facility 1 ( Windrow),First Removal,A (blue),7,,,,,0.018,0.013,...,,0.933,0.023,0.313,,,,,,
3,Facility 1 ( Windrow),First Removal,A (blue),8,,,,,0.22,0.424,...,,0.909,0.0,0.412,,,,,,
4,Facility 1 ( Windrow),First Removal,A (blue),9,,,,,0.028,0.015,...,,0.928,0.05,0.65,,,,,,


## Mass residuals

In [65]:
# Create 'bag_ID'
mass['bag_ID'] = mass['bag_set'].astype(str) + '-' + mass['bag_number'].astype(str)
mass.drop(['bag_set', 'bag_number'], axis=1, inplace=True)

# Melt
melted_mass = pd.melt(mass,
                     id_vars=['facility_name', 'trial_stage', 'bag_ID'],
                     var_name='item_ID',
                     value_name='mass_resid')
melted_mass = melted_mass.dropna(subset=['mass_resid'])
melted_mass['item_ID'] = melted_mass['item_ID'].apply(lambda x: x.split('.')[0])

# Adjust 'bag_ID' and 'facility_ID'

melted_mass['bag_ID'] = melted_mass['bag_ID'].str.replace(r"\s*\([^)]*\)", "", regex=True)
facility_to_ID = {'Facility 1 ( Windrow)': '1',
                  'Facility 2 (CASP)': '2',
                  'Facility 3 (EASP)': '3',
                  'Facility 4 (In-Vessel)': '4',
                  'Facility 5 (EASP)': '5',
                  'Facility 6 (CASP)': '6',
                  'Facility 7 (CASP)': '7',
                  'Facility 8 (ASP)': '8',
                  'Facility 9 (EASP)': '9',
                  'Facility 10 (Windrow)': '10',
                  'AD001': '11',
                  'WR001': '12',
                  'CASP001': '13',
                  'CASP003': '14',
                  'WR003': '15'}

melted_mass['facility_ID'] = melted_mass['facility_name'].map(facility_to_ID)
melted_mass.drop(['facility_name'], axis=1, inplace=True)

name_to_ID = {'N': '14',
              'O': '16',
              'Q': '21',
              'V': '15',
              'B': '19',
              'D': '25',
              'H': '26',
              'I': '44',
              'J': '23',
              'K': '5',
              'K1': '20',
              'K2': '22',
              'K3': '13',
              'P': '17',
              'S': '24'}
melted_mass['item_ID'] = melted_mass['item_ID'].map(name_to_ID)

add_trial_ID = {'1': 'WR004-01',
                '2': 'CASP005-01',
                '3': 'EASP001-01',
                '4': 'IV002-01',
                '5': 'EASP002-01',
                '6': 'CASP006-01',
                '7': 'CASP004-02',
                '8': 'ASP001-01',
                '9': 'EASP003-01',
                '10': 'WR005-01'}
# melt
melted_mass['trial_ID'] = melted_mass['facility_ID'].map(add_trial_ID)

# Get mass residual as a percentage
melted_mass['mass_resid_%'] = (melted_mass['mass_resid'] * 100).round(2)
melted_mass.head()

Unnamed: 0,trial_stage,bag_ID,item_ID,mass_resid,facility_ID,trial_ID,mass_resid_%
5,First Removal,B-10,14,0.922608,1,WR004-01,92.26
6,First Removal,B-6,14,0.742723,1,WR004-01,74.27
7,First Removal,B-7,14,0.831994,1,WR004-01,83.2
8,First Removal,B-8,14,0.992169,1,WR004-01,99.22
9,First Removal,B-9,14,0.79041,1,WR004-01,79.04


## Surface Area residuals

In [66]:
sa = all_sheets['All SA ImagJ Resids by Prod TAB']
sa = sa.rename(columns=new_names)
sa.columns

# # Create 'bag_ID'
sa['bag_ID'] = sa['bag_set'].astype(str) + '-' + sa['bag_number'].astype(str)
sa.drop(['bag_set', 'bag_number'], axis=1, inplace=True)

sa.head()

Unnamed: 0,facility_name,trial_stage,N,O,Q,V,B,D,H,I,...,K1,K2,K3,N.1,O.1,P,Q.1,S,V.1,bag_ID
0,Facility 1 ( Windrow),First Removal,,,,,0.244,0.039,0.282,0.755,...,0.618,0.233,0.225,,,,,,,A (blue)-10
1,Facility 1 ( Windrow),First Removal,,,,,0.075,0.237,0.429,0.7,...,0.579,0.023,0.197,,,,,,,A (blue)-6
2,Facility 1 ( Windrow),First Removal,,,,,0.08,0.015,0.073,0.877,...,0.828,0.061,0.195,,,,,,,A (blue)-7
3,Facility 1 ( Windrow),First Removal,,,,,0.195,0.62,0.687,0.731,...,0.605,0.0,0.272,,,,,,,A (blue)-8
4,Facility 1 ( Windrow),First Removal,,,,,0.117,0.02,0.072,,...,0.786,0.146,0.271,,,,,,,A (blue)-9


In [67]:
# Melt
melted_sa = pd.melt(sa,
                    id_vars=['facility_name', 'trial_stage', 'bag_ID'],
                    var_name='item_ID',
                    value_name='sa_resid')
melted_sa = melted_sa.dropna(subset=['sa_resid'])


melted_sa['bag_ID'] = melted_sa['bag_ID'].str.replace(r"\s*\([^)]*\)", "", regex=True)
melted_sa['facility_ID'] = melted_sa['facility_name'].map(facility_to_ID)
melted_sa.drop(['facility_name'], axis=1, inplace=True)

melted_sa['item_ID'] = melted_sa['item_ID'].map(name_to_ID)
melted_sa['trial_ID'] = melted_sa['facility_ID'].map(add_trial_ID)
melted_sa['sa_resid_%'] = (melted_sa['sa_resid'] * 100).round(2)
melted_sa.head()

Unnamed: 0,trial_stage,bag_ID,item_ID,sa_resid,facility_ID,trial_ID,sa_resid_%
5,First Removal,B-10,14,0.923651,1,WR004-01,92.37
6,First Removal,B-6,14,0.716393,1,WR004-01,71.64
7,First Removal,B-7,14,0.842365,1,WR004-01,84.24
8,First Removal,B-8,14,0.814947,1,WR004-01,81.49
9,First Removal,B-9,14,0.609835,1,WR004-01,60.98


In [68]:
# Merge
merged_df = pd.merge(melted_mass,
                     melted_sa,
                     on=['facility_ID', 'trial_stage', 'bag_ID', 'item_ID', 'trial_ID'],
                     how='inner',
                     suffixes=('_mass', '_sa'))
merged_df['bag_ID'] = merged_df['bag_ID'].replace({"-": np.nan})
merged_df['treated_mass_resid_%'] = np.nan

new_column_order = ['facility_ID', 'trial_ID', 'bag_ID', 'item_ID','trial_stage', 'mass_resid_%', 'sa_resid_%', 'treated_mass_resid_%']
reordered_df = merged_df[new_column_order]
# reordered_df.to_csv('../data/compiled_results/processed/observations.csv', index=False)

reordered_df.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
0,1,WR004-01,B-10,14,First Removal,92.26,92.37,
1,1,WR004-01,B-10,14,First Removal,92.3,92.37,
2,1,WR004-01,B-6,14,First Removal,74.27,71.64,
3,1,WR004-01,B-6,14,First Removal,74.3,71.64,
4,1,WR004-01,B-7,14,First Removal,83.2,84.24,


## CASP004-01 Trial

In [69]:
casp4 = pd.read_csv('../data/CASP004-01/masterfile.csv')
casp4.head()

Unnamed: 0,org_ID,trial_ID,bag_color,bag_ID,bag_content,bag_type,bag_placement,trial_stage,product_name,product_brand,...,fragment_size,photo_available,weight1,weight2,weight3,mean_weight_final_g,notes,%_not_decomposed,%_decomposed,outlier_alert
0,44547.0,44547-01-21,Green,A-1,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,BÉSICS®,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
1,44547.0,44547-01-21,Green,A-2,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,BÉSICS®,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
2,44547.0,44547-01-21,Green,A-3,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,BÉSICS®,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
3,44547.0,44547-01-21,Green,A-4,Baseline,Standard,Top depth,Start,12 oz Hot cup / Soup bowl,BÉSICS®,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False
4,44547.0,44547-01-21,Green,A-5,Baseline,Standard,Bottom depth,Start,12 oz Hot cup / Soup bowl,BÉSICS®,...,"3’’ diameter, 2.5’’ H",True,8.12,8.1,8.12,8.11,,100.0,0.0,False


In [70]:
# Force format
casp4 = casp4[['org_ID', 'trial_ID', 'bag_ID',
               'product_name', 'trial_stage',
               'product_weight_init_g', 'mean_weight_final_g']]

casp4 = casp4.assign(org_ID='7').rename(columns={'org_ID': 'facility_ID'})
casp4 = casp4.assign(trial_ID='CASP004-01')

def calculate_mass_resid_percent(row):
    """
    Calculates the percentage of mass residue from initial to final weight.

    Parameters:
    - row (dict): A row containing 'mean_weight_final_g' and 'product_weight_init_g' values.

    Returns:
    - float: The mass residue percentage, rounded to 2 decimal places.

    This function computes the mass residue as a percentage by dividing the final mean weight by the initial product weight and multiplying by 100.
    """
    return round((row['mean_weight_final_g'] / row['product_weight_init_g']) * 100, 2)
casp4['mass_resid_%'] = casp4.apply(calculate_mass_resid_percent, axis=1)
casp4.drop(columns=['product_weight_init_g', 'mean_weight_final_g'], inplace=True)

casp4['sa_resid_%'] = np.nan
casp4['treated_mass_resid_%'] = np.nan
casp4.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,product_name,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
0,7,CASP004-01,A-1,12 oz Hot cup / Soup bowl,Start,100.0,,
1,7,CASP004-01,A-2,12 oz Hot cup / Soup bowl,Start,100.0,,
2,7,CASP004-01,A-3,12 oz Hot cup / Soup bowl,Start,100.0,,
3,7,CASP004-01,A-4,12 oz Hot cup / Soup bowl,Start,100.0,,
4,7,CASP004-01,A-5,12 oz Hot cup / Soup bowl,Start,100.0,,


In [71]:
casp4 = casp4[casp4['trial_stage'] != 'Interval']
casp4 = casp4[casp4['trial_stage'] != 'Start']
casp4['trial_stage'] = casp4['trial_stage'].replace({'End': 'Second Removal'})
casp4.head()


Unnamed: 0,facility_ID,trial_ID,bag_ID,product_name,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
12,7,CASP004-01,A-1,12 oz Hot cup / Soup bowl,Second Removal,0.0,,
13,7,CASP004-01,A-2,12 oz Hot cup / Soup bowl,Second Removal,0.0,,
14,7,CASP004-01,A-3,12 oz Hot cup / Soup bowl,Second Removal,187.18,,
15,7,CASP004-01,A-4,12 oz Hot cup / Soup bowl,Second Removal,204.44,,
16,7,CASP004-01,A-5,12 oz Hot cup / Soup bowl,Second Removal,0.0,,


In [72]:
name_to_ID =  {'12 oz Hot cup / Soup bowl': '53',
               'Hot cup lid' : '2',
               '16 oz PLA cold cup': '11',
               'Cutlery': '34',
               'PLA-lined fibre bowl, white ': '4',
               'Uncoated paper food tray ': '10', 
               'Lined paper food tray with lid': '7',
               'Kraft control': '75',
               'Fiber Clamshell, Lined 9x9x3 SKU TO-SC-U9L-LF': '33',
               '16oz NoTree Paper Hot Cup SKU CU-SU-16': '27',
               '14oz PLA Cold Cup SKU CP-CS-14': '36',
               'PLA Lid: 32oz Burrito Bowl SKU BOL-CS-UBB': '58',
               'Fiber Cutlery, Spoon SP-FB-6-LF': '34',
               '3Gallon Food Scrap Bag BG-CS-3': '28',
               '8" Kraft straw ST-PA-8-K': '29',
               'TPLA Spoon SP-PS-6': '40',
               'Wrapper for TPLA Spoon SP-PS-I': '61',
               'Large brown bag': '39',
               'Small zippered clear colour bag': '38',
               'Printed small bag with brown background and logo': '37',
               'Large natural clear colour bag': '35'} 

casp4['item_ID'] = casp4['product_name'].map(name_to_ID)
casp4.drop(['product_name'], axis=1, inplace=True)
casp4 = casp4[['facility_ID', 'trial_ID', 'bag_ID', 'item_ID', 'trial_stage', 'mass_resid_%', 'sa_resid_%', 'treated_mass_resid_%']]
casp4.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
12,7,CASP004-01,A-1,53,Second Removal,0.0,,
13,7,CASP004-01,A-2,53,Second Removal,0.0,,
14,7,CASP004-01,A-3,53,Second Removal,187.18,,
15,7,CASP004-01,A-4,53,Second Removal,204.44,,
16,7,CASP004-01,A-5,53,Second Removal,0.0,,


In [73]:
# Concat 10 trials & CASP
reordered_df = pd.concat([reordered_df, casp4], ignore_index=True)
# reordered_df.to_csv('/project/data/compiled_results/observations.csv', index=False)

reordered_df.tail()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
2226,7,CASP004-01,B-2,61,Second Removal,0.0,,
2227,7,CASP004-01,B-3,61,Second Removal,67.07,,
2228,7,CASP004-01,B-4,61,Second Removal,156.1,,
2229,7,CASP004-01,B-5,61,Second Removal,0.0,,
2230,7,CASP004-01,B-6,61,Second Removal,102.44,,


## 5 Trials

In [74]:
# load all files
five_trials = pd.read_excel(
    '../data/compiled_results/raw/Compiled Field Results - CFTP Gathered Data.xlsx',
    sheet_name=None,
    skiprows=0)
ad001 = five_trials['AD001-01']
wr001 = five_trials['WR001-01']
wr003 = five_trials['WR003-01']
casp001 = five_trials['CASP001-01']
casp003 = five_trials['CASP003-01']

In [75]:
wr003.head()

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Colour,Item ID,Item Description From Trial,Item Description Refined,Number of Items per bag,Fragments Found Y/N,Final Residual Weight - wet,Fragment size (L x W x H),Notes on Final Weight
0,OWR003-01,Blue Zip Tie #1,Blue,1D,CPLA utensil corn starch,WR3 - CPLA Spoon,1,Y,4.76,6x1.5x1,"good disintegration, in 3 fragments"
1,OWR003-01,Blue Zip Tie #1,Blue,1F,Coffee Bag 1,WR3 - Coffee Bag 1,1,Y,17.76,8x7x0.5,entire bag intact
2,OWR003-01,Blue Zip Tie #1,Blue,1H,Agave straw,WR3 - Agave Straw,1,Y,0.59,8x0.3x0.3,entire straw intact
3,OWR003-01,Blue Zip Tie #1,Blue,1G,Coffee Bag 2,WR3 - Coffee Bag 2,1,Y,12.7,6.5x6x0.5,inner plastic zipper and plastic did not degra...
4,OWR003-01,Blue Zip Tie #1,Blue,1B,"Portion cup, PLA clear",WR3 - PLA portion cup 16oz,1,Y,7.86,4x4x1,no notes


In [76]:
# Process WR003-01 Trial 
wr003_standardized = wr003[['Trial ID', 'Trial Bag ID',
                            'Item ID', 'Final Residual Weight - wet']].copy()
wr003_standardized.rename(columns={
                            'Trial ID': 'trial_ID',
                            'Trial Bag ID': 'bag_ID',
                            'Item ID': 'item_ID',
                            'Final Residual Weight - wet': 'final_mass'
}, inplace=True)

# Force Format
wr003_standardized['facility_ID'] = '15'
wr003_standardized['trial_stage'] = 'Second Removal'
wr003_standardized['sa_resid_%'] = np.nan

# Fix item IDs
ID_to_ID_correct = {'1D': '52',
                    '1F': '48',
                    '1H': '46',
                    '1G': '49',
                    '1B': '56',
                    '1A': '57',
                    '1C': '47',
                    '1E': '53',
                    '1I': 'no-match', #could not find a match
                    '2.D2': '50',
                    '2.A2': '54',
                    '2.B2': '60',
                    '2.C2': '58',
                    '2.E2': '51', # could not find a match (suppose a 200 was a mis-spel)
                    '2.F2': '45',
                    '2.G2': '59',
                    '2.H2': '55',
                    '2.I2': 'no-match'} #could not find a match
wr003_standardized['item_ID'] = wr003_standardized['item_ID'].map(ID_to_ID_correct)

# Calculate mass residuals
def calculate_mass_resid(trial, items):
    """
    Calculates mass residue percentages for items in a trial by merging trial and item data.

    Parameters:
    - trial (pandas.DataFrame): DataFrame containing trial data with 'item_ID' and 'final_mass' columns.
    - items (pandas.DataFrame): DataFrame containing item data with 'item_id' and 'item_weight' columns.

    Returns:
    - pandas.Series: A Series containing the mass residue percentages for each item, rounded to 2 decimal places.

    The function merges the trial and item DataFrames on 'item_ID' and 'item_id', respectively, computes the mass residue percentage for each item, and returns the calculated percentages.
    """
    items['item_id'] = items['item_id'].astype(str)
    merged_df = trial.merge(items, left_on='item_ID', right_on='item_id')
    merged_df['mass_resid_%'] = round((merged_df['final_mass'] / merged_df['item_weight']) * 100, 2)
    return merged_df['mass_resid_%']
wr003_standardized['mass_resid_%'] = calculate_mass_resid(wr003_standardized,items)
wr003_standardized['treated_mass_resid_%'] = np.nan

# Reorder
column_order = ['facility_ID', 'trial_ID', 'bag_ID',
                'item_ID', 'trial_stage',
                'mass_resid_%', 'sa_resid_%', 'treated_mass_resid_%']
wr003_standardized = wr003_standardized[column_order]
wr003_standardized.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
0,15,OWR003-01,Blue Zip Tie #1,52,Second Removal,104.23,,
1,15,OWR003-01,Blue Zip Tie #1,48,Second Removal,51.9,,
2,15,OWR003-01,Blue Zip Tie #1,46,Second Removal,0.0,,
3,15,OWR003-01,Blue Zip Tie #1,49,Second Removal,30.44,,
4,15,OWR003-01,Blue Zip Tie #1,56,Second Removal,0.0,,


In [77]:
wr003_standardized['item_ID'].unique() # there are some remaining nans

array(['52', '48', '46', '49', '56', '57', '47', '53', 'no-match', '50',
       '54', '60', '58', '51', '45', '59', '55', nan], dtype=object)

In [78]:
casp003.head()

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Colour,Bag Set Detail,Item Description From Trial,Item Description Refined,Number of Items per bag,Initial Item Weight - Aggregate,Final Residual Weight - wet - aggregate,Weight units,Initial Per-Item Weight,Notes on Final Weight
0,CASP003-01,A1,Black,CFTP baseline,Plastic Cup,Fabrikal 16 oz PLA cold cup,1,15,0.0,grams,15.0,
1,CASP003-01,A2,Black,CFTP baseline,Plastic Cup,Fabrikal 16 oz PLA cold cup,1,15,0.0,grams,15.0,
2,CASP003-01,A3,Black,CFTP baseline,Plastic Cup,Fabrikal 16 oz PLA cold cup,1,14,0.0,grams,14.0,
3,CASP003-01,A1,Black,CFTP baseline,2 Kraft Papers,"2-ply Kraft Control 10""x5""",1,5,8.0,grams,5.0,
4,CASP003-01,A2,Black,CFTP baseline,2 Kraft Papers,"2-ply Kraft Control 10""x5""",1,5,7.0,grams,5.0,


In [79]:
# Process CASP003-01 Trial
casp003_standardized = casp003[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Initial Item Weight - Aggregate',
                                'Final Residual Weight - wet - aggregate']].copy()

# Calculate mass_resid_%
def calculate_mass_resid_2(row):
    """
    Calculates the percentage of mass residue based on final and initial aggregate weights for 5 old trials.

    Parameters:
    - row (dict): A row containing 'Final Residual Weight - wet - aggregate' and 'Initial Item Weight - Aggregate'.

    Returns:
    - float: The calculated mass residue percentage, rounded to 2 decimal places.

    This function computes the mass residue percentage by dividing the final residual weight (wet aggregate) by the initial item weight (aggregate) and multiplying by 100.
    """
    final_mass = row['Final Residual Weight - wet - aggregate']
    initial_mass = row['Initial Item Weight - Aggregate']
    return round((final_mass / initial_mass) * 100, 2)

casp003_standardized['mass_resid_%'] = casp003_standardized.apply(
    calculate_mass_resid_2, axis=1
)
casp003_standardized.drop(['Final Residual Weight - wet - aggregate',
                           'Initial Item Weight - Aggregate'], axis=1, inplace=True)

# Force Format
casp003_standardized.rename(columns={'Trial ID': 'trial_ID',
                                     'Trial Bag ID': 'bag_ID'}, inplace=True)
casp003_standardized['facility_ID'] = '14'
casp003_standardized['trial_stage'] = 'Second Removal'
casp003_standardized['sa_resid_%'] = np.nan
casp003_standardized['treated_mass_resid_%'] = np.nan

# Link names to IDs
name_to_ID = {'Fabrikal 16 oz PLA cold cup': '11',
               '2-ply Kraft Control 10"x5"': '75',
               'BÉSICS® 8 oz Soup Bowl' : '62',
               'Alter Eco Quinoa SUP 2018': '30',
               'BESICS 8oz Hot cup lid': '5',
               'BÉSICS® Lined Paper Box with Lid': '7',
               'BÉSICS® Uncoated paper fry tray': '10',
               'BÉSICS® Spoon 6"': '8',
               'Elk Packaging SUP with white outer 2018': '32',
               'BÉSICS® Fibreware Clamshell 850mL': '65',
               'Straw': '46',
               'Vegware Fork ': '42',
               'Vegware Spoon': '44'}
casp003_standardized['item_ID'] = casp003_standardized[
    'Item Description Refined'
].map(name_to_ID)
casp003_standardized.drop('Item Description Refined', axis=1, inplace=True)

# Reorder
column_order = ['facility_ID', 'trial_ID', 'bag_ID', 'item_ID',
                'trial_stage', 'mass_resid_%', 'sa_resid_%',
                 'treated_mass_resid_%']
casp003_standardized = casp003_standardized[column_order]

casp003_standardized.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
0,14,CASP003-01,A1,11,Second Removal,0.0,,
1,14,CASP003-01,A2,11,Second Removal,0.0,,
2,14,CASP003-01,A3,11,Second Removal,0.0,,
3,14,CASP003-01,A1,75,Second Removal,160.0,,
4,14,CASP003-01,A2,75,Second Removal,140.0,,


In [80]:
casp001.columns

Index(['Trial ID', 'Trial Bag Set', 'Trial Bag ID',
       'Item Description From Trial', 'Item Description Refined',
       'Load Concentration', 'Number of Items per bag',
       'Residual Item Weight - Wet', 'Residual Weight - Oven-dry'],
      dtype='object')

In [81]:
# Process CASP001-01 Trial
casp001_standardized = casp001[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Residual Item Weight - Wet',
                                'Residual Weight - Oven-dry']].copy()
# Keeping wet weight only, otherwise not comparable to all other results

casp001_standardized.rename(columns={'Trial ID': 'trial_ID',
                                     'Trial Bag ID': 'bag_ID',
                                     'Residual Item Weight - Wet': 'final_mass',
                                     'Residual Weight - Oven-dry': 'dry_mass'},
                                      inplace=True)
casp001_standardized['facility_ID'] = '13'
casp001_standardized['trial_stage'] = 'Second Removal'
casp001_standardized['sa_resid_%'] = np.nan

name_to_ID = {'BÉSICS® 12 oz Soup bowl': '1',
              'BÉSICS® Sleeve': '67',
              'BÉSICS® Wrap': '68',
              'CPLA Fork 6" - Stalk Market': '69',
              'D&W 32oz Square PLA Box': '71',
              'Ecotainer PLA-Lined Soup Bowl 12oz': '73',
              'Fabrikal PLA Cold Cup 20oz': '74',
              'Kraft Control 10"x5" 2-ply': '75',
              'BÉSICS® 8oz CPLA Hot cup lid': '5',
              'BÉSICS® Fibreware Bowl 16oz': '64',
              'PLA Foam Tray': '77',
              'BÉSICS® Cellulose bag 5x7in': '63',
              'CPLA Knife 6" - Stalk Market': '70',
              'D&W PLA Lid 32oz': '72',
              'BÉSICS® Fibreware Clamshell 9x9': '66',
              'MPLA Spoon - NaturTec': '76', 
              'SPP Unlined Paper Tray (hot dog tray)': '78'}

casp001_standardized['item_ID'] = casp001_standardized['Item Description Refined'].map(name_to_ID)
casp001_standardized.drop('Item Description Refined', axis=1, inplace=True)

def calculate_mass_resid(casp001, items):
    """
    Merges trial and item data to calculate mass residue percentages.

    Parameters:
    - casp001: Trial data DataFrame with 'item_ID' and 'final_mass'.
    - items: Item data DataFrame with 'item_id' and 'item_weight'.

    Returns:
    - Series of mass residue percentages, rounded to 2 decimal places.
    """
    merged_df = casp001.merge(items, left_on='item_ID', right_on='item_id')
    merged_df['mass_resid_%'] = round((merged_df['final_mass'] / merged_df['item_weight']) * 100, 2)
    return merged_df['mass_resid_%']

def calculate_dry_mass(casp001, items):
    """
    Merges trial and item data to calculate dry mass residue percentages.

    Parameters:
    - casp001: Trial data DataFrame with 'item_ID' and 'dry_mass'.
    - items: Item data DataFrame with 'item_id' and 'item_weight'.

    Returns:
    - Series of dry mass residue percentages, rounded to 2 decimal places.
    """
    merged_df = casp001.merge(items, left_on='item_ID', right_on='item_id')
    merged_df['treated_mass_resid_%'] = round((merged_df['dry_mass'] / merged_df['item_weight']) * 100, 2)
    return merged_df['treated_mass_resid_%']

casp001_standardized['mass_resid_%'] = calculate_mass_resid(casp001_standardized,
                                                            items)
casp001_standardized['treated_mass_resid_%'] = calculate_dry_mass(casp001_standardized,
                                                            items)

# Reorder
column_order = ['facility_ID', 'trial_ID', 'bag_ID', 'item_ID',
                 'trial_stage', 'mass_resid_%', 'sa_resid_%', 'treated_mass_resid_%']
casp001_standardized = casp001_standardized[column_order]
casp001_standardized.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
0,13,CASP001-01,ST R1 H8,1,Second Removal,0.0,,0.0
1,13,CASP001-01,ST R1 H8,67,Second Removal,109.14,,96.5
2,13,CASP001-01,ST R1 H8,68,Second Removal,47.1,,42.74
3,13,CASP001-01,ST R1 H8,69,Second Removal,0.0,,0.0
4,13,CASP001-01,ST R1 H8,71,Second Removal,0.0,,0.0


In [82]:
wr001.columns

Index(['Trial ID', 'Trial Bag ID', 'Trial Bag Set',
       'Item Description From Trial', 'Item Description Refined',
       'Load Concentration', 'Number of Items per bag',
       'Residual Item Weight - Wet', 'Residual Weight - Oven-dry'],
      dtype='object')

In [83]:
# Preprocess Trial WR001-01

wr001_standardized = wr001[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Residual Item Weight - Wet',
                                'Residual Weight - Oven-dry']].copy()
# Keeping wet weight only, otherwise not comparable to all other results

wr001_standardized.rename(columns={'Trial ID': 'trial_ID',
                                    'Trial Bag ID': 'bag_ID',
                                    'Residual Item Weight - Wet': 'final_mass',
                                    'Residual Weight - Oven-dry': 'dry_mass'},
                                     inplace=True)
wr001_standardized['facility_ID'] = '12'
wr001_standardized['trial_stage'] = 'Second Removal'
wr001_standardized['sa_resid_%'] = np.nan

# name_to_ID
wr001_standardized['item_ID'] = wr001_standardized['Item Description Refined'].map(name_to_ID)
wr001_standardized.drop('Item Description Refined', axis=1, inplace=True)

# calculate mass_resid_%
wr001_standardized['mass_resid_%'] = calculate_mass_resid(wr001_standardized,
                                                            items)
wr001_standardized['treated_mass_resid_%'] = calculate_dry_mass(wr001_standardized,
                                                            items)
# Reorder
column_order = ['facility_ID', 'trial_ID', 'bag_ID',
                 'item_ID', 'trial_stage', 'mass_resid_%',
                  'sa_resid_%', 'treated_mass_resid_%']
wr001_standardized = wr001_standardized[column_order]

wr001_standardized.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
0,12,OWR001-01,BL1,1,Second Removal,63.22,,57.23
1,12,OWR001-01,CL3,1,Second Removal,49.77,,49.62
2,12,OWR001-01,E2,1,Second Removal,59.16,,52.21
3,12,OWR001-01,E4,1,Second Removal,58.86,,51.24
4,12,OWR001-01,H1,1,Second Removal,,,101.75


In [84]:
ad001.columns = ad001.iloc[0]
ad001 = ad001.drop(ad001.index[0])
ad001.head()

Unnamed: 0,Trial ID,Trial Bag Set,Trial Bag ID,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry
1,AD001-01,Set A,AD T10 H7,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,6.68,6.3
2,AD001-01,Set A,AD T7 H3,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,0.0,0.0
3,AD001-01,Set A,AD T8 H1,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,5.29,5.29
4,AD001-01,Set A,AD T8 L3,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,LOW,1,4.18,3.0
5,AD001-01,Set B,AD T10 H8,BESICS Bowl,BÉSICS® 12 oz Soup bowl,HIGH,2,23.21,18.57


In [85]:
# Preprocess AD001-01 Trial

ad001_standardized = ad001[['Trial ID', 'Trial Bag ID',
                                'Item Description Refined',
                                'Residual Item Weight - Wet',
                                'Residual Weight - Oven-dry']].copy()
# Keeping wet weight only, otherwise not comparable to all other results

ad001_standardized.rename(columns={'Trial ID': 'trial_ID',
                                    'Trial Bag ID': 'bag_ID',
                                    'Residual Item Weight - Wet': 'final_mass',
                                    'Residual Weight - Oven-dry': 'dry_mass'}, inplace=True)
ad001_standardized['facility_ID'] = '11'
ad001_standardized['trial_stage'] = 'Second Removal'
ad001_standardized['sa_resid_%'] = np.nan
ad001_standardized['treated_mass_resid_%'] = np.nan

# name_to_ID
ad001_standardized['item_ID'] = ad001_standardized['Item Description Refined'].map(name_to_ID)
ad001_standardized.drop('Item Description Refined', axis=1, inplace=True)

# # calculate mass_resid_%
ad001_standardized['final_mass'] = ad001_standardized['final_mass'].replace('See spoon', np.nan)
ad001_standardized['mass_resid_%'] = calculate_mass_resid(ad001_standardized,items)

# Reorder
column_order = ['facility_ID', 'trial_ID', 'bag_ID',
                 'item_ID', 'trial_stage', 'mass_resid_%',
                  'sa_resid_%', 'treated_mass_resid_%']
ad001_standardized = ad001_standardized[column_order]

ad001_standardized.head()

Unnamed: 0,facility_ID,trial_ID,bag_ID,item_ID,trial_stage,mass_resid_%,sa_resid_%,treated_mass_resid_%
1,11,AD001-01,AD T10 H7,5,Second Removal,0.0,,
2,11,AD001-01,AD T7 H3,5,Second Removal,11.71,,
3,11,AD001-01,AD T8 H1,5,Second Removal,9.25,,
4,11,AD001-01,AD T8 L3,5,Second Removal,6.15,,
5,11,AD001-01,AD T10 H8,1,Second Removal,0.0,,


In [86]:
# Concatenate the 5 trials

reordered_df = pd.concat([reordered_df, ad001_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, casp001_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, casp003_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, wr001_standardized], ignore_index=True)
reordered_df = pd.concat([reordered_df, wr003_standardized], ignore_index=True)

In [87]:
na_counts = reordered_df.isna().sum()
na_counts

facility_ID                0
trial_ID                   0
bag_ID                    72
item_ID                   42
trial_stage                0
mass_resid_%              94
sa_resid_%               821
treated_mass_resid_%    2524
dtype: int64

In [88]:
final_df = reordered_df.dropna(subset=['item_ID'])
final_df = reordered_df.dropna(subset=['mass_resid_%'])

In [89]:
na_counts = final_df.isna().sum()
na_counts

facility_ID                0
trial_ID                   0
bag_ID                    72
item_ID                    0
trial_stage                0
mass_resid_%               0
sa_resid_%               727
treated_mass_resid_%    2450
dtype: int64

In [90]:
final_df.shape

(2610, 8)

In [91]:
final_df.to_csv('../data/finalized_datasets/observations_compiled.csv', index=False)