In [2]:
import pandas as pd
import numpy as np

In [3]:
DATA_FOLDER = "../data/"

### Clean Items

In [4]:
ITEMS_PATH = DATA_FOLDER + "CFTP Test Item Inventory with Dimensions - All Trials.xlsx"
items = pd.read_excel(ITEMS_PATH, sheet_name=0, skiprows=3)
items.head(2)

Unnamed: 0,Item ID,Item Format,Brand,Manufacturer,Item SKU,Item Name,Item Description Refined,Item Description From Trial,Item ID.1,Material Class I,...,Material Composition,Certification @ time of testing,Kit,Initial Weight 1,Initial Weight 2,Initial Weight 3,"Average Initial Weight, g",Item Dimensions Compiled,"Item Capacity, mL",Notes
0,A1,Bowl,BÉSICS®,WeiMon,WM-W270,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,BESICS Bowl,,Fiber,...,"Paper, PLA lining, adhesive, ink",BPI,Baseline,8.12,8.1,8.12,8.113333,"3’’ diameter, 2.5’’ H",,
1,A2,Hot Cup Lid,BÉSICS®,Multiple,,CPLA Hot Cup Lid,BÉSICS® 12oz CPLA Hot Cup Lid,Hot cup lid,,Biopolymer,...,Crystallized PLA,BPI,Baseline,3.58,3.58,3.56,3.573333,"3.5’’ diameter, 0.5’’ H",,


In [5]:
for col in items.columns:
    print(col)

Item ID
Item Format
Brand
Manufacturer
Item SKU
Item Name
Item Description Refined
Item Description From Trial
Item ID.1
Material Class I
Material Class II
Material Class III
Material Description
Material Composition
Certification @ time of testing
Kit 
Initial Weight 1
Initial Weight 2
Initial Weight 3
Average Initial Weight, g
Item Dimensions Compiled
Item Capacity, mL
Notes


In [6]:
items['Start Weight'] = items['Average Initial Weight, g']

In [7]:
items_cols = [
    'Item ID',
    'Item Name',
    'Item Description Refined',
    'Material Class I',
    'Material Class II',
    'Material Class III',
    'Start Weight'
    ]

In [8]:
items_clean = items[items_cols]
items_clean.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight
0,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333
1,A2,CPLA Hot Cup Lid,BÉSICS® 12oz CPLA Hot Cup Lid,Biopolymer,Rigid Biopolymer (< 0.75mm),CPLA,3.573333


In [9]:
item2id = {key.strip(): value for key, value in items_clean.set_index('Item Description Refined')['Item ID'].to_dict().items()}
item2id

{'BÉSICS® 12 oz Soup bowl': 'A1',
 'BÉSICS® 12oz CPLA Hot Cup Lid': 'A3',
 'BÉSICS® 250mL PLA-lined Bagasse Leaf Bowl': 'A4',
 'BÉSICS® 8oz CPLA Hot cup lid': 'A16',
 'BÉSICS® Box with Lid AND Fry Tray': 'A6',
 'BÉSICS® Lined Paper Box with Lid': 'A7',
 'BÉSICS® Spoon 6"': 'A9',
 'BÉSICS® Uncoated paper fry tray': 'Q',
 'Fabrikal 16 oz PLA cold cup': 'A11',
 'Kraft Control 10"x5" 1-ply': 'A12',
 'Bagasse Clamshell 9x9': 'O',
 'BÉSICS® 12oz Hot Cup': 'A14',
 'BÉSICS® 6" Spoon': 'A15',
 'Bin Liner Bag 2.5 gal': 'A17',
 'Cellulose bag CONTROL': 'A18',
 'Coffee Pod': 'A19',
 'Kraft butcher paper CONTROL': 'A20',
 'Moulded fiber bowl 16oz': 'A21',
 'Navel orange peel': 'A22',
 'PLA Cold Cup 12oz': 'A23',
 'PLA Cold Cup Lid 8oz': 'V',
 'Spoon PLA 6"': 'A25',
 'Straw PHA 8"': 'A26',
 '16oz NoTree Paper Hot Cup - World Centric': 'P',
 '3Gallon Food Scrap Bag - World Centric': 'K3',
 '8" Kraft straw ST-PA-8-K': 'B',
 'Alter Eco Quinoa SUP 2018': 'A30',
 'BÉSICS® Leaf Plate': 'A31',
 'Elk Packag

In [10]:
# ITEMS_SAVE_PATH = ""
# items.to_csv(ITEMS_SAVE_PATH, index=False)

### Clean Closed Loop Trial Observations

In [11]:
# TEN_TRIALS_PATH = DATA_FOLDER + "Compiled Field Results  for DSI - 2023 Bulk 10 Trial Data.xlsx"
TEN_TRIALS_PATH = DATA_FOLDER + "Donated Data 2023 - Compiled Field Results for DSI.xlsx"
observations_weight = pd.read_excel(TEN_TRIALS_PATH, sheet_name=3, skiprows=2)
observations_sa = pd.read_excel(TEN_TRIALS_PATH, sheet_name=4, skiprows=2)
observations_sa.head(2)

Unnamed: 0,Facility Name,Trial Stage,Bag Set,Bag Number,N,O,Q,V,B,D,...,K,K1,K2,K3,N.1,O.1,P,Q.1,S,V.1
0,Facility 1 ( Windrow),First Removal,A (blue),10,,,,,0.244,0.039,...,,0.618,0.233,0.225,,,,,,
1,Facility 1 ( Windrow),First Removal,A (blue),6,,,,,0.075,0.237,...,,0.579,0.023,0.197,,,,,,


In [12]:
# only use second removal
weight = observations_weight[observations_weight['Trial Stage'] == "Second Removal"]
area = observations_sa[observations_sa['Trial Stage'] == "Second Removal"]

In [13]:
weight_melted = weight.melt(id_vars=['Facility Name', 'Trial Stage', 'Bag Set', 'Bag Number'],
                    value_vars=['N', 'O', 'Q', 'V', 'B', 'D', 'H', 'I', 'J', 'K', 'K1', 'K2', 'K3', 'N', 'O', 'P', 'Q', 'S', 'V'],
                    var_name='Item ID',
                    value_name='% Residuals (Weight)')
weight_melted = weight_melted.dropna(subset=['% Residuals (Weight)']).reset_index(drop=True)
weight_melted.head(2)

Unnamed: 0,Facility Name,Trial Stage,Bag Set,Bag Number,Item ID,% Residuals (Weight)
0,Facility 1 ( Windrow),Second Removal,B (green),1,N,0.0
1,Facility 1 ( Windrow),Second Removal,B (green),2,N,1.002848


In [14]:
area_melted = area.melt(id_vars=['Facility Name', 'Trial Stage', 'Bag Set', 'Bag Number'],
                    value_vars=['N', 'O', 'Q', 'V', 'B', 'D', 'H', 'I', 'J', 'K', 'K1', 'K2', 'K3', 'N', 'O', 'P', 'Q', 'S', 'V'],
                    var_name='Item ID',
                    value_name='% Residuals (Area)')
area_melted = area_melted.dropna(subset=['% Residuals (Area)']).reset_index(drop=True)
area_melted.head(2)

Unnamed: 0,Facility Name,Trial Stage,Bag Set,Bag Number,Item ID,% Residuals (Area)
0,Facility 1 ( Windrow),Second Removal,B (green),1,N,0.0
1,Facility 1 ( Windrow),Second Removal,B (green),2,N,0.720347


In [15]:
observations_closed_loop = pd.merge(weight_melted, area_melted, on=['Facility Name', 'Trial Stage', 'Bag Set', 'Bag Number', 'Item ID'], how='outer')
observations_closed_loop.head(2)

Unnamed: 0,Facility Name,Trial Stage,Bag Set,Bag Number,Item ID,% Residuals (Weight),% Residuals (Area)
0,Facility 1 ( Windrow),Second Removal,A (blue),1,B,0.042,0.121
1,Facility 1 ( Windrow),Second Removal,A (blue),1,D,0.014,0.02


In [16]:
observations_closed_loop['Item ID'].isnull().sum()

0

In [17]:
# TODO: We have some missing observations
len(observations_closed_loop), len(weight_melted), len(area_melted)

(787, 781, 772)

In [18]:
observations_closed_loop[observations_closed_loop['% Residuals (Weight)'].isna() | observations_closed_loop['% Residuals (Area)'].isna()]

Unnamed: 0,Facility Name,Trial Stage,Bag Set,Bag Number,Item ID,% Residuals (Weight),% Residuals (Area)
13,Facility 1 ( Windrow),Second Removal,A (blue),2,K3,,0.576
27,Facility 1 ( Windrow),Second Removal,A (blue),5,B,,0.201
31,Facility 1 ( Windrow),Second Removal,A (blue),5,K1,,0.123
126,Facility 10 (Windrow),Second Removal,A (blue),5,I,,0.373
368,Facility 5 (EASP),Second Removal,B (green),4,K1,,0.263
588,Facility 8 (ASP),Second Removal,A (blue),10,K1,,0.539
689,Facility 9 (EASP),Second Removal,A (blue),1,K2,0.052,
696,Facility 9 (EASP),Second Removal,A (blue),3,K2,0.134,
703,Facility 9 (EASP),Second Removal,A (blue),5,K2,0.148,
710,Facility 9 (EASP),Second Removal,A (blue),7,K2,0.048,


In [19]:
obs_cols = ['Facility Name', 'Item ID', '% Residuals (Weight)', '% Residuals (Area)']
observations_closed_loop = observations_closed_loop[obs_cols]

In [20]:
observations_rename_dict = {
    'Facility Name': 'Trial'
}
observations_closed_loop = observations_closed_loop.rename(columns=observations_rename_dict)
observations_closed_loop.head(2)

Unnamed: 0,Trial,Item ID,% Residuals (Weight),% Residuals (Area)
0,Facility 1 ( Windrow),B,0.042,0.121
1,Facility 1 ( Windrow),D,0.014,0.02


#### Join With Items

In [21]:
items_clean.dtypes

Item ID                      object
Item Name                    object
Item Description Refined     object
Material Class I             object
Material Class II            object
Material Class III           object
Start Weight                float64
dtype: object

In [22]:
joined_cl = pd.merge(items_clean, observations_closed_loop, on="Item ID")
joined_cl.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,Trial,% Residuals (Weight),% Residuals (Area)
0,K,CPLA Hot Cup Lid,BÉSICS® 8oz CPLA Hot cup lid,Biopolymer,Rigid Biopolymer (< 0.75mm),CPLA,2.5,Facility 1 ( Windrow),0.766,0.36
1,K,CPLA Hot Cup Lid,BÉSICS® 8oz CPLA Hot cup lid,Biopolymer,Rigid Biopolymer (< 0.75mm),CPLA,2.5,Facility 1 ( Windrow),0.566,0.236


In [23]:
keep_cols = [
    "Trial",
    "Item ID",
    "Item Name",
    "Item Description Refined",
    "Material Class I",
    "Material Class II",
    "Material Class III",
    "Start Weight",
    "% Residuals (Weight)",
    "% Residuals (Area)"
]

In [24]:
joined_cl = joined_cl[keep_cols]

In [25]:
for col in joined_cl.columns:
    print(col)

Trial
Item ID
Item Name
Item Description Refined
Material Class I
Material Class II
Material Class III
Start Weight
% Residuals (Weight)
% Residuals (Area)


### Clean PDF Data

#### Trial AD001

In [26]:
FILEPATH_PDF = DATA_FOLDER + "Compiled Field Results - CFTP Gathered Data.xlsx"

In [27]:
observations_ad001 = pd.read_excel(FILEPATH_PDF, sheet_name=0, skiprows=1)
observations_ad001.head(2)

Unnamed: 0,Trial ID,Trial Bag Set,Trial Bag ID,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry
0,AD001-01,Set A,AD T10 H7,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,6.68,6.3
1,AD001-01,Set A,AD T7 H3,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,0.0,0.0


In [28]:
observations_ad001['Item ID'] = observations_ad001['Item Description Refined'].map(item2id)
observations_ad001.head(2)

Unnamed: 0,Trial ID,Trial Bag Set,Trial Bag ID,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry,Item ID
0,AD001-01,Set A,AD T10 H7,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,6.68,6.3,A16
1,AD001-01,Set A,AD T7 H3,CPLA Lid,BÉSICS® 8oz CPLA Hot cup lid,HIGH,2,0.0,0.0,A16


In [29]:
observations_ad001['Item ID'].isnull().sum()

0

In [30]:
drop_cols = ["Item Description From Trial", "Item Description Refined"]
observations_ad001 = observations_ad001.drop(drop_cols, axis=1)

In [31]:
joined_ad001 = pd.merge(items_clean, observations_ad001, on="Item ID")
joined_ad001['% Residuals (Weight)'] = joined_ad001['Residual Weight - Oven-dry']/(joined_ad001['Start Weight'] * joined_ad001['Number of Items per bag'])
joined_ad001['% Residuals (Area)'] = None
joined_ad001['Trial'] = joined_ad001['Trial ID']
joined_ad001.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,Trial ID,Trial Bag Set,Trial Bag ID,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry,% Residuals (Weight),% Residuals (Area),Trial
0,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,AD001-01,Set B,AD T10 H8,HIGH,2,23.21,18.57,1.144412,,AD001-01
1,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,AD001-01,Set B,AD T7 H4,HIGH,2,7.52,6.21,0.382703,,AD001-01


In [32]:
joined_ad001 = joined_ad001[keep_cols]
joined_ad001.head(2)

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
0,AD001-01,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,1.144412,
1,AD001-01,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,0.382703,


#### Trial WR001-01

In [33]:
observations_wr001 = pd.read_excel(FILEPATH_PDF, sheet_name=1)
observations_wr001.head(2)

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Set,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry
0,OWR001-01,BL1,Full,BESICS Bowl,BÉSICS® 12 oz Soup bowl,LOW,1,8.55,7.74
1,OWR001-01,CL3,Full,BESICS Bowl,BÉSICS® 12 oz Soup bowl,LOW,1,6.73,6.71


In [34]:
observations_wr001['Item ID'] = observations_wr001['Item Description Refined'].map(item2id)
observations_wr001.head(2)

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Set,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry,Item ID
0,OWR001-01,BL1,Full,BESICS Bowl,BÉSICS® 12 oz Soup bowl,LOW,1,8.55,7.74,A1
1,OWR001-01,CL3,Full,BESICS Bowl,BÉSICS® 12 oz Soup bowl,LOW,1,6.73,6.71,A1


In [35]:
observations_wr001['Item ID'].isnull().sum()

0

In [36]:
drop_cols = ["Item Description From Trial", "Item Description Refined"]
observations_wr001 = observations_wr001.drop(drop_cols, axis=1)

In [37]:
joined_wr001 = pd.merge(items_clean, observations_wr001, on="Item ID")
joined_wr001['% Residuals (Weight)'] = joined_wr001['Residual Weight - Oven-dry']/(joined_wr001['Start Weight'] * joined_wr001['Number of Items per bag'])
joined_wr001['% Residuals (Area)'] = None
joined_wr001['Trial'] = joined_wr001['Trial ID']
joined_wr001.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,Trial ID,Trial Bag ID,Trial Bag Set,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry,% Residuals (Weight),% Residuals (Area),Trial
0,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,OWR001-01,BL1,Full,LOW,1,8.55,7.74,0.953985,,OWR001-01
1,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,OWR001-01,CL3,Full,LOW,1,6.73,6.71,0.827034,,OWR001-01


In [38]:
joined_wr001 = joined_wr001[keep_cols]
joined_wr001.head(2)

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
0,OWR001-01,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,0.953985,
1,OWR001-01,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,0.827034,


#### Trial CASP001-01

In [39]:
observations_casp001 = pd.read_excel(FILEPATH_PDF, sheet_name=2)
observations_casp001.head(2)

Unnamed: 0,Trial ID,Trial Bag Set,Trial Bag ID,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry
0,CASP001-01,Set B,ST R1 H8,BESICS Bowl,BÉSICS® 12 oz Soup bowl,HIGH,2,0.0,0.0
1,CASP001-01,Set B,ST R1 H8,BESICS Sleeve,BÉSICS® Sleeve,HIGH,2,3.2,3.03


In [40]:
observations_casp001['Item ID'] = observations_casp001['Item Description Refined'].map(item2id)
observations_casp001.head(2)

Unnamed: 0,Trial ID,Trial Bag Set,Trial Bag ID,Item Description From Trial,Item Description Refined,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry,Item ID
0,CASP001-01,Set B,ST R1 H8,BESICS Bowl,BÉSICS® 12 oz Soup bowl,HIGH,2,0.0,0.0,A1
1,CASP001-01,Set B,ST R1 H8,BESICS Sleeve,BÉSICS® Sleeve,HIGH,2,3.2,3.03,A67


In [41]:
observations_casp001['Item ID'].isnull().sum()

0

In [42]:
drop_cols = ["Item Description From Trial", "Item Description Refined"]
observations_casp001 = observations_casp001.drop(drop_cols, axis=1)

In [43]:
joined_casp001 = pd.merge(items_clean, observations_casp001, on="Item ID")
joined_casp001['% Residuals (Weight)'] = joined_casp001['Residual Weight - Oven-dry']/(joined_casp001['Start Weight'] * joined_casp001['Number of Items per bag'])
joined_casp001['% Residuals (Area)'] = None
joined_casp001['Trial'] = joined_casp001['Trial ID']
joined_casp001.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,Trial ID,Trial Bag Set,Trial Bag ID,Load Concentration,Number of Items per bag,Residual Item Weight - Wet,Residual Weight - Oven-dry,% Residuals (Weight),% Residuals (Area),Trial
0,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,CASP001-01,Set B,ST R1 H8,HIGH,2,0.0,0.0,0.0,,CASP001-01
1,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,CASP001-01,Set B,ST R2 H4,HIGH,2,14.76,13.05,0.804232,,CASP001-01


In [44]:
joined_casp001 = joined_casp001[keep_cols]
joined_casp001.head(2)

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
0,CASP001-01,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,0.0,
1,CASP001-01,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,0.804232,


#### Set Up Mapping for Missing Items

In [45]:
FILPATH_ITEMS_MAP = DATA_FOLDER + "Item IDS for CASP004 CASP003.xlsx"
df_items_map = pd.read_excel(FILPATH_ITEMS_MAP)
df_items_map.head(2)

Unnamed: 0,OG Description,Item ID
0,These are the ones missing from CASP004:,
1,12 oz Hot cup / Soup bowl,A6


In [46]:
missing_items = df_items_map.set_index("OG Description")['Item ID'].to_dict()
missing_items

{'These are the ones missing from CASP004:': nan,
 '12 oz Hot cup / Soup bowl': 'A6',
 'Hot cup lid': 'A7',
 '16 oz PLA cold cup': 'A36',
 'Cutlery': 'A11',
 'PLA-lined fibre bowl, white': 'Q',
 'Uncoated paper food tray': 'A25',
 'Lined paper food tray with lid': 'A21',
 'Kraft control': 'A42',
 '16oz NoTree Paper Hot Cup SKU CU-SU-16': 'A1',
 '14oz PLA Cold Cup SKU CP-CS-14': 'A78',
 'Fiber Cutlery, Spoon SP-FB-6-LF': 'A77',
 '3Gallon Food Scrap Bag BG-CS-3': 'A4',
 'TPLA Spoon SP-PS-6': 'A72',
 'Large brown bag': 'A50',
 nan: nan,
 'These are the ones missing from CASP003:': nan,
 '2-ply Kraft Control 10"x5"': 'A42',
 'BESICS Fry Tray': 'A25',
 'BESICS 8oz Hot cup lid': 'A48',
 '16oz clear cold cup': 'A36',
 'Fiber Clamshell': 'A43',
 '8oz soup bowl': 'A12',
 'BESICS Box with Folding Lid': 'A21',
 'Multilaminate Stand up Pouch': 'There are actually two products with this as the original description, apologies:',
 'Elk Packaging': 'A35',
 'Alter Eco ': 'A4'}

#### Trial CASP003-01

In [47]:
observations_casp003 = pd.read_excel(FILEPATH_PDF, sheet_name=3)
observations_casp003.head(2)

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Colour,Bag Set Detail,Item Description From Trial,Item Description Refined,Number of Items per bag,Initial Item Weight - Aggregate,Final Residual Weight - wet - aggregate,Weight units,Initial Per-Item Weight,Notes on Final Weight
0,CASP003-01,A1,Black,CFTP baseline,Plastic Cup,Fabrikal 16 oz PLA cold cup,1,15,0.0,grams,15.0,
1,CASP003-01,A2,Black,CFTP baseline,Plastic Cup,Fabrikal 16 oz PLA cold cup,1,15,0.0,grams,15.0,


In [48]:
# everything in blug bags was combined and impossible to separate
observations_casp003 = observations_casp003[~(observations_casp003['Trial Bag Colour']=="Blue")]

In [49]:
observations_casp003['Item ID'] = observations_casp003['Item Description Refined'].map(item2id)
observations_casp003.head(2)

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Colour,Bag Set Detail,Item Description From Trial,Item Description Refined,Number of Items per bag,Initial Item Weight - Aggregate,Final Residual Weight - wet - aggregate,Weight units,Initial Per-Item Weight,Notes on Final Weight,Item ID
0,CASP003-01,A1,Black,CFTP baseline,Plastic Cup,Fabrikal 16 oz PLA cold cup,1,15,0.0,grams,15.0,,A11
1,CASP003-01,A2,Black,CFTP baseline,Plastic Cup,Fabrikal 16 oz PLA cold cup,1,15,0.0,grams,15.0,,A11


In [50]:
observations_casp003['Item ID'].isnull().sum()

6

In [51]:
unmatched = observations_casp003[observations_casp003['Item ID'].isnull()]['Item Description Refined'].str.strip().unique()
unmatched

array(['2-ply Kraft Control 10"x5"', 'BESICS 8oz Hot cup lid'],
      dtype=object)

In [52]:
observations_casp003['Item ID'] = np.where(observations_casp003['Item Description Refined'].str.strip().isin(unmatched),
                         observations_casp003['Item Description Refined'].str.strip().map(missing_items),
                         observations_casp003['Item ID'])

In [53]:
observations_casp003['Item ID'].isnull().sum()

0

In [54]:
unmatched = observations_casp003[observations_casp003['Item ID'].isnull()]['Item Description Refined'].str.strip().unique()
unmatched

array([], dtype=object)

In [55]:
items[items["Item Description Refined"].str.contains("BÉSICS® Lined Paper Box with Lid")]

Unnamed: 0,Item ID,Item Format,Brand,Manufacturer,Item SKU,Item Name,Item Description Refined,Item Description From Trial,Item ID.1,Material Class I,...,Certification @ time of testing,Kit,Initial Weight 1,Initial Weight 2,Initial Weight 3,"Average Initial Weight, g",Item Dimensions Compiled,"Item Capacity, mL",Notes,Start Weight
6,A7,Box,BÉSICS®,Seaside Paper,,Paper Tray with Lid,BÉSICS® Lined Paper Box with Lid,Lined paper food tray with lid,,Fiber,...,,Baseline,29.77,29.77,29.75,29.763333,4.75’’ x 6’’ x 3.25’’,,,29.763333


In [56]:
drop_cols = ["Item Description From Trial", "Item Description Refined"]
observations_casp003 = observations_casp003.drop(drop_cols, axis=1)

In [57]:
joined_casp003 = pd.merge(items_clean, observations_casp003, on="Item ID")
joined_casp003['% Residuals (Weight)'] = joined_casp003['Final Residual Weight - wet - aggregate']/(joined_casp003['Start Weight'] * joined_casp003['Number of Items per bag'])
joined_casp003['% Residuals (Area)'] = None
joined_casp003['Trial'] = joined_casp003['Trial ID']
joined_casp003.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,Trial ID,Trial Bag ID,Trial Bag Colour,Bag Set Detail,Number of Items per bag,Initial Item Weight - Aggregate,Final Residual Weight - wet - aggregate,Weight units,Initial Per-Item Weight,Notes on Final Weight,% Residuals (Weight),% Residuals (Area),Trial
0,A7,Paper Tray with Lid,BÉSICS® Lined Paper Box with Lid,Fiber,Lined Fiber,PLA-lined fibre,29.763333,CASP003-01,A1,Black,CFTP baseline,1,35,40.0,grams,35.0,,1.343935,,CASP003-01
1,A7,Paper Tray with Lid,BÉSICS® Lined Paper Box with Lid,Fiber,Lined Fiber,PLA-lined fibre,29.763333,CASP003-01,A2,Black,CFTP baseline,1,35,52.0,grams,35.0,,1.747116,,CASP003-01


In [58]:
joined_casp003 = joined_casp003[keep_cols]
joined_casp003.head(2)

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
0,CASP003-01,A7,Paper Tray with Lid,BÉSICS® Lined Paper Box with Lid,Fiber,Lined Fiber,PLA-lined fibre,29.763333,1.343935,
1,CASP003-01,A7,Paper Tray with Lid,BÉSICS® Lined Paper Box with Lid,Fiber,Lined Fiber,PLA-lined fibre,29.763333,1.747116,


#### Trial WR003-01

In [59]:
observations_wr003 = pd.read_excel(FILEPATH_PDF, sheet_name=4)
observations_wr003.head(2)

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Colour,Item ID,Item Description From Trial,Item Description Refined,Number of Items per bag,Fragments Found Y/N,Final Residual Weight - wet,Fragment size (L x W x H),Notes on Final Weight
0,OWR003-01,Blue Zip Tie #1,Blue,1D,CPLA utensil corn starch,WR3 - CPLA Spoon,1,Y,4.76,6x1.5x1,"good disintegration, in 3 fragments"
1,OWR003-01,Blue Zip Tie #1,Blue,1F,Coffee Bag 1,WR3 - Coffee Bag 1,1,Y,17.76,8x7x0.5,entire bag intact


In [60]:
observations_wr003['Item ID'] = observations_wr003['Item Description Refined'].map(item2id)
observations_wr003.head(2)

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Colour,Item ID,Item Description From Trial,Item Description Refined,Number of Items per bag,Fragments Found Y/N,Final Residual Weight - wet,Fragment size (L x W x H),Notes on Final Weight
0,OWR003-01,Blue Zip Tie #1,Blue,A52,CPLA utensil corn starch,WR3 - CPLA Spoon,1,Y,4.76,6x1.5x1,"good disintegration, in 3 fragments"
1,OWR003-01,Blue Zip Tie #1,Blue,A48,Coffee Bag 1,WR3 - Coffee Bag 1,1,Y,17.76,8x7x0.5,entire bag intact


In [61]:
observations_wr003['Item ID'].isnull().sum()

7

In [62]:
observations_wr003[observations_wr003['Item ID'].isnull()]

Unnamed: 0,Trial ID,Trial Bag ID,Trial Bag Colour,Item ID,Item Description From Trial,Item Description Refined,Number of Items per bag,Fragments Found Y/N,Final Residual Weight - wet,Fragment size (L x W x H),Notes on Final Weight
5,OWR003-01,Blue Zip Tie #1,Blue,,Container (unlined),WR3 - Soup bowl,1,N,0.0,0,not detected
12,OWR003-01,Blue Zip Tie #2,Blue,,Container (unlined),WR3 - Soup bowl,1,N,0.0,0,not detected
20,OWR003-01,Blue Zip Tie #3,Blue,,Container (unlined),WR3 - Soup bowl,1,N,0.0,0,not detected
30,OWR003-01,Blue Zip Tie #4,Blue,,Container (unlined),WR3 - Soup bowl,1,N,0.0,0,not detected
36,OWR003-01,Blue Zip Tie #5,Blue,,Container (unlined),WR3 - Soup bowl,1,Y,8.0,5.5x3.5x1,2 fragments
45,OWR003-01,Blue Zip Tie #6,Blue,,Container (unlined),WR3 - Soup bowl,1,Y,5.41,6x1.5x1,1 fragment
113,OWR003-01,Red Zip Tie #1,Red,,Besics Box with Lid,BÉSICS® Lined Paper Box with Lid,1,N,0.0,0,not detected


In [63]:
unmatched = observations_wr003[observations_wr003['Item ID'].isnull()]['Item Description Refined'].str.strip().unique()
unmatched

array(['WR3 - Soup bowl', 'BÉSICS® Lined Paper Box with Lid'],
      dtype=object)

In [64]:
observations_wr003['Item ID'] = np.where(observations_wr003['Item Description Refined'].str.strip().isin(unmatched),
                         observations_wr003['Item Description Refined'].str.strip().map(missing_items),
                         observations_wr003['Item ID'])

In [65]:
unmatched = observations_wr003[observations_wr003['Item ID'].isnull()]['Item Description Refined'].str.strip().unique()
unmatched

array(['WR3 - Soup bowl', 'BÉSICS® Lined Paper Box with Lid'],
      dtype=object)

In [66]:
drop_cols = ["Item Description From Trial", "Item Description Refined"]
observations_wr003 = observations_wr003.drop(drop_cols, axis=1)

In [67]:
joined_wr003 = pd.merge(items_clean, observations_wr003, on="Item ID")
joined_wr003['% Residuals (Weight)'] = joined_wr003['Final Residual Weight - wet']/(joined_wr003['Start Weight'] * joined_wr003['Number of Items per bag'])
joined_wr003['% Residuals (Area)'] = None
joined_wr003['Trial'] = joined_wr003['Trial ID']
joined_wr003.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,Trial ID,Trial Bag ID,Trial Bag Colour,Number of Items per bag,Fragments Found Y/N,Final Residual Weight - wet,Fragment size (L x W x H),Notes on Final Weight,% Residuals (Weight),% Residuals (Area),Trial
0,A4,PLA-lined Bagasse Bowl 300mL,BÉSICS® 250mL PLA-lined Bagasse Leaf Bowl,Fiber,Lined Fiber,PLA lined Bagasse,10.88,OWR003-01,Red Zip Tie #2,Red,1,N,0.0,0,not detected,0.0,,OWR003-01
1,A4,PLA-lined Bagasse Bowl 300mL,BÉSICS® 250mL PLA-lined Bagasse Leaf Bowl,Fiber,Lined Fiber,PLA lined Bagasse,10.88,OWR003-01,Red Zip Tie #3,Red,1,N,0.0,0,not detected,0.0,,OWR003-01


In [68]:
joined_wr003 = joined_wr003[keep_cols]
joined_wr003.head(2)

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
0,OWR003-01,A4,PLA-lined Bagasse Bowl 300mL,BÉSICS® 250mL PLA-lined Bagasse Leaf Bowl,Fiber,Lined Fiber,PLA lined Bagasse,10.88,0.0,
1,OWR003-01,A4,PLA-lined Bagasse Bowl 300mL,BÉSICS® 250mL PLA-lined Bagasse Leaf Bowl,Fiber,Lined Fiber,PLA lined Bagasse,10.88,0.0,


#### Trial CASP004-01

In [69]:
FILEPATH_PDF = DATA_FOLDER + "CASP004-01 - Results Pre-Processed for Analysis from PDF Tables.xlsx"

#### Items

In [70]:
items_casp004 = pd.read_excel(FILEPATH_PDF, sheet_name=2)
items_casp004.head(2)

Unnamed: 0,Org Id,Trial Id,Bag Colour,Bag Set,TrialBagType,Item Name,Brand/Manufacturer,Item Description,Material Composition,Certifications,Size,Weight 1,Weight 2,Weight 3,Weight (average),Weight units
0,44547,44547-01-21,Green,1-3,Standard,12 oz Hot cup / Soup bowl,BÉSICS®,12 oz Hot cup / Soup bowl,"Paper, PLA lining, adhesive, ink",BPI,"3’’ diameter, 2.5’’ H",8.12,8.1,8.12,8.11,grams
1,44547,44547-01-21,Green,1-3,Standard,Hot cup lid,BÉSICS®,Hot cup lid,CPLA based bio-polymer,BPI,"3.5’’ diameter, 0.5’’ H",3.58,3.58,3.56,3.57,grams


In [71]:
items_casp004 = items_casp004.drop_duplicates(subset=['Item Name'])
items_casp004.head(2)

Unnamed: 0,Org Id,Trial Id,Bag Colour,Bag Set,TrialBagType,Item Name,Brand/Manufacturer,Item Description,Material Composition,Certifications,Size,Weight 1,Weight 2,Weight 3,Weight (average),Weight units
0,44547,44547-01-21,Green,1-3,Standard,12 oz Hot cup / Soup bowl,BÉSICS®,12 oz Hot cup / Soup bowl,"Paper, PLA lining, adhesive, ink",BPI,"3’’ diameter, 2.5’’ H",8.12,8.1,8.12,8.11,grams
1,44547,44547-01-21,Green,1-3,Standard,Hot cup lid,BÉSICS®,Hot cup lid,CPLA based bio-polymer,BPI,"3.5’’ diameter, 0.5’’ H",3.58,3.58,3.56,3.57,grams


In [72]:
casp004_weights = items_casp004.set_index('Item Name')['Weight (average)'].to_dict()
casp004_weights

{'12 oz Hot cup / Soup bowl': 8.11,
 'Hot cup lid': 3.57,
 '16 oz PLA cold cup': 14.58,
 'Cutlery': 4.75,
 'PLA-lined fibre bowl, white ': 10.52,
 'Uncoated paper food tray ': 11.87,
 'Lined paper food tray with lid': 29.76,
 'Kraft control': 2.85,
 'Fiber Clamshell, Lined 9x9x3 SKU TO-SC-U9L-LF': 47.78,
 '16oz NoTree Paper Hot Cup SKU CU-SU-16': 13.52,
 '14oz PLA Cold Cup SKU CP-CS-14': 10.17,
 'PLA Lid: 32oz Burrito Bowl SKU BOL-CS-UBB': 15.14,
 'Fiber Cutlery, Spoon SP-FB-6-LF': 3.99,
 '3Gallon Food Scrap Bag BG-CS-3': 7.16,
 '8" Kraft straw ST-PA-8-K': 1.07,
 'TPLA Spoon SP-PS-6': 4.97,
 'Wrapper for TPLA Spoon SP-PS-I': 0.82,
 'Large brown bag': 12.72,
 'Small zippered clear colour bag': 4.83,
 'Printed small bag with brown background and logo': 4.57,
 'Large natural clear colour bag': 8.4}

#### Observations

In [73]:
observations_casp004 = pd.read_excel(FILEPATH_PDF, sheet_name=1)
observations_casp004.head(2)

Unnamed: 0,Org Id,Trial Id,Bag Colour,Bag Id,Stage,Weather,Moisture,Temp,Temp Units,Bulk Density,...,Bag Notes,Product Name,Photo,Fragment Size,Weight 1,Weight 2,Weight 3,Product Weight Avg,Weight Units,Product Notes
0,44547,44547-01-21,Green,A-1,Start,Windy,0.51,65-80,F,754 lbs/CY (manual) \n0.29 g/cc (lab),...,Top depth,12 oz Hot cup / Soup bowl,Y,"3’’ diameter, 2.5’’ H",8.12,8.1,8.12,8.11,grams,
1,44547,44547-01-21,Green,A-1,Start,Windy,0.51,65-80,F,754 lbs/CY (manual) \n0.29 g/cc (lab),...,Top depth,Hot cup lid,Y,"3.5’’ diameter, 0.5’’ H",3.58,3.58,3.56,3.57,grams,


In [74]:
observations_casp004['Start Weight'] = observations_casp004['Product Name'].map(casp004_weights)
observations_casp004.head(2)

Unnamed: 0,Org Id,Trial Id,Bag Colour,Bag Id,Stage,Weather,Moisture,Temp,Temp Units,Bulk Density,...,Product Name,Photo,Fragment Size,Weight 1,Weight 2,Weight 3,Product Weight Avg,Weight Units,Product Notes,Start Weight
0,44547,44547-01-21,Green,A-1,Start,Windy,0.51,65-80,F,754 lbs/CY (manual) \n0.29 g/cc (lab),...,12 oz Hot cup / Soup bowl,Y,"3’’ diameter, 2.5’’ H",8.12,8.1,8.12,8.11,grams,,8.11
1,44547,44547-01-21,Green,A-1,Start,Windy,0.51,65-80,F,754 lbs/CY (manual) \n0.29 g/cc (lab),...,Hot cup lid,Y,"3.5’’ diameter, 0.5’’ H",3.58,3.58,3.56,3.57,grams,,3.57


In [75]:
# These bags were not found
observations_casp004 = observations_casp004[
    (observations_casp004['Stage'] == 'End') &
    (~observations_casp004['Bag Id'].isin(['A-5', 'A-6']))
]

In [76]:
observations_casp004['End Weight'] = observations_casp004[['Weight 1', 'Weight 2', 'Weight 3']].mean(axis=1)
observations_casp004['End Weight'] = observations_casp004['End Weight'].fillna(0)

In [77]:
observations_casp004['Item ID'] = observations_casp004['Product Name'].map(item2id)

In [78]:
for obs in observations_casp004[observations_casp004['Item ID'].isnull()]['Product Name'].unique():
    print(obs)

12 oz Hot cup / Soup bowl
Hot cup lid
16 oz PLA cold cup
Cutlery
PLA-lined fibre bowl, white 
Uncoated paper food tray 
Lined paper food tray with lid
Kraft control
16oz NoTree Paper Hot Cup SKU CU-SU-16
14oz PLA Cold Cup SKU CP-CS-14
Fiber Cutlery, Spoon SP-FB-6-LF
3Gallon Food Scrap Bag BG-CS-3
TPLA Spoon SP-PS-6
Large brown bag


In [79]:
unmatched = observations_casp004[observations_casp004['Item ID'].isnull()]['Product Name'].str.strip().unique()
unmatched

array(['12 oz Hot cup / Soup bowl', 'Hot cup lid', '16 oz PLA cold cup',
       'Cutlery', 'PLA-lined fibre bowl, white',
       'Uncoated paper food tray', 'Lined paper food tray with lid',
       'Kraft control', '16oz NoTree Paper Hot Cup SKU CU-SU-16',
       '14oz PLA Cold Cup SKU CP-CS-14',
       'Fiber Cutlery, Spoon SP-FB-6-LF',
       '3Gallon Food Scrap Bag BG-CS-3', 'TPLA Spoon SP-PS-6',
       'Large brown bag'], dtype=object)

In [80]:
observations_casp004['Item ID'] = np.where(observations_casp004['Product Name'].str.strip().isin(unmatched),
                         observations_casp004['Product Name'].str.strip().map(missing_items),
                         observations_casp004['Item ID'])

In [81]:
observations_casp004['Item ID'].isnull().sum()

0

In [82]:
observations_casp004['Trial'] = observations_casp004['Trial Id']
observations_casp004['% Residuals (Area)'] = None
observations_casp004['% Residuals (Weight)'] = observations_casp004['End Weight']/observations_casp004['Start Weight']
observations_casp004.head(2)

Unnamed: 0,Org Id,Trial Id,Bag Colour,Bag Id,Stage,Weather,Moisture,Temp,Temp Units,Bulk Density,...,Weight 3,Product Weight Avg,Weight Units,Product Notes,Start Weight,End Weight,Item ID,Trial,% Residuals (Area),% Residuals (Weight)
276,44547,44547-01-21,Green,A-1,End,Windy,0.51,65-80,F,754 lbs/CY (manual) \n0.29 g/cc (lab),...,,,,,8.11,0.0,A6,44547-01-21,,0.0
277,44547,44547-01-21,Green,A-1,End,Windy,0.51,65-80,F,754 lbs/CY (manual) \n0.29 g/cc (lab),...,,,,,3.57,0.0,A7,44547-01-21,,0.0


In [83]:
observations_casp004 = observations_casp004.drop(["Start Weight"], axis=1) # workaround for including this based on CASP data
joined_casp004 = pd.merge(items_clean, observations_casp004, on="Item ID")
joined_casp004.head(2)

Unnamed: 0,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,Org Id,Trial Id,Bag Colour,...,Weight 1,Weight 2,Weight 3,Product Weight Avg,Weight Units,Product Notes,End Weight,Trial,% Residuals (Area),% Residuals (Weight)
0,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,44547,44547-01-21,Pink,...,24.46,24.5,24.5,,,,24.486667,44547-01-21,,1.811144
1,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,44547,44547-01-21,Pink,...,49.0,47.55,47.63,,,,48.06,44547-01-21,,3.554734


In [84]:
joined_casp004 = joined_casp004[keep_cols]
joined_casp004.head(2)

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
0,44547-01-21,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,1.811144,
1,44547-01-21,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,3.554734,


In [85]:
joined_casp004 = joined_casp004[~(joined_casp004['% Residuals (Weight)'] > 10)]

### Join All Trials

In [86]:
# missing casp004, casp003
joined = pd.concat((joined_cl, joined_ad001, joined_wr001, joined_casp001, joined_wr003, joined_casp003, joined_casp004))

  joined = pd.concat((joined_cl, joined_ad001, joined_wr001, joined_casp001, joined_wr003, joined_casp003, joined_casp004))


In [87]:
len(joined), len(joined_cl), len(joined_ad001), len(joined_wr001), len(joined_casp001)

(1324, 749, 94, 102, 85)

In [88]:
joined.head(2)

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
0,Facility 1 ( Windrow),K,CPLA Hot Cup Lid,BÉSICS® 8oz CPLA Hot cup lid,Biopolymer,Rigid Biopolymer (< 0.75mm),CPLA,2.5,0.766,0.36
1,Facility 1 ( Windrow),K,CPLA Hot Cup Lid,BÉSICS® 8oz CPLA Hot cup lid,Biopolymer,Rigid Biopolymer (< 0.75mm),CPLA,2.5,0.566,0.236


In [89]:
joined[joined['Material Class II']=="Rigid biopolymer"]["Item Description Refined"].unique()

array(['TPLA Spoon - World Centric', 'D&W PLA Lid 32oz',
       'Fabrikal PLA Cold Cup 20oz', 'MPLA Spoon - NaturTec',
       'Fabrikal 16 oz PLA cold cup', 'PLA Cold Cup 14oz'], dtype=object)

In [90]:
joined = joined[~(joined["Material Class II"]=="Mixed Materials")]

In [91]:
# joined[joined['Material Class II'] == "Biopolymer Film/Bag"]
joined[joined['% Residuals (Weight)'] > 3]

Unnamed: 0,Trial,Item ID,Item Name,Item Description Refined,Material Class I,Material Class II,Material Class III,Start Weight,% Residuals (Weight),% Residuals (Area)
6,OWR003-01,A6,BÉSICS® Baseline Containers,BÉSICS® Box with Lid AND Fry Tray,Fiber,Lined Fiber,Lined Tree Fiber and Unlined Fiber,5.603333,9.057109,
7,OWR003-01,A6,BÉSICS® Baseline Containers,BÉSICS® Box with Lid AND Fry Tray,Fiber,Lined Fiber,Lined Tree Fiber and Unlined Fiber,5.603333,4.120761,
9,OWR003-01,A6,BÉSICS® Baseline Containers,BÉSICS® Box with Lid AND Fry Tray,Fiber,Lined Fiber,Lined Tree Fiber and Unlined Fiber,5.603333,4.215348,
1,44547-01-21,A1,PLA-lined Paper Bowl 12oz,BÉSICS® 12 oz Soup bowl,Fiber,Lined Fiber,PLA lined Paper,8.113333,3.554734,
28,44547-01-21,A21,Fiber Bowl 16oz,Moulded fiber bowl 16oz,Fiber,Unlined Fiber,Unlined Molded Fiber (mixed fibers),17.053333,4.493504,


In [92]:
joined = joined[~(joined["Item Name"] == "Multi-laminate stand-up pounch with zipper")]

### Create Visualizations

In [93]:
import plotly.graph_objects as go
import numpy as np
import matplotlib.colors as mcolors

class2color = {
    'Positive Control': '#70AD47',
    'Mixed Materials': '#48646A',
    'Fiber': '#298FC2',
    'Biopolymer': '#FFB600',
}

In [94]:
joined['Material Class I'].unique(), joined['Material Class II'].unique()

(array(['Biopolymer', 'Fiber', 'Positive Control'], dtype=object),
 array(['Rigid Biopolymer (< 0.75mm)', 'Unlined Fiber', 'Lined Fiber',
        'Biopolymer Film/Bag', 'Rigid biopolymer',
        'Rigid Biopolymer (> 0.75mm)', 'Foam biopolymer',
        'Positive Control - Fiber'], dtype=object))

In [95]:
class_I_order = ['Fiber', 'Biopolymer', 'Mixed Materials', 'Positive Control']
class_II_order = ['Unlined Fiber', 'Lined Fiber', 'Biopolymer Film/Bag', 'Rigid Biopolymer (> 0.75mm)', 'Rigid Biopolymer (< 0.75mm)', 'Positive Control - Fiber', 'Positive Control - Film', 'Positive Control - Food Scraps']

In [96]:
def box_and_whisker(df_input, column, class_I=None, cap=False, height=800, width=1000, save=False):
    df = df_input.copy() # prevent modifying actual dataframe
    
    data = []
    x_labels = []

    if cap:
        df[column] = df[column].clip(upper=1)
    if class_I:
        df = df[df['Material Class I'] == class_I]
        
    max_value = df[column].max()
    max_value = max(100, max_value)

    for class_II in class_II_order:
        group = df[df['Material Class II'] == class_II]
        if not group.empty:
            count = group[column].count()
            class_I_name = group['Material Class I'].iloc[0]
            color = class2color.get(class_I_name, '#000')
            trace = go.Box(y=group[column], name=class_II, boxpoints='outliers', marker_color=color, width=.3)
            data.append(trace)
            x_labels.append(f"     {class_II}<br>     n={count}")

    y_axis_title = f"{column}"
    if cap:
        y_axis_title += " Capped"

    layout = go.Layout( 
        title_font=dict(size=14, family='Roboto'),
        font=dict(family='Roboto', size=11),
        height=height,
        width=width,
        showlegend=False,
        xaxis=dict(
            tickmode='array',
            tickvals=list(range(len(x_labels))),
            ticktext=x_labels,
            title_font=dict(size=14),
            tickfont=dict(size=11),
            tickangle=90
        ),
        yaxis=dict(
            title=y_axis_title,
            tickformat=".0%",
            tickmode='array',
            tickvals=np.arange(0, max_value, 0.25),  # Adjust this range if your data is not percentage-based
            title_font=dict(size=16),
            tickfont=dict(size=9),
            rangemode="tozero"
        ),
    )

    fig = go.Figure(data=data, layout=layout)

    if save:
        filepath = column.replace(" ","_") + "_box_and_whisker"

        if cap:
            filepath += "_capped"

        filepath += ".png"
        fig.write_image(filepath)

    fig.show()


In [97]:
def residuals_bar(df_input, class_I=None, cap=False, height=800, width=1000, save=False):
    df = df_input.copy() # prevent modifying actual dataframe
    
    # Create weight columns
    column = '% Residuals (Weight)'
    df['End Weight'] = df[column] * df['Start Weight']

    data = []
    x_labels = []

    if cap:
        df[column] = df[column].clip(upper=1)
    if class_I:
        df = df[df['Material Class I'] == class_I]

    # Weird setup to handle null start weights and still plot correctly
    filtered_class_II_order = [class_II for class_II in class_II_order if not df[df['Material Class II'] == class_II]['Start Weight'].isna().all()]
    x_positions = np.arange(len(filtered_class_II_order))  # Numeric x-axis positions

    for i, class_II in enumerate(filtered_class_II_order):
        group = df[df['Material Class II'] == class_II]
        if not group.empty:
            count = group[column].count()
            class_I_name = group['Material Class I'].iloc[0]
            color = class2color.get(class_I_name, '#000')
            if not group['Start Weight'].isna().all():
                trace_start = go.Bar(x=[x_positions[i] - 0.2], y=[group['Start Weight'].sum()], marker_color=color, width=.3)
                trace_end = go.Bar(x=[x_positions[i] + 0.2], y=[group['End Weight'].sum()], marker_color=color, width=.3, opacity=.6)
                data.append(trace_start)
                data.append(trace_end)
                x_labels.append(f"     {class_II}<br>     n={count}")

    layout = go.Layout(
        barmode='group',
        title_font=dict(size=14, family='Roboto'),
        font=dict(family='Roboto', size=11),
        height=height,
        width=width,
        showlegend=False,
        xaxis=dict(
            tickmode='array',
            tickvals=list(range(len(x_labels))),
            ticktext=x_labels,
            title_font=dict(size=14),
            tickfont=dict(size=11),
            tickangle=90
        ),
        yaxis=dict(
            title="Total Weight in Grams (Start and End)",
            # tickformat=".0%",  # Formats the tick labels as percentages
            tickmode='array',
            title_font=dict(size=16),
            tickfont=dict(size=9),
        ),
    )

    fig = go.Figure(data=data, layout=layout)
    fig.show()

    if save:
        filepath = column.replace(" ","_") + "_double_bar"

        if cap:
            filepath += "_capped"

        filepath += ".png"
        fig.write_image(filepath)

In [102]:
def mean_residuals_bar(df_input, column, class_I=None, cap=False, height=800, width=1000, save=False):
    df = df_input.copy()  # prevent modifying actual dataframe

    if cap:
        df[column] = df[column].clip(upper=1)
    if class_I:
        df = df[df['Material Class I'] == class_I]

    data = []
    x_labels = []
    x_positions = np.arange(len(class_II_order))  # Numeric x-axis positions

    max_value = df[column].max()
    max_value = max(100, max_value)

    for i, class_II in enumerate(class_II_order):
        group = df[df['Material Class II'] == class_II]
        if not group.empty:
            mean_residual = group[column].mean()
            class_I_name = group['Material Class I'].iloc[0]
            color = class2color.get(class_I_name, '#000')
            formatted_text = f"{mean_residual:.0%}"
            trace = go.Bar(x=[x_positions[i]], y=[mean_residual], marker_color=color, name=class_II, width=.3,
                        text=formatted_text, textposition='outside', textfont=dict(size=14))
            data.append(trace)
            x_labels.append(f"     {class_II}<br>     n={len(group)}")

    y_axis_title = f"Mean {column}"
    if cap:
        y_axis_title += " Capped"

    layout = go.Layout(
        barmode='group',
        title_font=dict(size=14, family='Roboto'),
        font=dict(family='Roboto', size=11),
        height=height,
        width=width,
        showlegend=False,
        xaxis=dict(
            tickmode='array',
            tickvals=x_positions,
            ticktext=x_labels,
            title_font=dict(size=14),
            tickfont=dict(size=11),
            tickangle=90,
            title_standoff=25
        ),
        yaxis=dict(
            title=y_axis_title,
            tickformat=".0%",
            tickvals=list(range(len(x_labels))),
            range=[0,1],
            tickmode='array',
            title_font=dict(size=16),
            tickfont=dict(size=9),
        ),
    )

    fig = go.Figure(data=data, layout=layout)
    fig.show()

    if save:
        filepath = column.replace(" ","_") + "_bar"

        if cap:
            filepath += "_capped"

        filepath += ".png"
        fig.write_image(filepath)

In [99]:
joined['Material Class I'].unique()

array(['Biopolymer', 'Fiber', 'Positive Control'], dtype=object)

In [100]:
# HIGHEST PRIORITY
# Start/End weight
# Mean bar graph, material category 1 without "mixed materials"
# weight
# surface area
# Boxplot, material category 1 without "mixed materials"
# weight
# surface area
# SECONDARY PRIORITY
# The 2 mean bar graphs and 2 boxplots with >100% residuals capped at 100%

In [103]:
column = '% Residuals (Weight)'
box_and_whisker(joined, column, width=1200, save=True)
# residuals_bar(joined, width=1200, save=True)
mean_residuals_bar(joined, column, width=1200, save=True)

box_and_whisker(joined, column, width=1200, cap=True, save=True)
# residuals_bar(joined, width=1200, cap=True, save=True)
mean_residuals_bar(joined, column, width=1200, cap=True, save=True)

In [104]:
column = '% Residuals (Area)'
box_and_whisker(joined, column, width=1200, save=True)
mean_residuals_bar(joined, column, width=1200, save=True)

box_and_whisker(joined, column, width=1200, cap=True, save=True)
mean_residuals_bar(joined, column, width=1200, cap=True, save=True)