In [169]:
import pandas as pd
from thefuzz import fuzz
import re

In [170]:
def remove_special_chars_except_alnum_parentheses(s):
    """Trim junk characters from both ends of ``s``.

    At the start, a run of characters that are neither ASCII letters/digits
    nor ``(`` is removed; at the end, a run that is neither ASCII
    letters/digits nor ``)`` is removed. Characters in the middle of the
    string are left untouched.
    """
    edge_junk = re.compile(r'^[^a-zA-Z0-9(]+|[^a-zA-Z0-9)]+$')
    return edge_junk.sub('', s)

def filter_strings_with_parentheses(strings_set):
    """Return a list of the items that contain both ``(`` and ``)``.

    The relative position of the two brackets is not checked here.
    """
    def has_both_brackets(text):
        return '(' in text and ')' in text

    return list(filter(has_both_brackets, strings_set))

def split_string_with_parentheses(input_string):
    """Expand the first bracketed group of a name into two alternative names.

    For ``"<before> (<inside>) <after>"`` the result is the pair
    ``("<before> <after>", "<inside> <after>")``, each stripped of
    surrounding whitespace.

    Returns ``(None, None)`` when there is no ``(`` or no ``)`` following it.
    """
    start_index = input_string.find('(')
    if start_index == -1:
        return None, None

    # Search for the closing bracket only after the opening one, so a stray
    # ')' earlier in the string does not hide a valid '(...)' pair.
    end_index = input_string.find(')', start_index + 1)
    if end_index == -1:
        return None, None

    before = input_string[:start_index].strip()
    inside = input_string[start_index + 1:end_index]
    after = input_string[end_index + 1:]

    return (before + after).strip(), (inside + after).strip()
    
def filter_strings_with_slash(strings_set):
    """Return a list of the items that contain a ``/`` character."""
    matches = []
    for candidate in strings_set:
        if '/' in candidate:
            matches.append(candidate)
    return matches


def split_string_with_slash(s):
    """Break ``s`` on ``/``, ``(`` and ``)`` and return the trimmed pieces.

    Empty or whitespace-only pieces are dropped; the remaining pieces keep
    their original order. Three scenarios are covered: a single '/',
    more than one '/', and '/' combined with '()'.
    """
    pieces = []
    for fragment in re.split(r'[/()]', s):
        trimmed = fragment.strip()
        if trimmed:
            pieces.append(trimmed)
    return pieces

In [374]:
# Ingredient names reported as "harmful". Several entries differ only by
# letter case (e.g. "Formaldehyde" / "FORMALDEHYDE"); they are distinct
# members here and only collapse once the names are lower-cased.
harmful_ingredients = {
    "Hydroquinone",
    "Retinol", "Retinoid", "retinoic acid", "retinyl palmitate",
    "retinaldehyde", "adapalene", "tretinoin", "tazarotene", "isotretinoin",
    "Salicylic Acid", "Beta Hydroxy Acid", "Alpha Hydroxy Acid",
    "salicylate", "sodium salicylate",
    "willow extract", "Tropic acid", "Trethocanic acid",
    "dihydroxyacetone", "DHA",
    "Phthalates", "PAEs", "dibutylphthalate", "DBP", "dimethylphthalate",
    "DMP", "diethylphthalate", "DEP", "diethyl", "dibutyl",
    "benzylbutyl phthalate", "Paraben", "phthalate",
    "Formaldehyde", "Phenoxyethanol", "quaternium-15", "DMDM", "hydantoin",
    "formalin", "methylene glycol", "ethylene oxide", "benzyl benzoate",
    "Toluene",
    "Thioglycolic acid",
    "A-aldehyde", "Retinaldehyde", "A-alcohol", "A-ester",
    "Retinyl palmitate", "A-acid",
    "Salicylic acid", "Growth hormone", "Benzene peroxide", "Tetracycline",
    "PROPYLENE GLYCOL", "Octinoxate", "PARABENS", "SODIUM BENZOATE",
    "DIMETHICONE", "BHA",
    "TRICLOSAN", "BHT", "FORMALDEHYDE", "BPA", "Magnesium hydroxide",
    "GLYCERIN", "PETROLEUM JELLY",
}

# Ingredient names reported as "doubtful" rather than harmful.
doubleful_ingredients = {
    "Polyacrylamide", "Acrylamide", "Polyacrylate", "Polyquaternium",
    "Acrylate",
    "ethanol", "PEG", "Polyethylene Glycol",
    "Fragrance", "fragrance (parfum)",
}

In [375]:
# Lower-case both watch-lists once so they can be intersected with
# lower-cased ingredient names.
red_list = set(map(str.lower, harmful_ingredients))
amber_list = set(map(str.lower, doubleful_ingredients))

In [376]:
red_list

{'a-acid',
 'a-alcohol',
 'a-aldehyde',
 'a-ester',
 'adapalene',
 'alpha hydroxy acid',
 'benzene peroxide',
 'benzyl benzoate',
 'benzylbutyl phthalate',
 'beta hydroxy acid',
 'bha',
 'bht',
 'bpa',
 'dbp',
 'dep',
 'dha',
 'dibutyl',
 'dibutylphthalate',
 'diethyl',
 'diethylphthalate',
 'dihydroxyacetone',
 'dimethicone',
 'dimethylphthalate',
 'dmdm',
 'dmp',
 'ethylene oxide',
 'formaldehyde',
 'formalin',
 'glycerin',
 'growth hormone',
 'hydantoin',
 'hydroquinone',
 'isotretinoin',
 'magnesium hydroxide',
 'methylene glycol',
 'octinoxate',
 'paes',
 'paraben',
 'parabens',
 'petroleum jelly',
 'phenoxyethanol',
 'phthalate',
 'phthalates',
 'propylene glycol',
 'quaternium-15',
 'retinaldehyde',
 'retinoic acid',
 'retinoid',
 'retinol',
 'retinyl palmitate',
 'salicylate',
 'salicylic acid',
 'sodium benzoate',
 'sodium salicylate',
 'tazarotene',
 'tetracycline',
 'thioglycolic acid',
 'toluene',
 'trethocanic acid',
 'tretinoin',
 'triclosan',
 'tropic acid',
 'willow ext

In [377]:
# Product-name registries, one per verdict. They must start out truly empty:
# `{''}` would seed each set with a blank product name (and `{}` would be a
# dict), so use set() explicitly.
safe_products = set()
amber_products = set()
red_products = set()

In [378]:
amber_list

{'acrylamide',
 'acrylate',
 'ethanol',
 'fragrance',
 'fragrance (parfum)',
 'peg',
 'polyacrylamide',
 'polyacrylate',
 'polyethylene glycol',
 'polyquaternium'}

In [379]:
# Seed set of ingredient names treated as safe; update_the_green_list()
# adds to it as products are processed.
green_list = set(('water', 'aqua'))

In [380]:
def get_the_input_list(input_string):
    """Turn a raw ingredients string into a set of normalised ingredient names.

    Processing steps:
      1. Split on any of the separators ``, ･ . · •``.
      2. Keep only pieces containing at least one alphabetic character, trim
         junk characters from both ends and lower-case them.
      3. For names containing ``(...)``, also add the two alternative names
         produced by ``split_string_with_parentheses``.
      4. For names containing ``/``, also add every fragment produced by
         ``split_string_with_slash``.

    The original (unsplit) names stay in the result alongside the fragments.

    Raises:
        ValueError: if none of the separators occurs in ``input_string``.
    """
    raw_pieces = re.split(r'[,･.·•]', input_string)
    if len(raw_pieces) <= 1:
        # A bare `raise` here would only produce "No active exception to
        # reraise"; raise a real error that carries the explanation.
        raise ValueError('Cannot Recognise the Split Symbol in the Ingredients List')

    input_list = {
        remove_special_chars_except_alnum_parentheses(piece).lower()
        for piece in raw_pieces
        if any(c.isalpha() for c in piece)
    }

    # Iterate over the list snapshot returned by the filter, so growing the
    # set inside the loop is safe.
    for bracketed in filter_strings_with_parentheses(input_list):
        input_list.update(split_string_with_parentheses(bracketed))
    # A failed bracket split returns (None, None); drop the placeholder before
    # the slash filter runs `'/' in s` on every member.
    input_list.discard(None)

    for slashed in filter_strings_with_slash(input_list):
        input_list.update(split_string_with_slash(slashed))

    # Trimming or bracket-splitting can leave an empty name behind.
    input_list.discard('')
    return input_list

def get_the_red_amber_list(input_list):
    """Return ``(red, amber)``: the members of ``input_list`` that also
    appear in the module-level ``red_list`` and ``amber_list`` respectively.

    Matching is exact set membership; ``input_list`` is not modified.
    """
    return input_list.intersection(red_list), input_list.intersection(amber_list)
def update_the_green_list(input_list, red_list, amber_list):
    """Add the unflagged ingredients to the module-level ``green_list``.

    Note that ``input_list`` is modified in place: every name found in
    ``red_list`` or ``amber_list`` is removed from it before the remainder is
    merged into ``green_list``. The ``red_list`` / ``amber_list`` parameters
    shadow the module-level sets of the same names inside this function.
    """
    input_list.difference_update(red_list, amber_list)
    green_list.update(input_list)

In [381]:
def process_product_ingredients(product_name:str, input_string:str):
    """Classify one product from its ingredients string and report the result.

    The ingredients are parsed with ``get_the_input_list`` and matched against
    the red/amber watch-lists. The lower-cased product name is recorded in
    ``red_products`` and/or ``amber_products`` when matches exist, otherwise
    in ``safe_products``, and a summary line is printed for each case.

    Returns:
        ``(input_list, red, amber)``. ``input_list`` is the parsed ingredient
        set *after* ``update_the_green_list`` has removed the red and amber
        names from it in place, so it holds only the unflagged names.

    Raises:
        Whatever ``get_the_input_list`` raises for an unparseable string.
    """
    input_list = get_the_input_list(input_string)
    product_name = product_name.lower()
    red, amber = get_the_red_amber_list(input_list)

    # Names are sorted before joining so the printed report is identical from
    # run to run (set iteration order for strings is not stable).
    if red:
        red_products.add(product_name)
        print("The harmful ingredients in %s are %s" % (product_name, ','.join(sorted(red))))
    if amber:
        amber_products.add(product_name)
        print("The doubltful ingredients in %s are %s" % (product_name, ','.join(sorted(amber))))
    if not red and not amber:
        safe_products.add(product_name)
        print("all ingredients in %s are safe" % product_name)

    update_the_green_list(input_list, red, amber)

    return input_list, red, amber

In [382]:
product_name = "NARS LIGHT REFLECTING SETTING POWDER - PRESSED"
input_string = "SILICA · METHYL METHACRYLATE CROSSPOLYMER · DIMETHICONE · LAUROYL LYSINE · TRIMETHYLSILOXYSILICATE · GLYCERIN · TOCOPHERYL ACETATE · ASCOPHYLLUM NODOSUM EXTRACT · SEA WATER/MARIS AQUA/EAU DE MER · ETHYLHEXYLGLYCERIN · MAGNESIUM SILICATE · POLYSILICONE-2 · WATER/AQUA/EAU · BARIUM SULFATE · ALUMINA · CITRIC ACID · CHLORPHENESIN · PHENOXYETHANOL · POTASSIUM SORBATE ·  MICA · TITANIUM DIOXIDE (CI 77891) · IRON OXIDES (CI 77491) · IRON OXIDES (CI 77492) · IRON OXIDES (CI 77499)"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in nars light reflecting setting powder - pressed are glycerin,phenoxyethanol,dimethicone


In [383]:
product_name = "Nars LIGHT REFLECTING ADVANCED SKINCARE FOUNDATION"
input_string = "WATER/AQUA/EAU · C9-12 ALKANE · BUTYLENE GLYCOL · UNDECANE · TRIDECANE · ISODECYL NEOPENTANOATE · HYDROGENATED POLYISOBUTENE · POLYGLYCERYL-6 POLYRICINOLEATE · POLYGLYCERYL-2 DIISOSTEARATE · DISTEARDIMONIUM HECTORITE · DIISOSTEARYL MALATE · GLYCERIN · SYNTHETIC WAX · SODIUM CHLORIDE · OPHIOPOGON JAPONICUS ROOT EXTRACT · ASCOPHYLLUM NODOSUM EXTRACT · THEOBROMA CACAO (COCOA) SEED EXTRACT · CURCUMA LONGA (TURMERIC) ROOT EXTRACT · SILYBUM MARIANUM FRUIT EXTRACT · ALUMINUM HYDROXIDE · MAGNESIUM CHLORIDE · SODIUM DILAURAMIDOGLUTAMIDE LYSINE · TOCOPHEROL · COCO-CAPRYLATE/CAPRATE · ETHYLHEXYLGLYCERIN · PENTYLENE GLYCOL · HYDROXYPHENYL PROPAMIDOBENZOIC ACID · ASCORBYL PALMITATE · CITRIC ACID · SEA WATER/MARIS AQUA/EAU DE MER · TRISODIUM EDTA · BARIUM SULFATE · ALUMINA · CHLORPHENESIN · PHENOXYETHANOL · POTASSIUM SORBATE · IRON OXIDES (CI 77491, CI 77492, CI 77499) · MICA · TITANIUM DIOXIDE (CI 77891)"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in nars light reflecting advanced skincare foundation are glycerin,phenoxyethanol


In [384]:
product_name = "NATURIE HATOMUGI SKIN CONDITIONER LOTION"
input_string2022 = "Water, DPG, BG, Glycerin, Eyelet Extract, 2K Glycyrrhizinate, (Acrylates/Ethylhexyl Acrylate) Copolymer, Citric Acid, Sodium Citric Acid, Methylparaben"
input_string2021 =  "Water, DPG, BG, Glycerin, Glycyrrhizic acid 2K, Hatomugi extract, (styrene / Acrylate) copolymer, Ethanol, Citric acid, Na citrate, Methylparaben, Proxparaben"
_all, red, amber = process_product_ingredients(product_name, input_string2022)

The harmful ingredients in naturie hatomugi skin conditioner lotion are glycerin


In [385]:
product_name = "THREE Flawless Ethereal Fluid Foundation"
input_string = "Water, cyclopentasiloxane, BG, caprylic/capric propanediol, talc, propanediol, zinc oxide, starch octenylsuccinate Al, squalane, sorbitan sesquiisostearate, PEG-10 dimethicone, argania spinosa kernel oil, Shea butter, tea seed oil, jojoba seed oil, beeswax, rose canina fruit oil, evening primrose oil, pulquenetia volubilis seed oil, polyhydroxystearic acid, methyl methacrylate crosspolymer, sorbitan sesquioleate, phenoxyethanol, silica, alumina, glycerin , sodium chloride, stearic acid, Al dimyristate, tocopherol, di(phytosteryl/octyldodecyl) lauroyl glutamate, mica, synthetic fluorophlogopite, synthetic iron phlogopite, lauroyl lysine, acrylates copolymer, disodium stearoyl glutamate, hydrogen dimethicone , titanium oxide, iron oxide, aluminum hydroxide"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in three flawless ethereal fluid foundation are glycerin,phenoxyethanol


In [356]:
product_name = "Ren Evercalm Global Protection Day Cream"
input_string = "Aqua (Water), Cetearyl Alcohol, Camellia Oleifera Seed Oil, Cetearyl Ethylhexanoate, Myristyl Myristate, Sesamum Indicum (Sesame) Seed Oil, Glycerin, Butyrospermum Parkii (Shea) Butter, Caprylic/Capric Triglyceride, Triheptanoin, Cetearyl Glucoside, Myristyl Laurate, Oryzanol, Helianthus Annuus (Sunflower) Seed Oil, Ethylhexylglycerin, Vaccinium Macrocarpon (Cranberry) Seed Oil, Bisabolol, Ribes Nigrum (Black Currant) Seed Oil, Carbomer, Hippophae Rhamnoides Fruit Extract, Citrus Nobilis (Mandarin Orange) Peel Oil, Tocopherol, Anthemis Nobilis Flower Oil, Cinnamomum Camphora Linalloliferum (Ho Wood) Leaf Oil, Pelargonium Graveolens Flower Oil, Phenoxyethanol, Sodium Dehydroacetate, Pueraria Lobata (Kudzu) Symbiosome Extract, Laminaria Ochroleuca Extract, Cassia Alata Leaf Extract, Calendula Officinalis Flower Extract, Glucose, Parfum* (Fragrance), Rosmarinus Officinalis (Rosemary) Leaf Extract, Lactic Acid, Sodium Hydroxide, Citronellol, Geraniol, Limonene, Linalool"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in ren evercalm global protection day cream are phenoxyethanol
The doubltful ingredients in ren evercalm global protection day cream are fragrance


In [357]:
input_string = "Aqua (Water), Glycerin, Cetearyl Alcohol, Caprylyl Caprylate/Caprate, Olus Oil, Lactobacillus Ferment, Butyrospermum, Parkii (Shea) Butter, Helianthus Annus (Sunflower) Seed Wax, Simmondsia Chinesis (Jojoba) Seed Oil, Cetearyl Glucoside, Propanediol, Algae Extract, Cetyl Alcohol, Lactobacillus, Alpha-Glucan Oligosaccharide, Parfum* (Fragrance), Tocopheryl Acetate, Caprylic/Capric Triglyceride Panthenol, Carbomer, Vaccinium Vitas-Idaea (Lingonberry) Seed Oil, Xanthan Gum, Arnica Montana Flower Extract, Camelina Sativa Seed Oil, Cocus Nucifera (Coconut) Fruit Extract, Tocopherol, Magnesium Carboxymethyl Beta-Glucan, Malachite Extract, Albatrellus Ovinus Extract, Laminaria Ochroleuca Extract, Glucose, Phenoxyethanol, Helianthus Annuus (Sunflower) Seed Oil, Citric Acid, Sodium Hydroxide, Rosmarinus Officinalis Leat Extract, Citronellol, Geraniol, Limonene, Linalool"
product_name = "Ren Evercalm Ultra Comforting Rescue Mask"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in ren evercalm ultra comforting rescue mask are phenoxyethanol
The doubltful ingredients in ren evercalm ultra comforting rescue mask are fragrance


In [358]:
product_name = "Mustela Hydra Bebe Body Lotion"
input_string = "AQUA/WATER/EAU, HELIANTHUS ANNUUS (SUNFLOWER) SEED OIL, GLYCERIN, POLYGLYCERYL 6 DISTEARATE, CETYL ALCOHOL, 1,2 HEXANEDIOL, CERA ALBA/BEESWAX/CIRE D’ABEILLE, HYDROXYETHYL ACRYLATE/SODIUM ACRYLOYLDIMETHYL TAURATE COPOLYMER, PARFUM (FRAGRANCE), JOJOBA ESTERS, CAPRYLYL GLYCOL, SODIUM STEAROYL GLUTAMATE, POLYGLYCERYL3 BEESWAX/ POLYGLYCERYL 3 CIRE D’ABEILLE, TOCOPHERYL ACETATE, POLYSORBATE 60, SORBITAN ISOSTEARATE, CITRIC ACID, PERSEA GRATISSIMA (AVOCADO) FRUIT EXTRACT"
_all, red, amber = process_product_ingredients(product_name, input_string)

The doubltful ingredients in mustela hydra bebe body lotion are fragrance


In [359]:
input_string = "Cetyl Ethylhexanoate, Butylene Glycol Diisononanoate, Polyglyceryl-10 Diisostearate, Polyglyceryl-20 Hexacaprylate, Polyglyceryl-20 Octaisononanoate, Glycerin, Tridecane,Dicaprylyl Ether, Limnanthes Alba (Meadowfoam) Seed Oil, Glyceryl Behenate/Eicosadioate, Diphenylsiloxy Phenyl Trimethicone, Diglycerin, Humulus Lupulus (Hops) Extract, Camellia Sinensis Leaf Extract, Rubus Ellipticus Root Extract, Phytosteryl/Isostearyl/Cetyl/Stearyl/Behenyl Dimer Dilinoleate, Helianthus Annuus (Sunflower) Seed Oil, Butylene Glycol, Pentylene Glycol, PEG/PPG/Polybutylene Glycol-8/5/3 Glycerin, PPG-2 Arginine, Stearoyl Inulin, Water, Tocopherol, Lactic Acid"
product_name = "Fancl Mild Cleansing Oil"
_all, red, amber = process_product_ingredients(product_name, input_string)

The doubltful ingredients in fancl mild cleansing oil are peg


In [360]:
input_string = "Caprylic/Capric Triglyceride, Olive Fruit Oil, Ethylhexyl Palmitate, PEG-20 Glyceryl Triisostearate, PEG-12 Isostearate, Bergamot Fruit Oil, Majorana Leaf Oil, Nymphalospermum Oil, Rosemary Leaf Oil, Bitter Orange leaf/branch oil, orange peel oil, saffron flower extract, burdock root extract, tea seed oil, borage seed oil, moringa seed oil, soybean oil, panax ginseng root extract, rhubarb root extract, bucrose extract, carrot root extract, tripoli Dipentaerythrityl hydroxystearate, pentylene glycol, water, isostearic acid, tocopherol, ethanol, BG"
product_name = "three balancing cleasing oil"
_all, red, amber = process_product_ingredients(product_name, input_string)

The doubltful ingredients in three balancing cleasing oil are ethanol


In [361]:
input_string = "Water, butylene glycol, sasa kurilensis (bamboo) water, pentylene glycol, sodium citrate, lithospermum erythrorhizone root extract, sodium hyaluronate, sodium chondroitin sulfate, dipotassium glycyrrhizate, (C/T) seaweed extract, citric acid, sea salt, hydrolyzed soy protein, succinic acid, aluminimum chloride"
product_name = "Haba G-lotion"
_all, red, amber = process_product_ingredients(product_name, input_string)

all ingredients in haba g-lotion are safe


In [362]:
input_string = "AQUA / WATER • PROPANEDIOL • PROPYLENE GLYCOL • PHENOXYETHANOL • SODIUM PCA • SODIUM BENZOATE • ALLANTOIN • CHAMOMILLA RECUTITA FLOWER EXTRACT / MATRICARIA FLOWER EXTRACT • GLUCOSE • CUCUMIS SATIVUS FRUIT EXTRACT / CUCUMBER FRUIT EXTRACT • ALOE BARBADENSIS LEAF JUICE POWDER • CAMPHOR • URTICA DIOICA EXTRACT / NETTLE EXTRACT • EQUISETUM ARVENSE EXTRACT • BETULA ALBA LEAF EXTRACT • CI 19140 / YELLOW 5 • POTASSIUM SORBATE • CI 14700 / RED 4 • CI 42090 / BLUE 1 • SODIUM HYDROXIDE • CITRIC ACID"
product_name = "kiehl's Cucumber Herbal Alcohol-Free Toner"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in kiehl's cucumber herbal alcohol-free toner are phenoxyethanol


In [363]:
product_name = "bioeffect EGF Serum"
input_string = "GLYCERIN, WATER (AQUA), SODIUM HYALURONATE, TROMETHAMINE, SODIUM CHLORIDE, BARLEY (HORDEUM VULGARE) SEED EXTRACT, EGF (BARLEY SH-OLIGOPEPTIDE-1)"
_all, red, amber = process_product_ingredients(product_name, input_string)

all ingredients in bioeffect egf serum are safe


In [364]:
product_name = "bioeffect hydrating cream"
input_string = "WATER (AQUA), CAPRYLIC/CAPRIC TRIGLYCERIDE, BUTYLENE GLYCOL, C12-20 ACID PEG-8 ESTER, CETYL ALCOHOL, DL-ALPHA TOCOPHEROL, PHENOXYETHANOL, SODIUM HYALURONATE, ETHYLHEXYLGLYCERIN, CARBOMER, SORBITAN OLEATE, POTASSIUM SORBATE, POTASSIUM HYDROXIDE, CITRIC ACID, BARLEY (HORDEUM VULGARE) SEED EXTRACT, EGF (BARLEY SH-OLIGOPEPTIDE-1)"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in bioeffect hydrating cream are phenoxyethanol


In [365]:
input_string = "Water/Aqua, Glycerin, Cetyl-PG Hydroxyethyl Palmitamide, Dimethicone, Neopentyl Glycol Dicaprate, Squalane, Polysorbate 60, Butylene Glycol, Sorbitan Stearate, Cholesterol, Cholesteryl Isostearate, Allantoin, Cetyl Alcohol, Sodium Methyl Stearoyl Taurate, Stearyl Alcohol, Succinic Acid, Bis-Methoxypropylamido Isodocosane, Eucalyptus Globulus Leaf Extract, Methylparaben"
product_name = "Curél Moisture Facial Milk 120ml"
_all, red, amber = process_product_ingredients(product_name, input_string)

all ingredients in curél moisture facial milk 120ml are safe


In [366]:
input_string = "Aqua/Water/Eau, Glycerin, Caprylic/Capric Triglyceride, Glyceryl Stearate SE, Isononyl Isononanoate, Dicaprylyl Carbonate, Dimethicone, Triticum Vulgare (Wheat) Germ Oil, Butyrospermum Parkii (Shea) Butter, Chlorella Vulgaris Extract, Padina Pavonica Thallus Extract, Daucus Carota Sativa (Carrot) Root Extract, Porphyridium Cruentum Extract, Acacia Decurrens (Mimosa) Flower Extract, Rosa Centifolia (Rose) Flower Extract, Ginkgo Biloba Leaf Extract, Tocopherol, Phenoxyethanol, Polyacrylate-13, Stearic Acid, Tocopheryl Acetate (Vitamin E), Coco-Caprylate, Cetyl Alcohol, Xanthan Gum, Glyceryl Polyacrylate, Polyisobutene, Fragrance (Parfum), Citric Acid, Chlorphenesin, Glyceryl Acrylate/Acrylic Acid Copolymer, Sodium Dehydroacetate, Disodium EDTA, Polysorbate 20, Sorbitan Isostearate, Linalool, Citronellol, Potassium Sorbate, Sodium Benzoate, Geraniol, Limonene"
product_name = "Elemis Pro-Collagen Marine Cream"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in elemis pro-collagen marine cream are phenoxyethanol
The doubltful ingredients in elemis pro-collagen marine cream are fragrance (parfum),fragrance


In [367]:
input_string = "talc, sodium cocoyl isecthionate, olefin (C14-16), sodium sulfonate, sodium laururic glutamate, potassium lauric acid, myristoyl glutamate, caraginan, silk, methicone, isosteryl alcohol, BHT, DPG, ethylglucoside, potassium hydroxide, protease. Sodium phosphate, lye Paze, Methylparaben"
product_name = "suisai Beauty Clear Powder Wash/Enzyme Facial Cleansing Powder"
_all, red, amber = process_product_ingredients(product_name, input_string)

all ingredients in suisai beauty clear powder wash/enzyme facial cleansing powder are safe


In [369]:
input_string = "talc, maltitol, sodium cocoyl isecthionate, sodium mirystyl glutamate, sodium lauraul glutamate, olefin (C14-16), sodium sulfonate, potassium laurate, caraginan, czophylane root extract, squarane, citric acid, sodium hyaluronic acid, sunflower seed oil, abocado oil, bt, bt, de, bt, da. PG, Silk, E. Chilglucoside, Proteaze, Lipase, Sodium Benzoate"
product_name = "suisai Beauty Clear gold Powder Wash"
_all, red, amber = process_product_ingredients(product_name, input_string)

all ingredients in suisai beauty clear gold powder wash are safe


In [368]:
input_string = "Water, cetearyl ethylhexanoate, cetearyl alcohol, caprylic/capric triglyceride, sesamum indicum (sesame) seed oil, butyrospermum parkii (shea) butter, myristyl myristate, glycerin, cetearyl glucoside, ribes nigrum (black currant) seed oil, helianthus annuus (sunflower) seed oil, sodium stearoyl glutamate, oryzanol, bisabolol, myristyl laurate, hippophae rhamnoides fruit extract, calendula officinalis flower extract, tocopherol, xanthan gum, phenoxyethanol, citrus nobilis (mandarin orange) peel oil, anthemis nobilis (chamomile) flower oil, sodium dehydroacetate, cinnamomum camphora linalloliferum (ho wood) leaf oil, graveolens flower oil, foeniculum vulgare (fennel) fruit extract, ethylhexylglycerin, glucose, fragrance*, rosmarinus officinalis (rosemary) leaf extract, citric acid, citronellol, limonene, linalool."
product_name = "Ren Skincare EverCalm Gentle Cleansing Milk 150ml"
_all, red, amber = process_product_ingredients(product_name, input_string)

The harmful ingredients in ren skincare evercalm gentle cleansing milk 150ml are phenoxyethanol
The doubltful ingredients in ren skincare evercalm gentle cleansing milk 150ml are fragrance


In [370]:
input_string = "Water, glycerin, myristic acid, potassium hydroxide, cocamide DEA, palmitic acid, stearic acid, beeswax, cocamidopropyl betaine, PEG-150 distearate, glycol distearate, pentylene glycol, squalane, silk, dipotassium glycyrrhizinate, carbonic acid. Hydrogen Na"
product_name = "Haba Facial Foam"
_all, red, amber = process_product_ingredients(product_name, input_string)

all ingredients in haba facial foam are safe


In [371]:
input_string = "Water, oryza sativa (rice) extract(30 %), dibutyl adipate, propanediol, diethylamino hydroxybenzoyl hexyl benzoate, polymethylsilsesquioxane, ethylhexyl triazone, niacinamide, methylene bis-benzotriazolyl tetramethyl butylphenol, coco-caprylate/caprate, caprylyl methicone, diethylhexyl butamido triazone, glycerin, butylene glycol, oryza sativa (r ice) germ extract(1,000 ppm), camellia sinensis leaf extract, lactobacillus/pumpkin ferment extract, bacillus/soybean ferment extract, saccharum officinarum (sugarcane) extract, macrocystis pyrifera (kelp) extract, cocos nucifera (coconut) fruit extract, panax ginseng root extract, monascus/rice ferment(0.12 ppm), pentylene glycol, behenyl alcohol, poly c 10-30 alkyl acrylate, polyglyceryl-3 methylglucose distearate, decyl glucoside, tromethamine, carbomer, acrylates/c10-30 alkyl acrylate crosspolymer, 1,2-hexanediol, sodium stearoyl glutamate, polyacrylate crosspolymer-6, ethylhexylglycerin, adenosine, xanthan gum, tocopherol, lactobacillus/rice ferment (0,12 ppm), aspergillus ferment, saccharomyces/r ice ferment filtrate(0.12 ppm)"
product_name = "Beauty of Joseon Relief Sun Rice + Probiotics SPF 50+ PA ++++ 50 ml"
_all, red, amber = process_product_ingredients(product_name, input_string)

all ingredients in beauty of joseon relief sun rice + probiotics spf 50+ pa ++++ 50 ml are safe


In [372]:
input_string ="Water, millistinate, glycerin, potassium hydroxide, palmitic acid, stearate, lauric acid, glyceryl stearate, BG, diglycerin, sodium steroyl glutamate, soy milk isoflavone, soy milk fermentation, EDTA-2Na, ethanol, cyclodextrin, diz seed extract, hydroxide seed extract. Chill cellulose, Sodium Polyacrylate, Polyquateranium-51, Lauramid Propyl Betaine"
product_name = "SANA Nameraka Honpo Moist Cleansing Face Wash"
_all, red, amber = process_product_ingredients(product_name, input_string)

The doubltful ingredients in sana nameraka honpo moist cleansing face wash are ethanol


In [270]:
# Scratch check of the first two parsing steps only (split, then drop pieces
# with no letters), so the raw pieces can be inspected before any trimming or
# lower-casing. The separator class is kept identical to the one used in
# get_the_input_list.
input_list = re.split(r'[,･.·•]', input_string)
if len(input_list) <= 1:
    print('Cannot Recognise the Split Symbol in the Ingredients List')
# remove empty strings
input_list = {s for s in input_list if any(c.isalpha() for c in s)}
print(len(input_list))
input_list

24


{'  MICA ',
 ' ALUMINA ',
 ' ASCOPHYLLUM NODOSUM EXTRACT ',
 ' BARIUM SULFATE ',
 ' CHLORPHENESIN ',
 ' CITRIC ACID ',
 ' DIMETHICONE ',
 ' ETHYLHEXYLGLYCERIN ',
 ' GLYCERIN ',
 ' IRON OXIDES (CI 77491) ',
 ' IRON OXIDES (CI 77492) ',
 ' IRON OXIDES (CI 77499)',
 ' LAUROYL LYSINE ',
 ' MAGNESIUM SILICATE ',
 ' METHYL METHACRYLATE CROSSPOLYMER ',
 ' PHENOXYETHANOL ',
 ' POLYSILICONE-2 ',
 ' POTASSIUM SORBATE ',
 ' SEA WATER/MARIS AQUA/EAU DE MER ',
 ' TITANIUM DIOXIDE (CI 77891) ',
 ' TOCOPHERYL ACETATE ',
 ' TRIMETHYLSILOXYSILICATE ',
 ' WATER/AQUA/EAU ',
 'SILICA '}

In [None]:
safe_products

In [202]:
green_list

{'(hydroxyethyl acrylate/sodium acryloyldimethyltaurate) copolymer',
 '4-dicarboxylate',
 'acetic acid',
 'acrylates copolymer',
 'agar',
 'alcohol',
 'aqua',
 'asparagopsis armata extract',
 'asparagopsis armata extract (elaspro)',
 'behenyl alcohol',
 'bg',
 'bisethoxydiglycol cyclohexane-1',
 'butylene glyvol',
 'caprooil tetrapeptide-3',
 'caprylate/caprate/myristate/stearate',
 'caprylic/capric triglyceride',
 'cholesteryl stearate',
 'citric acid',
 'coix lacryma-jobi (job’s tears) seed extract',
 'coix lacryma-jobi seed extract',
 'collagen',
 'collaplus',
 'copolymer',
 'dextran',
 'dimethicone',
 'dipotassium glycyrrhizate',
 'dipropylene glycol',
 'edta-2na',
 'elasglow',
 'elaspro',
 'erasrich',
 'ethanol',
 'gagome extract',
 'gagome extract (erasrich)',
 'gluconic acid',
 'glycerin',
 'glyceryl stearate',
 'glyceryl tri',
 'glyceryl tri(caprylate/caprate/myristate/stearate)',
 'hydrogenated lecithin',
 'hydrolyzed collagen',
 'hydrolyzed elastin',
 'hydrolyzed soybean prot

In [187]:
# Manual run of the classification step on the current `input_list`:
# show the flagged names, then move everything unflagged into green_list.
red = input_list.intersection(red_list)
amber = input_list.intersection(amber_list)
print(red, amber, sep='\n')
# In-place: strips the watch-listed names out of input_list itself.
input_list.difference_update(red_list, amber_list)
green_list.update(input_list)

set()
set()


In [166]:
# Manual run of the slash-splitting step on the current `input_list`.
inputs_with_slash = filter_strings_with_slash(input_list)
print(inputs_with_slash)
# Fold every slash/bracket-separated fragment back into the working set;
# the original unsplit names stay in the set as well.
for input_with_slash in inputs_with_slash:
    parts = split_string_with_slash(input_with_slash)
    input_list |= set(parts)
input_list

['caprylate/caprate/myristate/stearate', 'WATER(AQUA/EAU)', 'Water/Aqua', 'glyceryl tri(caprylate/caprate/myristate/stearate)', 'hydroxyethyl acrylate/sodium acryloyldimethyltaurate copolymer']


{'AQUA',
 'Aqua',
 'EAU',
 'WATER',
 'WATER(AQUA/EAU)',
 'Water',
 'Water/Aqua',
 'caprate',
 'caprylate',
 'caprylate/caprate/myristate/stearate',
 'glyceryl tri',
 'glyceryl tri(caprylate/caprate/myristate/stearate)',
 'hydroxyethyl acrylate',
 'hydroxyethyl acrylate/sodium acryloyldimethyltaurate copolymer',
 'myristate',
 'sodium acryloyldimethyltaurate copolymer',
 'stearate'}

In [152]:
# Hand-picked names covering the slash / bracket combinations exercised by
# the splitting helpers.
input_list = set([
    "WATER(AQUA/EAU)",
    "Water/Aqua",
    "hydroxyethyl acrylate/sodium acryloyldimethyltaurate copolymer",
    "glyceryl tri(caprylate/caprate/myristate/stearate)",
])

In [156]:
input_list

{'WATER(AQUA/EAU)',
 'Water/Aqua',
 'caprylate/caprate/myristate/stearate',
 'glyceryl tri',
 'glyceryl tri(caprylate/caprate/myristate/stearate)',
 'hydroxyethyl acrylate/sodium acryloyldimethyltaurate copolymer'}

In [116]:
input_string = "Water, glycerin, squalane, BG, caprylic/capric triglyceride, behenyl alcohol, shea butter, polyglyceryl-10 stearate, dimethicone, glyceryl stearate, (hydroxyethyl acrylate/sodium acryloyldimethyltaurate) copolymer, peony root Extract, Caprooil Tetrapeptide-3, Collagen, Majorana Leaf Extract, Maltodextrin, Tetrapeptide-5 (Elasglow), Gagome Extract (Erasrich), Asparagopsis Armata Extract (Elaspro), Hydrolyzed Soybean Protein (Collaplus) , hydrolyzed elastin, hydrolyzed collagen, gluconic acid, bisethoxydiglycol cyclohexane-1,4-dicarboxylate, hydrogenated lecithin, glyceryl tri(caprylate/caprate/myristate/stearate), cholesteryl stearate, acetic acid. Tocopherol, agar, pullulan, PVP, dextran, ethanol, polysorbate 60, phenoxyethanol, EDTA-2Na, sodium benzoate, fragrance"

In [34]:
input_string = "Aqua (Water), Prunus Amygdalus Dulcis (Sweet Almond) Oil, Cetearyl Glucoside, Simmondsia, Chinensis (Jojoba) Seed Oil ∆, Cera Alba (Beeswax), Glycerin, Butyrospermum Parkii (Shea) Butter, Triticum Vulgare (Wheat) Germ Oil, Calendula Officinalis Flower Oil∆, Squalane, Persea Gratissima (Avocado) Oil∆, Dehydroacetic acid & Benzyl alcohol, Xanthan Gum, Sodium lactate, Hippophae Rhamnoides Fruit Oil∆, Aniba Rosaeodora (Rosewood) Wood Oil, Citrus Medica Limonum (Lemon) Oil∆, Commiphora Myrrha Oil∆, Citrus Aurantifolia (Lime) Oil∆, Calophyllum Inophyllum Seed Oil∆, Matricaria Discoidea Flower/Leaf/Stem Extract*, Anthyllis Vul- neraria Flower Extract*, Capsella Bursa-Pastoris Extract*, Cetraria Islandica Extract*, Euphrasia Officinalis Extract, Lavandula Angustifolia (Lavender) Flower Extract, Matricaria Maritima Extract*, Stellaria Media (Chickweed) Extract*, Symphytum Officinale Leaf Extract, Trifolium Pratense (Clover) Flower Extract*, Viola Tricolor Extract*, Lactic Acid, Benzyl Benzoate**, Citral**, Geraniol**, Limonene**, Linalool**."

In [89]:
input_string = "WATER(AQUA/EAU)･PENTAERYTHRITYL TETRAETHYLHEXANOATE･SQUALANE･BUTYLENE GLYCOL･GLYCERIN･DIPROPYLENE GLYCOL･BEHENYL ALCOHOL･DIMETHICONE･DIPHENYLSILOXY PHENYL TRIMETHICONE･MYRISTYL MYRISTATE･POTASSIUM METHOXYSALICYLATE･HYDROGENATED POLYISOBUTENE･STEARYL ALCOHOL･BEHENETH-20･PEG-450･PHENOXYETHANOL･HYDROGENATED PALM OIL･DIMETHICONE/PHENYL VINYL DIMETHICONE CROSSPOLYMER･ELAEIS GUINEENSIS (PALM) KERNEL OIL･POLYVINYL ALCOHOL･DIMETHYLACRYLAMIDE/SODIUM ACRYLOYLDIMETHYLTAURATE CROSSPOLYMER･ELAEIS GUINEENSIS (PALM) OIL･FRAGRANCE (PARFUM)･DISODIUM EDTA･TOCOPHERYL ACETATE･XANTHAN GUM･ROSA DAMASCENA FLOWER WATER･RETINYL ACETATE･SODIUM CITRATE･HELIANTHUS ANNUUS (SUNFLOWER) SEED OIL･BHT･ALCOHOL･CAFFEINE･SODIUM METABISULFITE･LAVANDULA ANGUSTIFOLIA (LAVENDER) OIL･CITRIC ACID･SODIUM METAPHOSPHATE･PPG-3 DIPIVALATE･LIMONENE･HEXYL CINNAMAL･TOCOPHEROL･LINALOOL･IRON OXIDES (CI 77492)･CITRONELLOL･SODIUM ACETYLATED HYALURONATE･ANGELICA ACUTILOBA ROOT EXTRACT･ANGELICA KEISKEI LEAF/STEM EXTRACT･IRON OXIDES (CI 77491)･OLEA EUROPAEA (OLIVE) LEAF EXTRACT･SANGUISORBA OFFICINALIS ROOT EXTRACT･LAMIUM ALBUM FLOWER/LEAF/STEM EXTRACT･CAMELLIA SINENSIS LEAF EXTRACT･INOSITOL･CARTHAMUS TINCTORIUS (SAFFLOWER) FLOWER EXTRACT･PINUS SYLVESTRIS CONE EXTRACT･ZIZIPHUS JUJUBA FRUIT EXTRACT･ROSMARINUS OFFICINALIS (ROSEMARY) LEAF EXTRACT (ROSMARINUS OFFICINALIS LEAF EXTRACT)･EUCHEUMA SERRA/GRATELOUPIA SPARSA/SACCHARINA ANGUSTATA/ULVA LINZA/UNDARIA PINNATIFIDA EXTRACT･SACCHARINA ANGUSTATA/UNDARIA PINNATIFIDA EXTRACT･BUPLEURUM FALCATUM ROOT EXTRACT･COIX LACRYMA-JOBI MA-YUEN SEED EXTRACT･CELLULOSE･"

In [70]:
# Sample comma-separated ingredient list; note the '*' footnote marker and the trailing
# fragrance footnote, which is not comma-delimited from the last ingredient.
# Rebinds the notebook-wide input_string.
input_string = "Aqua (Water), Glycerin, Cetearyl Alcohol, Caprylyl Caprylate/Caprate, Olus Oil, Lactobacillus Ferment, Butyrospermum, Parkii (Shea) Butter, Helianthus Annus (Sunflower) Seed Wax, Simmondsia Chinesis (Jojoba) Seed Oil, Cetearyl Glucoside, Propanediol, Algae Extract, Cetyl Alcohol, Lactobacillus, Alpha-Glucan Oligosaccharide, Parfum* (Fragrance), Tocopheryl Acetate, Caprylic/Capric Triglyceride Panthenol, Carbomer, Vaccinium Vitas-Idaea (Lingonberry) Seed Oil, Xanthan Gum, Arnica Montana Flower Extract, Camelina Sativa Seed Oil, Cocus Nucifera (Coconut) Fruit Extract, Tocopherol, Magnesium Carboxymethyl Beta-Glucan, Malachite Extract, Albatrellus Ovinus Extract, Laminaria Ochroleuca Extract, Glucose, Phenoxyethanol, Helianthus Annuus (Sunflower) Seed Oil, Citric Acid, Sodium Hydroxide, Rosmarinus Officinalis Leat Extract, Citronellol, Geraniol, Limonene, Linalool *100% Natural Fragrance - Parfum 100% Naturel"

In [73]:
# Sample ingredient list delimited by '･' (with a trailing delimiter); contains parenthesised
# synonyms and '/'-joined names. Rebinds the notebook-wide input_string.
input_string = "WATER(AQUA/EAU)･ALCOHOL･DIPROPYLENE GLYCOL･GLYCERIN･NIACINAMIDE･CYCLOHEXASILOXANE･CETYL ETHYLHEXANOATE･HYDROGENATED POLYDECENE･PHYTOSTERYL/OCTYLDODECYL LAUROYL GLUTAMATE･DIMETHICONE･PPG-3 DIPIVALATE･MYRISTYL MYRISTATE･METHYL METHACRYLATE CROSSPOLYMER･HYDROGENATED PALM OIL･AMMONIUM ACRYLOYLDIMETHYLTAURATE/VP COPOLYMER･BEHENYL ALCOHOL･POLYSORBATE 60･PEG-30 PHYTOSTEROL･PHENOXYETHANOL･DIMETHICONE/VINYL DIMETHICONE CROSSPOLYMER･BATYL ALCOHOL･TOCOPHERYL ACETATE･ERYTHRITOL･PEG/PPG-14/7 DIMETHYL ETHER･PEG/PPG-17/4 DIMETHYL ETHER･BUTYLENE GLYCOL･FRAGRANCE (PARFUM)･CARBOMER･ACRYLATES/C10-30 ALKYL ACRYLATE CROSSPOLYMER･POTASSIUM HYDROXIDE･CAFFEINE･DISODIUM EDTA･SODIUM METAPHOSPHATE･TOCOPHEROL･SAPINDUS MUKOROSSI PEEL EXTRACT･IRON OXIDES (CI 77492)･LINALOOL･SODIUM METABISULFITE･LIMONENE･CITRONELLOL･ANGELICA KEISKEI LEAF/STEM EXTRACT･GERANIOL･CAMELLIA SINENSIS LEAF EXTRACT･CITRUS JUNOS SEED EXTRACT･HDI/TRIMETHYLOL HEXYLLACTONE CROSSPOLYMER･ZIZIPHUS JUJUBA FRUIT EXTRACT･IRON OXIDES (CI 77491)･EUCHEUMA SERRA/GRATELOUPIA SPARSA/SACCHARINA ANGUSTATA/ULVA LINZA/UNDARIA PINNATIFIDA EXTRACT･CURCUMA LONGA (TURMERIC) RHIZOME EXTRACT･SACCHARINA ANGUSTATA/UNDARIA PINNATIFIDA EXTRACT･CHLORELLA VULGARIS EXTRACT･SILICA･"

In [81]:
# Short comma-separated sample that starts with a '/'-joined synonym pair ("Water/Aqua")
# and ends with "Methylparaben". Rebinds the notebook-wide input_string.
input_string = "Water/Aqua, Glycerin, Cetyl-PG Hydroxyethyl Palmitamide, Dimethicone, Neopentyl Glycol Dicaprate, Squalane, Polysorbate 60, Butylene Glycol, Sorbitan Stearate, Cholesterol, Cholesteryl Isostearate, Allantoin, Cetyl Alcohol, Sodium Methyl Stearoyl Taurate, Stearyl Alcohol, Succinic Acid, Bis-Methoxypropylamido Isodocosane, Eucalyptus Globulus Leaf Extract, Methylparaben"

In [66]:
# Split whichever sample is currently bound to input_string on the '･' delimiter only.
# If the string ends with '･', str.split leaves an empty string as the last element.
input_list = input_string.split("･")

In [67]:
# Display the components produced by the split.
input_list

['WATER(AQUA/EAU)',
 'PENTAERYTHRITYL TETRAETHYLHEXANOATE',
 'SQUALANE',
 'BUTYLENE GLYCOL',
 'GLYCERIN',
 'DIPROPYLENE GLYCOL',
 'BEHENYL ALCOHOL',
 'DIMETHICONE',
 'DIPHENYLSILOXY PHENYL TRIMETHICONE',
 'MYRISTYL MYRISTATE',
 'POTASSIUM METHOXYSALICYLATE',
 'HYDROGENATED POLYISOBUTENE',
 'STEARYL ALCOHOL',
 'BEHENETH-20',
 'PEG-450',
 'PHENOXYETHANOL',
 'HYDROGENATED PALM OIL',
 'DIMETHICONE/PHENYL VINYL DIMETHICONE CROSSPOLYMER',
 'ELAEIS GUINEENSIS (PALM) KERNEL OIL',
 'POLYVINYL ALCOHOL',
 'DIMETHYLACRYLAMIDE/SODIUM ACRYLOYLDIMETHYLTAURATE CROSSPOLYMER',
 'ELAEIS GUINEENSIS (PALM) OIL',
 'FRAGRANCE (PARFUM)',
 'DISODIUM EDTA',
 'TOCOPHERYL ACETATE',
 'XANTHAN GUM',
 'ROSA DAMASCENA FLOWER WATER',
 'RETINYL ACETATE',
 'SODIUM CITRATE',
 'HELIANTHUS ANNUUS (SUNFLOWER) SEED OIL',
 'BHT',
 'ALCOHOL',
 'CAFFEINE',
 'SODIUM METABISULFITE',
 'LAVANDULA ANGUSTIFOLIA (LAVENDER) OIL',
 'CITRIC ACID',
 'SODIUM METAPHOSPHATE',
 'PPG-3 DIPIVALATE',
 'LIMONENE',
 'HEXYL CINNAMAL',
 'TOCOPHEROL'

In [165]:
from thefuzz import fuzz
import re

def remove_special_chars_except_alnum_parentheses(s):
    """Trim unwanted characters from both ends of ``s`` and return the result.

    The leading run of characters that are neither ASCII letters/digits nor
    '(' is removed, as is the trailing run of characters that are neither
    ASCII letters/digits nor ')'. Everything in between is left untouched.
    """
    edge_pattern = re.compile(r'^[^a-zA-Z0-9(]+|[^a-zA-Z0-9)]+$')
    trimmed = edge_pattern.sub('', s)
    return trimmed

def filter_strings_with_parentheses(strings_set):
    """Return a list of the items in ``strings_set`` that contain both '(' and ')'.

    Only the presence of the two characters is checked, not their order.
    Items are returned in the iteration order of ``strings_set``.
    """
    kept = []
    for candidate in strings_set:
        if '(' not in candidate or ')' not in candidate:
            continue
        kept.append(candidate)
    return kept

def split_string_with_parentheses(input_string):
    """Split a name containing "(...)" into two alternative names.

    Using the first '(' and the first ')' that follows it:
      * first  = text before '(' + text after ')'
      * second = text inside the parentheses + text after ')'

    Example: 'aniba rosaeodora (rosewood) wood oil' ->
             ('aniba rosaeodora wood oil', 'rosewood wood oil')

    Returns:
        A ``(first, second)`` tuple of stripped strings, or ``(None, None)``
        when there is no '(' or no ')' after it. Either string may be empty
        (e.g. '(x)' gives ``('', 'x')``).
    """
    start_index = input_string.find('(')
    if start_index == -1:
        return None, None

    # Search for ')' only after '(' so that a stray earlier ')' cannot hide
    # a valid pair further along the string.
    end_index = input_string.find(')', start_index + 1)
    if end_index == -1:
        return None, None

    before = input_string[:start_index].strip()
    inside = input_string[start_index + 1:end_index]
    after = input_string[end_index + 1:]

    return (before + after).strip(), (inside + after).strip()
    
def filter_strings_with_slash(strings_set):
    """Return a list of the items in ``strings_set`` that contain a '/'.

    Items are returned in the iteration order of ``strings_set``.
    """
    return list(filter(lambda text: '/' in text, strings_set))


def split_string_with_slash(s):
    """Break ``s`` at every '/', '(' and ')' and return the non-blank pieces.

    Each piece is stripped of surrounding whitespace; pieces that are empty
    after stripping are dropped. This covers three shapes of input: a single
    '/', several '/', and '/' combined with parentheses.
    """
    pieces = []
    for fragment in re.split(r'[/()]', s):
        fragment = fragment.strip()
        if fragment:
            pieces.append(fragment)
    return pieces

In [79]:
# Keep only the components that carry a parenthesised alternative name.
inputs_with_parentheses = filter_strings_with_parentheses(input_list)
for input_with_parentheses in inputs_with_parentheses:
    # The argument was previously misspelled `input_wiht_parentheses`, which raised NameError.
    part1, part2 = split_string_with_parentheses(input_with_parentheses)
# A bare expression is only displayed when it is the last statement of the cell.
inputs_with_parentheses

['iron oxides (ci 77491)',
 'fragrance (parfum)',
 'curcuma longa (turmeric) rhizome extract',
 'water(aqua/eau)',
 'iron oxides (ci 77492)']

In [62]:
# Spot-check split_string_with_parentheses on a single component.
# Note: this rebinds input_string, replacing any full ingredient list assigned earlier.
input_string = 'aniba rosaeodora (rosewood) wood oil'
split_string_with_parentheses(input_string)

('aniba rosaeodora wood oil', 'rosewood wood oil')

In [118]:
# convert the long string to list of components
input_list = re.split(r'[,･.]', input_string)
if len(input_list) <= 1:
    print('Cannot Recognise the Split Symbol in the Ingredients List')
# remove empty strings
input_list = {s for s in input_list if any(c.isalpha() for c in s)}
# remove the space or special character fron each string
# convert the components to lower characters
input_list = {remove_special_chars_except_alnum_parentheses(x).lower() for x in input_list}
# filter components with ()
# for components with (), 
## seperate the components into 2 equivalent components - part 1: deal with anything in front of () + the rest
## - part 2: content in () + the rest 
inputs_with_parentheses = filter_strings_with_parentheses(input_list)
inputs_with_parentheses
for input_with_parentheses in inputs_with_parentheses:
    part1, part2 = split_string_with_parentheses(input_with_parentheses)
    input_list.update({part1, part2})
## for each of the seperated words pair, intersect with red and amber list and return intersected words.
input_list

{'(hydroxyethyl acrylate/sodium acryloyldimethyltaurate) copolymer',
 '4-dicarboxylate',
 'acetic acid',
 'agar',
 'asparagopsis armata extract (elaspro)',
 'behenyl alcohol',
 'bg',
 'bisethoxydiglycol cyclohexane-1',
 'caprooil tetrapeptide-3',
 'caprylic/capric triglyceride',
 'cholesteryl stearate',
 'collagen',
 'dextran',
 'dimethicone',
 'edta-2na',
 'ethanol',
 'fragrance',
 'gagome extract (erasrich)',
 'gluconic acid',
 'glycerin',
 'glyceryl stearate',
 'glyceryl tri(caprylate/caprate/myristate/stearate)',
 'hydrogenated lecithin',
 'hydrolyzed collagen',
 'hydrolyzed elastin',
 'hydrolyzed soybean protein (collaplus)',
 'majorana leaf extract',
 'maltodextrin',
 'peony root extract',
 'phenoxyethanol',
 'polyglyceryl-10 stearate',
 'polysorbate 60',
 'pullulan',
 'pvp',
 'shea butter',
 'sodium benzoate',
 'squalane',
 'tetrapeptide-5 (elasglow)',
 'tocopherol',
 'water'}

{'phenoxyethanol'}
{'fragrance'}


In [120]:
# Display the current contents of input_list.
input_list

{'(hydroxyethyl acrylate/sodium acryloyldimethyltaurate) copolymer',
 '4-dicarboxylate',
 'acetic acid',
 'agar',
 'asparagopsis armata extract (elaspro)',
 'behenyl alcohol',
 'bg',
 'bisethoxydiglycol cyclohexane-1',
 'caprooil tetrapeptide-3',
 'caprylic/capric triglyceride',
 'cholesteryl stearate',
 'collagen',
 'dextran',
 'dimethicone',
 'edta-2na',
 'ethanol',
 'fragrance',
 'gagome extract (erasrich)',
 'gluconic acid',
 'glycerin',
 'glyceryl stearate',
 'glyceryl tri(caprylate/caprate/myristate/stearate)',
 'hydrogenated lecithin',
 'hydrolyzed collagen',
 'hydrolyzed elastin',
 'hydrolyzed soybean protein (collaplus)',
 'majorana leaf extract',
 'maltodextrin',
 'peony root extract',
 'phenoxyethanol',
 'polyglyceryl-10 stearate',
 'polysorbate 60',
 'pullulan',
 'pvp',
 'shea butter',
 'sodium benzoate',
 'squalane',
 'tetrapeptide-5 (elasglow)',
 'tocopherol',
 'water'}

In [125]:
# For every component containing "(...)", add its two equivalent names to input_list.
inputs_with_parentheses = filter_strings_with_parentheses(input_list)
for input_with_parentheses in inputs_with_parentheses:
    part1, part2 = split_string_with_parentheses(input_with_parentheses)
    # Skip None (no usable "(...)" pair) and '' (nothing left on one side) so the set
    # only ever holds real component names.
    input_list.update(part for part in (part1, part2) if part)

In [129]:
# Display green_list.
# NOTE(review): green_list is not assigned in any of the surrounding cells; presumably it is
# built in a cell elsewhere in the notebook. Confirm it exists before a Restart & Run All.
green_list

{'(hydroxyethyl acrylate/sodium acryloyldimethyltaurate) copolymer',
 '4-dicarboxylate',
 'acetic acid',
 'agar',
 'aqua',
 'asparagopsis armata extract',
 'asparagopsis armata extract (elaspro)',
 'behenyl alcohol',
 'bg',
 'bisethoxydiglycol cyclohexane-1',
 'caprooil tetrapeptide-3',
 'caprylate/caprate/myristate/stearate',
 'caprylic/capric triglyceride',
 'cholesteryl stearate',
 'collagen',
 'collaplus',
 'copolymer',
 'dextran',
 'dimethicone',
 'edta-2na',
 'elasglow',
 'elaspro',
 'erasrich',
 'ethanol',
 'gagome extract',
 'gagome extract (erasrich)',
 'gluconic acid',
 'glycerin',
 'glyceryl stearate',
 'glyceryl tri',
 'glyceryl tri(caprylate/caprate/myristate/stearate)',
 'hydrogenated lecithin',
 'hydrolyzed collagen',
 'hydrolyzed elastin',
 'hydrolyzed soybean protein',
 'hydrolyzed soybean protein (collaplus)',
 'hydroxyethyl acrylate/sodium acryloyldimethyltaurate copolymer',
 'majorana leaf extract',
 'maltodextrin',
 'peony root extract',
 'polyglyceryl-10 stearate'

In [138]:
import re

def split_string_with_slash(s):
    """Split ``s`` on '/', '(' and ')' and return the stripped, non-empty parts as a list."""
    stripped_chunks = (chunk.strip() for chunk in re.split(r'[/()]', s))
    return [chunk for chunk in stripped_chunks if chunk]

# Sample names covering the shapes seen in ingredient lists: '/' inside parentheses,
# a bare '/', a '/' with trailing words, and several '/' inside parentheses.
strings = [
    "WATER(AQUA/EAU)",
    "Water/Aqua",
    "hydroxyethyl acrylate/sodium acryloyldimethyltaurate copolymer",
    "glyceryl tri(caprylate/caprate/myristate/stearate)"
]

# Rebinds the notebook-wide input_list to a one-element seed set for this experiment.
input_list = {'Aqua'}

# Only the samples containing both '(' and ')' go through the parentheses split.
inputs_with_parentheses = filter_strings_with_parentheses(strings)
print(inputs_with_parentheses)
for input_with_parentheses in inputs_with_parentheses:
    part1, part2 = split_string_with_parentheses(input_with_parentheses)
    # Add both alternative names to the set.
    input_list.update({part1, part2})
    
# Display the seed set together with the names added by the loop.
input_list

['WATER(AQUA/EAU)', 'glyceryl tri(caprylate/caprate/myristate/stearate)']


{'AQUA/EAU',
 'Aqua',
 'WATER',
 'caprylate/caprate/myristate/stearate',
 'glyceryl tri'}

In [141]:
# Spot-check split_string_with_parentheses on a name whose parentheses contain several '/'.
# Note: this rebinds input_string, replacing any full ingredient list assigned earlier.
input_string = "glyceryl tri(caprylate/caprate/myristate/stearate)"
split_string_with_parentheses(input_string)

('glyceryl tri', 'caprylate/caprate/myristate/stearate')

In [142]:

# Run split_string_with_slash over every sample in `strings` and print the pieces of each.
for s in strings:
    parts = split_string_with_slash(s)
    print(parts)

['WATER', 'AQUA', 'EAU']
['Water', 'Aqua']
['hydroxyethyl acrylate', 'sodium acryloyldimethyltaurate copolymer']
['glyceryl tri', 'caprylate', 'caprate', 'myristate', 'stearate']
