# Imports

In [41]:
import pandas as pd
from tqdm import notebook
from collections import defaultdict
import numpy as np
from numpy import dot
from numpy.linalg import norm

# Data

In [3]:
# Load the product table. Per the preview below, each row has a product-name slug
# plus 'ingredients' and 'formatted ingredients' columns; the helper functions
# later parse 'formatted ingredients' as text.
inci_prod = pd.read_csv('./data/inci_prod_final.csv')
inci_prod

Unnamed: 0,product name,ingredients,formatted ingredients
0,001-skincare-active-marine-collagen-serum,"['Aqua', 'Aloe Barbadensis Leaf Extract', 'Pol...","['water', 'aloe-barbadensis-leaf-extract', 'po..."
1,001-skincare-alpha-glow-flash-facial-serum-pri...,"['Aqua', 'Rosa Damascena Flower Water', 'Aloe ...","['water', 'rosa-damascena-flower-water', 'aloe..."
2,001-skincare-alpha-paradise-cleansing-dew,"['Prunus Amygdalus Dulcis Oil (Sweet Almond)',...","['prunus-amygdalus-dulcis-oil', 'simmondsia-ch..."
3,001-skincare-amino-acids-lipids-recovery-cream,"['Aqua', 'Helianthus Annuus Seed Oil*', 'Butyr...","['water', 'helianthus-annuus-seed-oil', 'butyr..."
4,001-skincare-beighten-plump-emulsion-clay-mask,"['Aqua', 'Kaolin', 'Glycerin', 'Bentonite', 'P...","['water', 'kaolin', 'glycerin', 'bentonite', '..."
...,...,...,...
76995,zymogen-fermented-soybean-firming-serum,"['Water', 'Butylene Glycol', 'Peg/Ppg-18/4 Cop...","['water', 'butylene-glycol', 'peg-ppg-18-4-cop..."
76996,zymogen-houttuynia-cordata-ferment-mist,"['Water', 'Butylene Glycol', 'Bacillus/Glutami...","['water', 'butylene-glycol', 'bacillus-glutami..."
76997,zymogen-houttuynia-cordata-ferment-serum,"['Water', 'Alcohol', 'Butylene Glycol', 'Sodiu...","['water', 'alcohol', 'butylene-glycol', 'sodiu..."
76998,zymogen-long-lasting-power-sun-cream,"['Water', 'Ethylhexyl Methoxycinnamate', 'Glyc...","['water', 'ethylhexyl-methoxycinnamate', 'glyc..."


In [102]:
# Per-ingredient table (see the preview at the end of the notebook for its columns).
ing_effect = pd.read_csv('./data/skca_ing_count_effect.csv')
# Distinct ingredient names; the vector-building helpers iterate over this array,
# so its order fixes the order of the vector components.
ing_lst = ing_effect['ingredient'].unique()

# master_df

In [103]:
# Empty frame: a leading 'product' column followed by one column per entry of ing_lst.
column_names = ['product', *ing_lst]
master_df = pd.DataFrame(columns=column_names)
master_df

Unnamed: 0,product,water,glycerin,phenoxyethanol,parfum,butylene-glycol,ethylhexylglycerin,disodium-edta,citric-acid,xanthan-gum,...,stachys-officinalis-flower-leaf-stem,taxillus-chinensis-extract,aquirliria-crassna,croton-lechleri,undecylenoyl-oat-amino-acids,lactobacillus-pueraria-mirifica-root-ferment-filtrate,aspergillus-saccharomyces-pumpkin-seed-ferment-filtrate,saccharomyces-prunus-mume-extract-ferment-filtrate,saccharomyces-day-lily-flower-honey-ferment-filtrate,aspergillus-saccharomyces-glycyrrhiza-uralensis-root-extract-ferment-filtrate


# Helper functions

In [130]:
def get_cnt_vec(product):
    """Build a binary presence vector for ``product`` over ``ing_lst``.

    Parameters
    ----------
    product : str
        Value compared for equality against the 'product name' column of
        ``inci_prod``.

    Returns
    -------
    list of int
        One entry per element of ``ing_lst``, in the same order: 1 if that
        ingredient appears in the product's formatted ingredient list, else 0.

    Raises
    ------
    IndexError
        If no row of ``inci_prod`` has this product name.
    """
    # Reuse the shared parser rather than duplicating the string-splitting logic,
    # and hold the result in a set so each membership check against the (long)
    # ing_lst is a hash lookup instead of a list scan.
    present = set(get_inglst(product))
    return [1 if ing in present else 0 for ing in ing_lst]


In [121]:
def get_sqc_inv_vec(product):
    """Build a position-weighted vector for ``product`` over ``ing_lst``.

    Parameters
    ----------
    product : str
        Product name passed to ``get_inglst``.

    Returns
    -------
    list
        One entry per element of ``ing_lst``, in the same order. An ingredient
        found at 0-based position ``p`` of the product's ingredient list
        contributes ``1 / (1 + p)`` (first occurrence wins if it is repeated);
        an ingredient the product does not contain contributes 0.
    """
    # Record each ingredient's first position once, instead of scanning the
    # product's list with ``in`` and ``.index()`` for every entry of ing_lst.
    first_pos = {}
    for pos, ing in enumerate(get_inglst(product)):
        first_pos.setdefault(ing, pos)
    return [1/(1+first_pos[ing]) if ing in first_pos else 0 for ing in ing_lst]

In [106]:
def get_inglst(prod):
    """Return the formatted ingredient names of one product as a list of str.

    Parameters
    ----------
    prod : str
        Value compared for equality against the 'product name' column of
        ``inci_prod``. If several rows match, the first one is used.

    Returns
    -------
    list of str
        Ingredient names in the order they appear in the product's
        'formatted ingredients' cell.

    Raises
    ------
    IndexError
        If no row of ``inci_prod`` has this product name.
    """
    matches = inci_prod.loc[inci_prod['product name']==prod, 'formatted ingredients']
    if matches.empty:
        # Same exception type as the bare ``[0]`` lookup raised before, but the
        # message now says which product was missing.
        raise IndexError(f'no product named {prod!r} in inci_prod')
    raw = matches.iloc[0]
    # The cell holds the list as text, e.g. "['water', 'glycerin']". Splitting on
    # the quote character yields the names interleaved with "[", ", " and "]"
    # fragments; the filter drops every fragment containing one of those
    # characters (so a name that itself contains "," "[" or "]" is dropped too).
    return [piece for piece in raw.split("'")
            if "," not in piece and "[" not in piece and "]" not in piece]

In [107]:
def compare_prods(pname1,pname2):
    '''Compare the ingredient sets of two products.

    returns union,interx,unique1,unique2 -- four lists: ingredients in either
    product, in both, only in the first, and only in the second. The lists come
    from sets, so their element order is not meaningful.
    '''
    first = set(get_inglst(pname1))
    second = set(get_inglst(pname2))
    return list(first | second), list(first & second), list(first - second), list(second - first)

In [108]:
def ing_df(inglst):
    """Collect the ``ing_effect`` rows for the given ingredients into one frame.

    Parameters
    ----------
    inglst : iterable of str
        Ingredient names, compared for equality against the 'ingredient' column
        of ``ing_effect``.

    Returns
    -------
    pandas.DataFrame
        Matching rows in the order of ``inglst``, with a fresh 0..n-1 index.
        Names that are absent from ``ing_effect`` contribute no rows. With no
        matches the result is an empty frame with the columns
        'ingredient', 'count', 'used %', 'effects', 'main effect'.
    """
    # The empty template goes first so the result keeps this column order even
    # when nothing matches.
    template = pd.DataFrame(columns=['ingredient','count','used %','effects','main effect'])
    # Gather all slices and concatenate once: calling pd.concat inside the loop
    # re-copies every accumulated row on each iteration.
    pieces = [ing_effect[ing_effect['ingredient']==ing] for ing in inglst]
    return pd.concat([template, *pieces], ignore_index=True)

In [109]:
def cos_sim(x,y):
    """Cosine similarity of two equal-length numeric vectors, rounded to 3 decimals.

    Parameters
    ----------
    x, y : sequence or array of numbers
        Vectors of the same length.

    Returns
    -------
    float
        ``dot(x, y) / (norm(x) * norm(y))`` rounded to 3 decimal places, or 0.0
        when either vector has zero norm (the ratio is undefined there and would
        otherwise evaluate to nan with a divide warning).
    """
    denom = norm(x)*norm(y)
    if denom == 0:
        # A zero vector shares no direction with anything: report no similarity.
        return 0.0
    return round(dot(x, y)/denom,3)

def euc_dist(x,y):
    """Euclidean distance between two equal-length numeric vectors, rounded to 3 decimals.

    Parameters
    ----------
    x, y : sequence or array of numbers
        Vectors of the same length. Plain Python lists are accepted.

    Returns
    -------
    float
        ``sqrt(sum((x - y) ** 2))`` rounded to 3 decimal places.
    """
    # Coerce to arrays first: ``list - list`` is a TypeError, and the vector
    # builders in this notebook return plain lists.
    diff = np.asarray(x) - np.asarray(y)
    return round(np.sqrt(np.sum(diff**2)),3)

In [110]:
def _resolve_product_name(query):
    """Return the 'product name' in ``inci_prod`` that best matches ``query``."""
    names = inci_prod['product name']
    # Prefer an exact name: otherwise an earlier row that merely contains the
    # query as a substring would be returned instead of the product itself.
    exact = names[names == query]
    if not exact.empty:
        return exact.values[0]
    # Fall back to the original pattern search (``query`` is still treated as a
    # regular expression); na=False keeps missing names from breaking the mask.
    partial = names[names.str.contains(query, na=False)]
    if partial.empty:
        raise IndexError(f'no product name matches {query!r}')
    return partial.values[0]


def create_link_inci_decoder(prod1,prod2):
    """Build the incidecoder.com comparison URL for two products.

    Parameters
    ----------
    prod1, prod2 : str
        A full product name, or a pattern matched against the 'product name'
        column of ``inci_prod`` with ``str.contains``. An exact name takes
        precedence; otherwise the first matching row is used.

    Returns
    -------
    str
        ``https://incidecoder.com/compare-products/<name1>/<name2>``.

    Raises
    ------
    IndexError
        If either argument matches no product name.
    """
    p1 = _resolve_product_name(prod1)
    p2 = _resolve_product_name(prod2)
    return f'https://incidecoder.com/compare-products/{p1}/{p2}'

In [111]:
def comparison_summary(prod1,prod2):
    """Print an ingredient comparison report for two products.

    ``prod1`` and ``prod2`` are patterns matched against the 'product name'
    column of ``inci_prod`` with ``str.contains``; the first matching row is
    used for each. The report prints the resolved names, the incidecoder
    comparison link, two cosine similarities (from ``get_cnt_vec`` and
    ``get_sqc_inv_vec``), ingredient counts, and displays tables of the common
    ingredients and of each product's unique ingredients. Returns None.
    """
    names = inci_prod['product name']
    name_a = names[names.str.contains(prod1)].values[0]
    name_b = names[names.str.contains(prod2)].values[0]

    # Every metric is computed before the first line of output is printed.
    # Euclidean distances (euc_dist) are not part of the report.
    count_similarity = cos_sim(get_cnt_vec(name_a),get_cnt_vec(name_b))
    sequence_similarity = cos_sim(get_sqc_inv_vec(name_a),get_sqc_inv_vec(name_b))
    union,interx,unique1,unique2 = compare_prods(name_a,name_b)

    banner = '='*130
    rule = '-'*130

    print(banner)
    print('[ Products Ingredients Comparison Summary ]')
    print()
    print('Product A:',name_a)
    print('Product B:',name_b)
    print(create_link_inci_decoder(name_a,name_b))
    print()

    print(rule)
    print('[ Similarity Measurement ]')
    print('Cosine Similarity - Count-based: ',count_similarity)
    print('Cosine Similarity - Sequence-based(inverse): ',sequence_similarity)

    print(rule)
    print('[ Ingredients Comparison ]')
    count_lines = (
        ('Total # of Ingredients:', union),
        ('Total # of Common Ingredients:', interx),
        ('Product A # of Unique:', unique1),
        ('Product B # of Unique:', unique2),
    )
    for label, members in count_lines:
        print(label, len(members))

    sections = (
        ('[ Common Ingredients ]', interx),
        (f'[ Unique Ingredients of {name_a} ]', unique1),
        (f'[ Unique Ingredients of {name_b} ]', unique2),
    )
    for position, (heading, members) in enumerate(sections):
        if position:
            # Blank line between consecutive tables (none before the first).
            print()
        print(rule)
        print(heading)
        display(ing_df(members))
    print(banner)

# Product Comparison Summary

In [141]:
# Run the report for one product pair; the commented-out calls are alternative
# pairs that can be swapped in.
#comparison_summary('joah-skin-polish-gold-peel-off-mask','7th-heaven-tea-tree-peel-off')
comparison_summary('foreo-serum-serum-serum','aprilskin-20-vitamin-a-c-e-brightening-serum')
#comparison_summary('foreo-serum-serum-serum','awake-beauty-glow-pill-super-serum')

[ Products Ingredients Comparison Summary ]

Product A: foreo-serum-serum-serum
Product B: aprilskin-20-vitamin-a-c-e-brightening-serum
https://incidecoder.com/compare-products/foreo-serum-serum-serum/aprilskin-20-vitamin-a-c-e-brightening-serum

----------------------------------------------------------------------------------------------------------------------------------
[ Similarity Measurement ]
Cosine Similarity - Count-based:  0.303
Cosine Similarity - Sequence-based(inverse):  0.714
----------------------------------------------------------------------------------------------------------------------------------
[ Ingredients Comparison ]
Total # of Ingredients: 91
Total # of Common Ingredients: 16
Product A # of Unique: 29
Product B # of Unique: 46
----------------------------------------------------------------------------------------------------------------------------------
[ Common Ingredients ]


Unnamed: 0,ingredient,count,used %,effects,main effect
0,butylene-glycol,24942,32.39,"['moisturizer/humectant', 'solvent']",moisturizer/humectant
1,1-2-hexanediol,13093,17.0,['solvent'],solvent
2,hyaluronic-acid,3496,4.54,"['skin-identical ingredient', 'moisturizer/hum...",skin-identical ingredient
3,propanediol,12112,15.73,"['solvent', 'moisturizer/humectant']",solvent
4,ethylhexylglycerin,23514,30.54,['preservative'],preservative
5,pentylene-glycol,8601,11.17,"['solvent', 'moisturizer/humectant']",solvent
6,sodium-hyaluronate-crosspolymer,1213,1.58,"['skin-identical ingredient', 'antioxidant', '...",skin-identical ingredient
7,water,67796,88.05,['solvent'],solvent
8,panthenol,15715,20.41,"['soothing', 'moisturizer/humectant']",soothing
9,sodium-hyaluronate,19878,25.82,"['skin-identical ingredient', 'moisturizer/hum...",skin-identical ingredient



----------------------------------------------------------------------------------------------------------------------------------
[ Unique Ingredients of foreo-serum-serum-serum ]


Unnamed: 0,ingredient,count,used %,effects,main effect
0,peg-ppg-17-6-copolymer,469,0.61,"['emollient', 'solvent']",emollient
1,peg-ppg-14-7-dimethyl-ether,187,0.24,[''],
2,glyceryl-polymethacrylate,467,0.61,['viscosity controlling'],viscosity controlling
3,betaine,5850,7.6,['moisturizer/humectant'],moisturizer/humectant
4,serine,1879,2.44,['skin-identical ingredient'],skin-identical ingredient
5,glucose,2331,3.03,['moisturizer/humectant'],moisturizer/humectant
6,glyceryl-glucoside,469,0.61,['moisturizer/humectant'],moisturizer/humectant
7,trehalose,3085,4.01,['moisturizer/humectant'],moisturizer/humectant
8,ci-14700,939,1.22,['colorant'],colorant
9,octyldodeceth-16,225,0.29,['emulsifying'],emulsifying



----------------------------------------------------------------------------------------------------------------------------------
[ Unique Ingredients of aprilskin-20-vitamin-a-c-e-brightening-serum ]


Unnamed: 0,ingredient,count,used %,effects,main effect
0,silica,7919,10.28,['viscosity controlling'],viscosity controlling
1,hemerocallis-fulva-flower-extract,39,0.05,[''],
2,dipotassium-glycyrrhizate,3264,4.24,"['soothing', 'moisturizer/humectant']",soothing
3,sorbitol,3124,4.06,['moisturizer/humectant'],moisturizer/humectant
4,glyceryl-stearate,13648,17.72,"['emollient', 'emulsifying']",emollient
5,cholesterol,2340,3.04,"['skin-identical ingredient', 'emollient']",skin-identical ingredient
6,tocopherol,22333,29.0,['antioxidant'],antioxidant
7,beta-glucan,2167,2.81,"['soothing', 'moisturizer/humectant']",soothing
8,c12-14-pareth-12,326,0.42,"['emulsifying', 'surfactant/cleansing']",emulsifying
9,cellulose-gum,1677,2.18,['viscosity controlling'],viscosity controlling




In [146]:
# Preview the per-ingredient table that ing_df filters by ingredient name.
ing_effect

Unnamed: 0,ingredient,count,used %,effects,main effect
0,water,67796,88.05,['solvent'],solvent
1,glycerin,55954,72.67,"['skin-identical ingredient', 'moisturizer/hum...",skin-identical ingredient
2,phenoxyethanol,36755,47.73,['preservative'],preservative
3,parfum,36094,46.88,['perfuming'],perfuming
4,butylene-glycol,24942,32.39,"['moisturizer/humectant', 'solvent']",moisturizer/humectant
...,...,...,...,...,...
15181,lactobacillus-pueraria-mirifica-root-ferment-f...,1,0.00,[''],
15182,aspergillus-saccharomyces-pumpkin-seed-ferment...,1,0.00,[''],
15183,saccharomyces-prunus-mume-extract-ferment-filt...,1,0.00,['moisturizer/humectant'],moisturizer/humectant
15184,saccharomyces-day-lily-flower-honey-ferment-fi...,1,0.00,[''],
