## 1. Import Review Data

In [4]:
import pandas as pd
df = pd.read_csv('review.csv')
df.head()

Unnamed: 0,Review,Verified Purchase,Month,Year,Brand,SPF,Oz,Retailer
0,Reliable clear sunscreen that goes great under...,1,1,2022,Supergoop!,40,1.7,Dermstore
1,Yes. Just yes.HG status for me and I never say...,1,1,2021,Supergoop!,40,1.7,Dermstore
2,The GOAT of Daily SPF!Having dry skin all year...,1,1,2021,Supergoop!,40,1.7,Dermstore
3,Good sunscreenGood lightweight sunscreen so I ...,1,1,2021,Supergoop!,40,1.7,Dermstore
4,The only sunscreen I wearI feel like I've trie...,1,1,2021,Supergoop!,40,1.7,Dermstore


## 2. Explore Data

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6067 entries, 0 to 6066
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Review             6065 non-null   object 
 1   Verified Purchase  6067 non-null   int64  
 2   Month              6067 non-null   int64  
 3   Year               6067 non-null   int64  
 4   Brand              6067 non-null   object 
 5   SPF                6067 non-null   int64  
 6   Oz                 6067 non-null   float64
 7   Retailer           6067 non-null   object 
dtypes: float64(1), int64(4), object(3)
memory usage: 379.3+ KB


In [6]:
# Drop Null Review
df_review = df.dropna()

# Drop Duplicate Review
df_review = df_review.drop_duplicates(['Review'])

df_review.info()
df_review.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6042 entries, 0 to 6066
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Review             6042 non-null   object 
 1   Verified Purchase  6042 non-null   int64  
 2   Month              6042 non-null   int64  
 3   Year               6042 non-null   int64  
 4   Brand              6042 non-null   object 
 5   SPF                6042 non-null   int64  
 6   Oz                 6042 non-null   float64
 7   Retailer           6042 non-null   object 
dtypes: float64(1), int64(4), object(3)
memory usage: 424.8+ KB


Unnamed: 0,Review,Verified Purchase,Month,Year,Brand,SPF,Oz,Retailer
0,Reliable clear sunscreen that goes great under...,1,1,2022,Supergoop!,40,1.7,Dermstore
1,Yes. Just yes.HG status for me and I never say...,1,1,2021,Supergoop!,40,1.7,Dermstore
2,The GOAT of Daily SPF!Having dry skin all year...,1,1,2021,Supergoop!,40,1.7,Dermstore
3,Good sunscreenGood lightweight sunscreen so I ...,1,1,2021,Supergoop!,40,1.7,Dermstore
4,The only sunscreen I wearI feel like I've trie...,1,1,2021,Supergoop!,40,1.7,Dermstore


In [4]:
df['Brand'].value_counts()

Elta MD       3557
Supergoop!    2510
Name: Brand, dtype: int64

In [5]:
df['Retailer'].value_counts()

Dermstore    2506
Amazon       2051
Sephora      1510
Name: Retailer, dtype: int64

# Count the numbers of reviews per brand
cnt_review = df_review['Brand'].value_counts()
cnt_review

In [8]:
# import SentimentIntensityAnalyzer class
# from vaderSentiment.vaderSentiment module.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 
# function to print sentiments
# of the sentence.
def sentiment_scores(sentence):
 
    # Create a SentimentIntensityAnalyzer object.
    sid_obj = SentimentIntensityAnalyzer()
 
    # polarity_scores method of SentimentIntensityAnalyzer
    # object gives a sentiment dictionary.
    # which contains pos, neg, neu, and compound scores.
    sentiment_dict = sid_obj.polarity_scores(sentence)
     
    print("Overall sentiment dictionary is : ", sentiment_dict)
    print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
    print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
    print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")
 
    print("Sentence Overall Rated As", end = " ")
 
    # decide sentiment as positive, negative and neutral
    if sentiment_dict['compound'] >= 0.05 :
        print("Positive")
 
    elif sentiment_dict['compound'] <= - 0.05 :
        print("Negative")
 
    else :
        print("Neutral")
 
 
   
# Driver code
if __name__ == "__main__" :
 
    print("\n1st statement :")
    sentence = "Geeks For Geeks is the best portal for \
                the computer science engineering students."
 
    # function calling
    sentiment_scores(sentence)
 
    print("\n2nd Statement :")
    sentence = "study is going on as usual"
    sentiment_scores(sentence)
 
    print("\n3rd Statement :")
    sentence = "I am very sad today."
    sentiment_scores(sentence)


1st statement :
Overall sentiment dictionary is :  {'neg': 0.165, 'neu': 0.588, 'pos': 0.247, 'compound': 0.5267}
sentence was rated as  16.5 % Negative
sentence was rated as  58.8 % Neutral
sentence was rated as  24.7 % Positive
Sentence Overall Rated As Positive

2nd Statement :
Overall sentiment dictionary is :  {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
sentence was rated as  0.0 % Negative
sentence was rated as  100.0 % Neutral
sentence was rated as  0.0 % Positive
Sentence Overall Rated As Neutral

3rd Statement :
Overall sentiment dictionary is :  {'neg': 0.459, 'neu': 0.541, 'pos': 0.0, 'compound': -0.5256}
sentence was rated as  45.9 % Negative
sentence was rated as  54.1 % Neutral
sentence was rated as  0.0 % Positive
Sentence Overall Rated As Negative


In [9]:
sentiment_scores(df_review['Review'][(df['Retailer'] == 'Amazon') & (df['Brand'] == 'Elta MD')])

Overall sentiment dictionary is :  {'neg': 0.066, 'neu': 0.76, 'pos': 0.173, 'compound': 1.0}
sentence was rated as  6.6000000000000005 % Negative
sentence was rated as  76.0 % Neutral
sentence was rated as  17.299999999999997 % Positive
Sentence Overall Rated As Positive


In [10]:
sentiment_scores(df_review['Review'][(df['Retailer'] == 'Amazon') & (df['Brand'] == 'Supergoop!')])

Overall sentiment dictionary is :  {'neg': 0.057, 'neu': 0.742, 'pos': 0.201, 'compound': 1.0}
sentence was rated as  5.7 % Negative
sentence was rated as  74.2 % Neutral
sentence was rated as  20.1 % Positive
Sentence Overall Rated As Positive


In [11]:
sentiment_scores(df_review['Review'][(df['Retailer'] == 'Sephora') & (df['Brand'] == 'Elta MD')])

Overall sentiment dictionary is :  {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}
sentence was rated as  0.0 % Negative
sentence was rated as  0.0 % Neutral
sentence was rated as  0.0 % Positive
Sentence Overall Rated As Neutral


In [12]:
sentiment_scores(df_review['Review'][(df['Retailer'] == 'Sephora') & (df['Brand'] == 'Supergoop!')])

Overall sentiment dictionary is :  {'neg': 0.052, 'neu': 0.739, 'pos': 0.209, 'compound': 1.0}
sentence was rated as  5.2 % Negative
sentence was rated as  73.9 % Neutral
sentence was rated as  20.9 % Positive
Sentence Overall Rated As Positive


In [13]:
sentiment_scores(df_review['Review'][(df['Retailer'] == 'Dermstore') & (df['Brand'] == 'Elta MD')])

Overall sentiment dictionary is :  {'neg': 0.044, 'neu': 0.738, 'pos': 0.219, 'compound': 1.0}
sentence was rated as  4.3999999999999995 % Negative
sentence was rated as  73.8 % Neutral
sentence was rated as  21.9 % Positive
Sentence Overall Rated As Positive


In [14]:
sentiment_scores(df_review['Review'][(df['Retailer'] == 'Dermstore') & (df['Brand'] == 'Supergoop!')])

Overall sentiment dictionary is :  {'neg': 0.053, 'neu': 0.717, 'pos': 0.23, 'compound': 1.0}
sentence was rated as  5.3 % Negative
sentence was rated as  71.7 % Neutral
sentence was rated as  23.0 % Positive
Sentence Overall Rated As Positive


In [15]:
sentiment_scores(df_review['Review'][df['Brand'] == 'Elta MD'])

Overall sentiment dictionary is :  {'neg': 0.051, 'neu': 0.745, 'pos': 0.203, 'compound': 1.0}
sentence was rated as  5.1 % Negative
sentence was rated as  74.5 % Neutral
sentence was rated as  20.3 % Positive
Sentence Overall Rated As Positive


In [16]:
sentiment_scores(df_review['Review'][df['Brand'] == 'Supergoop!'])

Overall sentiment dictionary is :  {'neg': 0.054, 'neu': 0.737, 'pos': 0.209, 'compound': 1.0}
sentence was rated as  5.4 % Negative
sentence was rated as  73.7 % Neutral
sentence was rated as  20.9 % Positive
Sentence Overall Rated As Positive


## Key Words per Brand and Merchant

In [2]:
def BERT(title):
    
    array_text = pd.DataFrame(df[df['Brand'] == title]['Review']).to_numpy()
    
    bow = []
    from keybert import KeyBERT
    kw_extractor = KeyBERT('distilbert-base-nli-mean-tokens')
    for j in range(len(array_text)):
        keywords = kw_extractor.extract_keywords(array_text[j][0])
        bow.append(keywords)
        
    new_bow = []
    for i in range(0, len(bow)):
        for j in range(len(bow[i])):
            new_bow.append(bow[i][j])
            
    keyword = pd.DataFrame(new_bow, columns=['keyword','weight'])
    print(keyword.groupby('keyword').agg('sum').sort_values('weight', ascending=False).head(20))

In [18]:
BERT('Elta MD')

Downloading:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.99k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/550 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/265M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/450 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

                 weight
keyword                
sunscreen      896.8877
love           151.9620
sunscreens     149.6481
sunscreeni     100.8923
favorite        98.7953
white           88.4982
sun             79.2322
oily            78.3739
sunscreenthis   73.0215
dermatologist   72.7848
moisturizer     68.5683
best            62.9479
light           61.3085
great           58.7730
skin            50.8567
daily           47.8090
moisturizing    41.5258
acne            40.4357
easy            40.2609
makeup          39.3623


In [8]:
df=df.dropna()

In [9]:
BERT('Supergoop!')

              weight
keyword             
sunscreen   624.7785
love        142.3952
sunscreens   99.5966
oily         86.7740
helpful      80.2727
makeup       79.8932
smooth       67.4052
white        53.7297
sun          49.0846
light        48.5773
favorite     41.0710
greasy       40.3145
great        39.3816
skin         36.2673
best         30.5023
primer       29.8325
silicone     26.3827
product      25.2256
perfect      24.8459
clear        24.5414
