## Sentiment analysis

In [27]:
# pip install pandas
# pip install -q transformers
# !pip3 install emoji

Collecting emoji
  Using cached emoji-1.7.0-py3-none-any.whl
Installing collected packages: emoji
Successfully installed emoji-1.7.0


In [1]:
import pandas as pd
from transformers import pipeline

In [2]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [3]:
sentiment_pipeline = pipeline("sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis", truncation=True)

In [13]:
sep = pd.read_csv('./datasets/sep_combi_final_preprocessed.csv', usecols = ['num_comments','score', 'link_flair_text', 'post', 'covid_onset', 
       'cleaned_text'] ,low_memory=False)
sep.shape

(205594, 6)

In [44]:
sep_sent = sep[(sep['num_comments'] > 13) & (sep['post'] == 'submission')].reset_index(drop=True)
print(sep_sent.shape)
sep_sent.head()

(3764, 6)


Unnamed: 0,num_comments,score,link_flair_text,post,covid_onset,cleaned_text
0,15,4,,submission,no,Trans woman here wanting a Sephora makeover I ...
1,17,3,,submission,no,Does Sephora still give out physical cards for...
2,14,3,,submission,no,VIB Rouge Welcome Gift Availability I became V...
3,14,6,,submission,no,Pressed powder foundation brush recommendation...
4,18,4,,submission,no,Sephora Play Subscription Box Anyone here memb...


In [45]:
sep_sent['cleaned_text'] = sep_sent['cleaned_text'].astype(str)

In [46]:
sep_sent['tokenised_text'] = sep_sent['cleaned_text'].apply(lambda x: tokenizer(x))

In [48]:
sep_sent['hugging_outcome'] = sep_sent['cleaned_text'].apply(lambda x: sentiment_pipeline(x))

# function to unpack out list containing dictionaries or dictionaries
def unpack_cell(df, unpacked_col, new_col, key):
    """
    create new column within dataframe storing what is requested and return a collection of unique dictionaries
    
    Parameters
    ----------
    Parameters to pass as part of num_col_null
    
    df : dataframe
        dataframe containing column.
    unpacked_col : string
        column name in dataframe
    new_col : string
        column name in dataframe
    key : string
        'key' that is being unpacked from dictionary
        
    Return
    ------
    value returned after calling the function
    
    list_col : list
        list containing a collection of unique dictionaries that was unpacked
    
    """

    
    #============
    
    # store all possible dictionaries
    list_col = [] 
    
    # store list to attach to new_col
    cat_list = []
    
    # loop to extract dictionary
    for index, row in tqdm(df[unpacked_col].iteritems(), total=len(df)):
    #    if index > 55:
    #        break

        # create list to store value
        temp_list = []   
        
        # check if row is a list
        if type(row) == list:
            # check if is empty
            if row == []:
                cat_list.append(np.nan)
            else:
                # loop the list within the cell
                for i in range(len(row)):
                    # check if is existing identified value
                    if row[i] not in list_col:
                        # if is not in existing identified value, append
                        list_col.append(row[i])
                    temp_list.append(row[i].get(key, np.nan))
                cat_list.append(temp_list)
        else:
            if row not in list_col:
            # if is not in existing identified value, append
                list_col.append(row)
            cat_list.append(row.get(key, np.nan))
    
    # attach list to new_col in df
    df[new_col] = cat_list
    
    return list_col

In [49]:
pd.set_option('display.max_colwidth', None)
sep_sent.head()

Unnamed: 0,num_comments,score,link_flair_text,post,covid_onset,cleaned_text,tokenised_text,hugging_outcome
0,15,4,,submission,no,Trans woman here wanting a Sephora makeover I am transgender and know so little about makeup I booked an appointment at Sephora for their 50dollar program thing but I am unsure what it entails What I need is a sit down with someone who will show me what works best for my face and what I should buy Is that how it works o,"[input_ids, attention_mask]","[{'label': 'NEU', 'score': 0.895317554473877}]"
1,17,3,,submission,no,Does Sephora still give out physical cards for Rouge I saw a post on MUA about getting the Rouge card and was confused because when I went to my local Sephora a week ago the cashier told me they no longer give out physical cards Is it just no longer available in Canada or is it still possible for me to get the Rouge card somehow,"[input_ids, attention_mask]","[{'label': 'NEU', 'score': 0.908152163028717}]"
2,14,3,,submission,no,VIB Rouge Welcome Gift Availability I became VIB Rouge in November during the VIB sale and received an email telling me I could pick up my welcome gift Nars Goulue blush in stores or in the reward bazaar online It has never appeared in my rewards bazaar and I am rarely in a physical Sephora store Emailed Sephora in November and earlier today both times they told me both times it would be in stock soon Has anyone ever received it Is the rouge gift a Sephora urban myth,"[input_ids, attention_mask]","[{'label': 'NEU', 'score': 0.7346374988555908}]"
3,14,6,,submission,no,Pressed powder foundation brush recommendations So I was being lazy and blow drying my NARS Hanamachi Kabuki Brush and managed to singe the whole top by drying it too close to the hair dryer And I do not think anywhere sells it anymore I was wondering if anyone has recommendations for similar small high quality dense brushes that will pick up lots of powder foundation for good coverage without caking Maybe something I can get at Sephora for 50 or less I am hoping to find something similar to It was small and did not have a huge handle so it was good for travel And tips for washingdrying these hella dense brushes Thanks,"[input_ids, attention_mask]","[{'label': 'NEU', 'score': 0.8498795032501221}]"
4,18,4,,submission,no,Sephora Play Subscription Box Anyone here members of their subscription box My boyfriend just signed me up for my birthday and I am super psyched,"[input_ids, attention_mask]","[{'label': 'POS', 'score': 0.9553959965705872}]"


In [50]:
sep_sent['hugging_outcome'][0]

[{'label': 'NEU', 'score': 0.895317554473877}]

In [51]:
# Using list comprehension
# Get values of particular key in list of dictionaries
def get_sentiment(x):
    value =  [sub['label'] for sub in x]
    senti = (value)[0]
    return senti

In [52]:
def get_score(x):
    value =  [sub['score'] for sub in x]
    score = (value)[0]   
    return score

In [53]:
sep_sent['sentiment'] = sep_sent['hugging_outcome'].apply(lambda x: get_sentiment(x))

In [54]:
sep_sent['score'] = sep_sent['hugging_outcome'].apply(lambda x: get_score(x))

In [55]:
sep_sent.tail()

Unnamed: 0,num_comments,score,link_flair_text,post,covid_onset,cleaned_text,tokenised_text,hugging_outcome,sentiment
3759,80,0.890445,Discussion,submission,yes,How many foundations do you own I have so many foundations that I want to try How many do you own I have 2 already and do not use them every day so I am afraid they will spoil before I get to use them,"[input_ids, attention_mask]","[{'label': 'NEU', 'score': 0.8904446959495544}]",NEU
3760,81,0.502281,PSA,submission,yes,4X Points on Entire Order when you purchase any Sephora Collection item,"[input_ids, attention_mask]","[{'label': 'NEU', 'score': 0.5022813081741333}]",NEU
3761,20,0.981611,CANADA,submission,yes,You Are telling me I wasted 100 points on THIS Sephora be out here scamming woman facepalming mediumlight skin tone,"[input_ids, attention_mask]","[{'label': 'NEG', 'score': 0.9816107749938965}]",NEG
3762,19,0.879546,PSA,submission,yes,FYI the Points Multiplier only works on Sephora Collection items,"[input_ids, attention_mask]","[{'label': 'NEU', 'score': 0.8795461654663086}]",NEU
3763,18,0.967482,Question,submission,yes,Best skin like matte foundation that is actually long lasting I Have usually usually the long lasting foundations are super matte and heavy And the skin like foundations fade really quickly What Is the perfect foundation that combines both,"[input_ids, attention_mask]","[{'label': 'POS', 'score': 0.967482328414917}]",POS


In [57]:
pd.DataFrame(sep_sent).to_csv('datasets/sep_sentiment_analysis_bigger.csv', index=False)