## Sentiment Analysis on Instagram comments


In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import warnings
warnings.filterwarnings("ignore")

import glob
import pandas as pd
import ast
import json
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import nltk
from nltk.stem import WordNetLemmatizer 
import matplotlib.pyplot as plt

In [None]:
# Classify the overall sentiment of a sentence with VADER.

# Shared analyzer instance, created lazily so the VADER lexicon is loaded
# once instead of on every call.
_VADER_ANALYZER = None


def _get_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def sentiment_scores(sentence, analyzer=None):
    """Return "Positive", "Negative" or "Neutral" for *sentence*.

    The label is derived from the ``compound`` entry of the dictionary
    returned by ``polarity_scores``: ``>= 0.05`` is positive, ``<= -0.05``
    is negative, anything in between is neutral.

    Args:
        sentence: Text to classify.
        analyzer: Optional object exposing ``polarity_scores(text)``.
            When omitted, a shared ``SentimentIntensityAnalyzer`` is used.

    Returns:
        One of the strings "Positive", "Negative" or "Neutral".
    """
    if analyzer is None:
        analyzer = _get_analyzer()

    # polarity_scores gives a dictionary with pos, neg, neu and compound scores.
    compound = analyzer.polarity_scores(sentence)['compound']

    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"

## Sentiment Analysis on comments of posts by santacruzbicycles

### Reading json file from scraped datasets

In [None]:
import glob

# Collect every scraped comments file of the santacruzbicycles account and
# echo the paths that were found.
comment_files = list(glob.glob('santacruzbicycles/*comments.json'))
for name in comment_files:
    print(name)

### Sentiment Analysis on comments of each post per account

In [None]:
import os

# Score every comment of every scraped file and collect one row per comment.
# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append copied the whole frame on each call and no longer exists
# in pandas 2.x.
analyzer = SentimentIntensityAnalyzer()  # one instance reused for all comments
rows = []

for path in comment_files:
    # Read the JSON file; the context manager closes it even if parsing fails.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # basename works for any directory depth and any path separator
    file_name = os.path.basename(path)
    print("json filename:", file_name)

    for comment in data:
        og_sentence = comment['text']

        # drop @mentions so user names do not influence the score
        sentence = " ".join(word for word in og_sentence.split() if not word.startswith('@'))

        print("original comment: " + og_sentence)
        print("cleaned comment: " + sentence)

        sentiment_dict = analyzer.polarity_scores(sentence)
        label = sentiment_scores(sentence)
        rows.append({'file_name': file_name,
                     'post_id': comment['id'],
                     'comment': og_sentence,
                     'Negative': sentiment_dict['neg'],
                     'Neutral': sentiment_dict['neu'],
                     'Positive': sentiment_dict['pos'],
                     'Compound': sentiment_dict['compound'],
                     'Sentiment': label})
        print("Overall sentiment dictionary is : ", sentiment_dict)
        print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
        print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
        print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")
        print("Sentence Overall Rated As", label)
        print()

comment_results = pd.DataFrame(rows, columns=['file_name', 'post_id', 'comment',
                                              'Negative', 'Neutral', 'Positive', 'Compound', 'Sentiment'])

In [None]:
# Show the per-comment sentiment table for santacruzbicycles.
comment_results

In [None]:
# Save the per-comment results. The DataFrame index is written as well
# (no index=False), which is why the reload cell drops 'Unnamed: 0'.
comment_results.to_csv('santacruzbicycles_sentiments.csv')

In [None]:
# Reload the saved results and discard the index column ('Unnamed: 0')
# that was written alongside the data.
santacruzbicycles_sentiments = pd.read_csv('santacruzbicycles_sentiments.csv').drop(columns=['Unnamed: 0'])
santacruzbicycles_sentiments

In [None]:
# Derive a 'date' column from the first 10 characters of each file name
# (presumably a YYYY-MM-DD prefix produced by the scraper - confirm).
santacruzbicycles_sentiments['date'] = [
    name[:10] for name in santacruzbicycles_sentiments['file_name']
]
santacruzbicycles_sentiments.head()

In [None]:
# Show the last rows of the santacruzbicycles sentiment table.
santacruzbicycles_sentiments.tail()

In [None]:
# Average the sentiment scores of all comments per date.
# The score columns are selected explicitly: mean('Compound') only passed a
# truthy value as `numeric_only`, it did not restrict the mean to 'Compound'.
score_columns = ['Negative', 'Neutral', 'Positive', 'Compound']
santacruzbicycles_date = (
    santacruzbicycles_sentiments
    .groupby('date')[score_columns]
    .mean()
    .reset_index()  # turns the 'date' group key back into a regular column
)
santacruzbicycles_date

In [None]:
# Save the per-date averages without the DataFrame index.
santacruzbicycles_date.to_csv('santacruzbicycles_date.csv', index=False)

In [None]:
# Build and display a word cloud from all santacruzbicycles comments.
lemmatizer = WordNetLemmatizer()

# lemmatize() works on a single word, so every comment is lower-cased and
# split into tokens first. Missing comments are dropped so they do not end up
# as the literal word "nan".
text = " ".join(
    lemmatizer.lemmatize(word)
    for com in santacruzbicycles_sentiments.comment.dropna().astype(str)
    for word in com.lower().split()
)
print ("There are {} words in the combination of all comments in santacruzbicycles account.".format(len(text.split())))

# Create stopword list:
# remove words that we want to exclude

stopwords = set(STOPWORDS)
stopwords.update(["santacruzbicycle","santacruzbicycles","santa","cruz","bike"])

# Generate a word cloud image

wordcloud = WordCloud(stopwords=stopwords, background_color="white", width=800, height=400).generate(text)

# Display the generated image the matplotlib way. The figure is created
# first so that axis("off") and tight_layout() apply to the figure that
# actually holds the image.

plt.figure(figsize=(40, 20))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()

## Sentiment Analysis on comments of posts by unsprungsg

### Reading json file from scraped datasets

In [None]:
import glob

# Collect every scraped comments file of the unsprungsg account and echo
# the paths that were found.
comment_files = list(glob.glob('unsprungsg/*comments.json'))
for name in comment_files:
    print(name)

### Sentiment Analysis on comments of each post per account

In [None]:
import os

# Score every comment of every scraped file and collect one row per comment.
# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append copied the whole frame on each call and no longer exists
# in pandas 2.x.
analyzer = SentimentIntensityAnalyzer()  # one instance reused for all comments
rows = []

for path in comment_files:
    # Read the JSON file; the context manager closes it even if parsing fails.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # basename works for any directory depth and any path separator
    file_name = os.path.basename(path)
    print("json filename:", file_name)

    for comment in data:
        og_sentence = comment['text']

        # drop @mentions so user names do not influence the score
        sentence = " ".join(word for word in og_sentence.split() if not word.startswith('@'))

        print("original comment: " + og_sentence)
        print("cleaned comment: " + sentence)

        sentiment_dict = analyzer.polarity_scores(sentence)
        label = sentiment_scores(sentence)
        rows.append({'file_name': file_name,
                     'post_id': comment['id'],
                     'comment': og_sentence,
                     'Negative': sentiment_dict['neg'],
                     'Neutral': sentiment_dict['neu'],
                     'Positive': sentiment_dict['pos'],
                     'Compound': sentiment_dict['compound'],
                     'Sentiment': label})
        print("Overall sentiment dictionary is : ", sentiment_dict)
        print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
        print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
        print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")
        print("Sentence Overall Rated As", label)
        print()

unsprungsg_results = pd.DataFrame(rows, columns=['file_name', 'post_id', 'comment',
                                                 'Negative', 'Neutral', 'Positive', 'Compound', 'Sentiment'])

In [None]:
# Show the per-comment sentiment table for unsprungsg.
unsprungsg_results

In [None]:
# Save the per-comment results. The DataFrame index is written as well
# (no index=False), which is why the reload cell drops 'Unnamed: 0'.
unsprungsg_results.to_csv('unsprungsg_sentiments.csv')

In [None]:
# Reload the saved results and discard the index column ('Unnamed: 0')
# that was written alongside the data.
unsprungsg_sentiments = pd.read_csv('unsprungsg_sentiments.csv').drop(columns=['Unnamed: 0'])
unsprungsg_sentiments

In [None]:
# Tag each comment with the first 10 characters of its file name
# (presumably a YYYY-MM-DD prefix produced by the scraper - confirm), then
# average the sentiment scores per date.
unsprungsg_sentiments['date'] = unsprungsg_sentiments['file_name'].apply(lambda x: x[:10])

# The score columns are selected explicitly: mean('Compound') only passed a
# truthy value as `numeric_only`, it did not restrict the mean to 'Compound'.
score_columns = ['Negative', 'Neutral', 'Positive', 'Compound']
unsprungsg_date = (
    unsprungsg_sentiments
    .groupby('date')[score_columns]
    .mean()
    .reset_index()  # turns the 'date' group key back into a regular column
)
unsprungsg_date

In [None]:
# Save the per-date averages without the DataFrame index.
unsprungsg_date.to_csv('unsprungsg_date.csv', index=False)

In [None]:
# Build and display a word cloud from all unsprungsg comments.
lemmatizer = WordNetLemmatizer()

# lemmatize() works on a single word, so every comment is lower-cased and
# split into tokens first. Missing comments are dropped so they do not end up
# as the literal word "nan".
text = " ".join(
    lemmatizer.lemmatize(word)
    for com in unsprungsg_sentiments.comment.dropna().astype(str)
    for word in com.lower().split()
)
print ("There are {} words in the combination of all comments in unsprungsg account.".format(len(text.split())))

# Create stopword list:
# remove words that we want to exclude

stopwords = set(STOPWORDS)
stopwords.update(["unsprungsg","bike"])

# Generate a word cloud image

wordcloud = WordCloud(stopwords=stopwords, background_color="white", width=800, height=400).generate(text)

# Display the generated image the matplotlib way. The figure is created
# first so that axis("off") and tight_layout() apply to the figure that
# actually holds the image.

plt.figure(figsize=(40, 20))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()

## Sentiment Analysis on comments of posts by forbiddenbikecompany

### Reading json file from scraped datasets

In [None]:
import glob

# Collect every scraped comments file of the forbiddenbikecompany account
# and echo the paths that were found.
comment_files = list(glob.glob('forbiddenbikecompany/forbiddenbikecompany/*comments.json'))
for name in comment_files:
    print(name)

### Sentiment Analysis on comments of each post per account

In [None]:
import os

# Score every comment of every scraped file and collect one row per comment.
# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append copied the whole frame on each call and no longer exists
# in pandas 2.x.
analyzer = SentimentIntensityAnalyzer()  # one instance reused for all comments
rows = []

for path in comment_files:
    # Read the JSON file; the context manager closes it even if parsing fails.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # basename works for any directory depth and any path separator
    file_name = os.path.basename(path)
    print("json filename:", file_name)

    for comment in data:
        og_sentence = comment['text']

        # drop @mentions so user names do not influence the score
        sentence = " ".join(word for word in og_sentence.split() if not word.startswith('@'))

        print("original comment: " + og_sentence)
        print("cleaned comment: " + sentence)

        sentiment_dict = analyzer.polarity_scores(sentence)
        label = sentiment_scores(sentence)
        rows.append({'file_name': file_name,
                     'post_id': comment['id'],
                     'comment': og_sentence,
                     'Negative': sentiment_dict['neg'],
                     'Neutral': sentiment_dict['neu'],
                     'Positive': sentiment_dict['pos'],
                     'Compound': sentiment_dict['compound'],
                     'Sentiment': label})
        print("Overall sentiment dictionary is : ", sentiment_dict)
        print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
        print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
        print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")
        print("Sentence Overall Rated As", label)
        print()

forbiddenbikecompany_results = pd.DataFrame(rows, columns=['file_name', 'post_id', 'comment',
                                                           'Negative', 'Neutral', 'Positive', 'Compound', 'Sentiment'])

In [None]:
# Show the per-comment sentiment table for forbiddenbikecompany.
forbiddenbikecompany_results

In [None]:
# Save the per-comment results. The DataFrame index is written as well
# (no index=False), which is why the reload cell drops 'Unnamed: 0'.
forbiddenbikecompany_results.to_csv('forbiddenbikecompany_sentiments.csv')

In [None]:
# Reload the saved results and discard the index column ('Unnamed: 0')
# that was written alongside the data.
forbiddenbikecompany_sentiments = pd.read_csv('forbiddenbikecompany_sentiments.csv').drop(columns=['Unnamed: 0'])
forbiddenbikecompany_sentiments

In [None]:
# The ten comments with the highest compound sentiment score.
forbiddenbikecompany_sentiments.nlargest(n=10, columns=['Compound'])

In [None]:
# Average the sentiment scores per comments file (the group key stays in
# the 'file_name' column). Note that forbiddenbike_date is reassigned by the
# per-date aggregation in the following cell.
# The score columns are selected explicitly: mean('Compound') only passed a
# truthy value as `numeric_only`, it did not restrict the mean to 'Compound'.
score_columns = ['Negative', 'Neutral', 'Positive', 'Compound']
forbiddenbike_date = (
    forbiddenbikecompany_sentiments
    .groupby('file_name')[score_columns]
    .mean()
    .reset_index()  # turns the 'file_name' group key back into a column
)
forbiddenbike_date

In [None]:
# Tag each comment with the first 10 characters of its file name
# (presumably a YYYY-MM-DD prefix produced by the scraper - confirm), then
# average the sentiment scores per date.
forbiddenbikecompany_sentiments['date'] = forbiddenbikecompany_sentiments['file_name'].apply(lambda x: x[:10])

# The score columns are selected explicitly: mean('Compound') only passed a
# truthy value as `numeric_only`, it did not restrict the mean to 'Compound'.
score_columns = ['Negative', 'Neutral', 'Positive', 'Compound']
forbiddenbike_date = (
    forbiddenbikecompany_sentiments
    .groupby('date')[score_columns]
    .mean()
    .reset_index()  # turns the 'date' group key back into a regular column
)
forbiddenbike_date

In [None]:
# Build and display a word cloud from all forbiddenbikecompany comments.
lemmatizer = WordNetLemmatizer()

# lemmatize() works on a single word, so every comment is lower-cased and
# split into tokens first. Missing comments are dropped so they do not end up
# as the literal word "nan".
text = " ".join(
    lemmatizer.lemmatize(word)
    for com in forbiddenbikecompany_sentiments.comment.dropna().astype(str)
    for word in com.lower().split()
)
print ("There are {} words in the combination of all comments in forbiddenbikecompany account.".format(len(text.split())))

# Create stopword list:
# remove words that we want to exclude

stopwords = set(STOPWORDS)
stopwords.update(["forbiddenbikecompany","bike"])

# Generate a word cloud image

wordcloud = WordCloud(stopwords=stopwords, background_color="white", width=800, height=400).generate(text)

# Display the generated image the matplotlib way. The figure is created
# first so that axis("off") and tight_layout() apply to the figure that
# actually holds the image.

plt.figure(figsize=(40, 20))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()

## Sentiment Analysis on comments of posts by yeticycles
### Reading json file from scraped datasets

In [None]:
import glob

# Collect every scraped comments file of the yeticycles account and echo
# the paths that were found.
comment_files = list(glob.glob('yeticycles/yeticycles/*comments.json'))
for name in comment_files:
    print(name)

### Sentiment Analysis on comments of each post per account

In [None]:
import os

# Score every comment of every scraped file and collect one row per comment.
# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append copied the whole frame on each call and no longer exists
# in pandas 2.x.
analyzer = SentimentIntensityAnalyzer()  # one instance reused for all comments
rows = []

for path in comment_files:
    # Read the JSON file; the context manager closes it even if parsing fails.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # basename works for any directory depth and any path separator
    file_name = os.path.basename(path)
    print("json filename:", file_name)

    for comment in data:
        og_sentence = comment['text']

        # drop @mentions so user names do not influence the score
        sentence = " ".join(word for word in og_sentence.split() if not word.startswith('@'))

        print("original comment: " + og_sentence)
        print("cleaned comment: " + sentence)

        sentiment_dict = analyzer.polarity_scores(sentence)
        label = sentiment_scores(sentence)
        rows.append({'file_name': file_name,
                     'post_id': comment['id'],
                     'comment': og_sentence,
                     'Negative': sentiment_dict['neg'],
                     'Neutral': sentiment_dict['neu'],
                     'Positive': sentiment_dict['pos'],
                     'Compound': sentiment_dict['compound'],
                     'Sentiment': label})
        print("Overall sentiment dictionary is : ", sentiment_dict)
        print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
        print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
        print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")
        print("Sentence Overall Rated As", label)
        print()

yeticycles_results = pd.DataFrame(rows, columns=['file_name', 'post_id', 'comment',
                                                 'Negative', 'Neutral', 'Positive', 'Compound', 'Sentiment'])

In [None]:
# Show the per-comment sentiment table for yeticycles.
yeticycles_results

In [None]:
# Show the yeticycles per-comment sentiment table (same table as the previous cell).
yeticycles_results

In [None]:
# Save the per-comment results. The DataFrame index is written as well
# (no index=False), which is why the reload cell drops 'Unnamed: 0'.
yeticycles_results.to_csv('yeticycles_sentiments.csv')

In [None]:
# Reload the saved results and discard the index column ('Unnamed: 0')
# that was written alongside the data.
yeticycles_sentiments = pd.read_csv('yeticycles_sentiments.csv').drop(columns=['Unnamed: 0'])
yeticycles_sentiments

In [None]:
# Tag each comment with the first 10 characters of its file name
# (presumably a YYYY-MM-DD prefix produced by the scraper - confirm), then
# average the sentiment scores per date.
yeticycles_sentiments['date'] = yeticycles_sentiments['file_name'].apply(lambda x: x[:10])

# The score columns are selected explicitly: mean('Compound') only passed a
# truthy value as `numeric_only`, it did not restrict the mean to 'Compound'.
score_columns = ['Negative', 'Neutral', 'Positive', 'Compound']
yeticycles_date = (
    yeticycles_sentiments
    .groupby('date')[score_columns]
    .mean()
    .reset_index()  # turns the 'date' group key back into a regular column
)
yeticycles_date

## Wordcloud for comments on yeticycles

In [None]:
# Build and display a word cloud from all yeticycles comments.
lemmatizer = WordNetLemmatizer()

# lemmatize() works on a single word, so every comment is lower-cased and
# split into tokens first. Missing comments are dropped so they do not end up
# as the literal word "nan".
text = " ".join(
    lemmatizer.lemmatize(word)
    for com in yeticycles_sentiments.comment.dropna().astype(str)
    for word in com.lower().split()
)
print ("There are {} words in the combination of all comments in yeticycles account.".format(len(text.split())))

# Create stopword list:
# remove words that we want to exclude

stopwords = set(STOPWORDS)
stopwords.update(["yeti", "yeticycles","bike"])

# Generate a word cloud image

wordcloud = WordCloud(stopwords=stopwords, background_color="white", width=800, height=400).generate(text)

# Display the generated image the matplotlib way. The figure is created
# first so that axis("off") and tight_layout() apply to the figure that
# actually holds the image.

plt.figure(figsize=(40, 20))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()

In [None]:
# Label every per-date table with the brand it belongs to so the rows stay
# identifiable once the tables are combined.
for brand_name, brand_frame in (
    ('yeticycles', yeticycles_date),
    ('forbiddenbike', forbiddenbike_date),
    ('unsprungsg', unsprungsg_date),
    ('santacruzbicycles', santacruzbicycles_date),
):
    brand_frame['brand'] = brand_name

In [None]:
# Show the forbiddenbike per-date averages, now including the 'brand' column.
forbiddenbike_date

In [None]:
# Show the yeticycles per-date averages, now including the 'brand' column.
yeticycles_date

In [None]:
# Stack the per-brand daily averages into a single table.
frames = [santacruzbicycles_date, unsprungsg_date, forbiddenbike_date, yeticycles_date]

# ignore_index=True gives the combined rows a fresh 0..n-1 index; without it
# every frame keeps its own labels and the result has repeated index values.
sentiments_date = pd.concat(frames, ignore_index=True)

In [None]:
# Show the combined per-date sentiment table for all brands.
sentiments_date

In [None]:
# Save the combined table.
# NOTE(review): unlike the santacruzbicycles/unsprungsg per-date exports, this
# call does not pass index=False, so the index is written as an unnamed first
# column - confirm whether that is intended.
sentiments_date.to_csv('sentiments_date.csv')