### This notebook creates the data that we will use for the API and stores it in a CSV file

In [29]:
import os
import sys
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [3]:
# Put the parent of the current working directory on sys.path so that the
# `lib` package can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [4]:
from lib.api import get_subfeddits, get_subfeddit_info, get_comments

In [9]:
# Collect the ID of every subfeddit listed by the API
subfeddit_ids = [entry['id'] for entry in get_subfeddits()['subfeddits']]

In [10]:
# Show the collected subfeddit IDs
subfeddit_ids

[1, 2, 3]

In [26]:
# Fetch every comment of a subfeddit by paging through the comments API
def get_all_comments_subfeddit(subfeddit_id, page_size=1000):
    """Return all comments of one subfeddit as a single list.

    Pages are requested through ``get_comments`` until an empty page comes
    back. A progress line is printed after every non-empty page.

    Parameters
    ----------
    subfeddit_id
        ID of the subfeddit; passed through unchanged to ``get_comments``.
    page_size : int, default 1000
        Maximum number of comments requested per call (the ``limit``
        argument of ``get_comments``).

    Returns
    -------
    list
        The comments, in the order the API returned them. Empty when the
        subfeddit has no comments.

    Raises
    ------
    ValueError
        If ``page_size`` is not a positive number.
    """
    if page_size <= 0:
        raise ValueError('page_size must be a positive integer')

    comments_list = []
    start_index = 0

    while True:
        page = get_comments(
            subfeddit_id=subfeddit_id, skip=start_index, limit=page_size
        )['comments']

        # An empty page means we have read past the last comment.
        if not page:
            break

        # extend() appends in place; `a = a + b` would copy the whole list
        # again on every page.
        comments_list.extend(page)
        print(f'Loaded {len(comments_list)} comments in subfeddit id {subfeddit_id}')

        # Advance by what was actually received rather than what was asked
        # for, so nothing is skipped if the server returns a short page.
        start_index += len(page)

    return comments_list

In [27]:
# Download all comments of subfeddit 1 (a progress line is printed per page)
comments_list = get_all_comments_subfeddit(1)

Loaded 1000 comments in subfeddit id 1
Loaded 2000 comments in subfeddit id 1
Loaded 3000 comments in subfeddit id 1
Loaded 4000 comments in subfeddit id 1
Loaded 5000 comments in subfeddit id 1
Loaded 6000 comments in subfeddit id 1
Loaded 7000 comments in subfeddit id 1
Loaded 8000 comments in subfeddit id 1
Loaded 9000 comments in subfeddit id 1
Loaded 10000 comments in subfeddit id 1
Loaded 11000 comments in subfeddit id 1
Loaded 12000 comments in subfeddit id 1
Loaded 13000 comments in subfeddit id 1
Loaded 14000 comments in subfeddit id 1
Loaded 15000 comments in subfeddit id 1
Loaded 16000 comments in subfeddit id 1
Loaded 17000 comments in subfeddit id 1
Loaded 18000 comments in subfeddit id 1
Loaded 19000 comments in subfeddit id 1
Loaded 20000 comments in subfeddit id 1
Loaded 21000 comments in subfeddit id 1
Loaded 22000 comments in subfeddit id 1
Loaded 23000 comments in subfeddit id 1
Loaded 24000 comments in subfeddit id 1
Loaded 25000 comments in subfeddit id 1
Loaded 26

In [28]:
# Total number of comments fetched for subfeddit 1
len(comments_list)

33730

In [36]:
# Cut-offs applied to VADER's compound score when bucketing a comment
POSITIVE_THRESHOLD = 0.05
NEGATIVE_THRESHOLD = -0.05

# Shared analyzer instance, created lazily on first use
_analyzer = None


def _get_analyzer():
    """Return the shared SentimentIntensityAnalyzer, building it only once."""
    global _analyzer
    if _analyzer is None:
        _analyzer = SentimentIntensityAnalyzer()
    return _analyzer


def get_comment_sentiment(comment_text):
    """Classify a comment as positive, neutral or negative using VADER.

    Parameters
    ----------
    comment_text : str
        The text to score.

    Returns
    -------
    tuple
        ``(sentiment_class, compound_score)`` where ``sentiment_class`` is
        ``'positive'`` for a compound score >= 0.05, ``'negative'`` for a
        score <= -0.05 and ``'neutral'`` for anything in between.
    """
    # Reuse one analyzer: this function is applied to every row of the
    # comments frame, and building a new analyzer per call is wasted work.
    compound = _get_analyzer().polarity_scores(comment_text)['compound']

    if compound >= POSITIVE_THRESHOLD:
        sentiment_class = 'positive'
    elif compound > NEGATIVE_THRESHOLD:
        # The upper bound is already guaranteed by the branch above.
        sentiment_class = 'neutral'
    else:
        sentiment_class = 'negative'

    return (sentiment_class, compound)

Let's now find out the sentiment of each comment using VADER.

In [37]:
# Turn the fetched comments into a DataFrame
df = pd.DataFrame(comments_list)

In [38]:
# Preview the first 10 rows of the comments frame
df.head(10)

Unnamed: 0,id,username,text,created_at
0,1,user_0,It looks great!,1711449247
1,2,user_1,Love it.,1711445647
2,3,user_2,Awesome.,1711442047
3,4,user_3,Well done!,1711438447
4,5,user_4,Looks decent.,1711434847
5,6,user_5,What you did was right.,1711431247
6,7,user_6,Thumbs up.,1711427647
7,8,user_7,Like it a lot!,1711424047
8,9,user_8,Good work.,1711420447
9,10,user_9,Luckily you did it.,1711416847


In [41]:
# Score every comment once, then expand the (class, score) tuples into two
# columns in a single DataFrame construction. This avoids `.apply(pd.Series)`,
# which creates one Series object per row and is very slow on large frames.
sentiment_records = [get_comment_sentiment(text) for text in df['text']]
df[['sentiment_class', 'sentiment_score']] = pd.DataFrame(
    sentiment_records,
    index=df.index,  # keep row alignment with df
    columns=['sentiment_class', 'sentiment_score'],
)

In [42]:
# Preview the first 10 rows again, now including the sentiment columns
df.head(10)

Unnamed: 0,id,username,text,created_at,sentiment_class,sentiment_score
0,1,user_0,It looks great!,1711449247,positive,0.6588
1,2,user_1,Love it.,1711445647,positive,0.6369
2,3,user_2,Awesome.,1711442047,positive,0.6249
3,4,user_3,Well done!,1711438447,positive,0.3382
4,5,user_4,Looks decent.,1711434847,neutral,0.0
5,6,user_5,What you did was right.,1711431247,neutral,0.0
6,7,user_6,Thumbs up.,1711427647,neutral,0.0
7,8,user_7,Like it a lot!,1711424047,positive,0.4199
8,9,user_8,Good work.,1711420447,positive,0.4404
9,10,user_9,Luckily you did it.,1711416847,positive,0.5106
