## Libraries

In [1]:
import praw
import pandas as pd
import datetime as dt
import json
from bs4 import BeautifulSoup
from reddit_helpers.text_processor import reddit_text_preprocessing
import re
import nltk
from tqdm import tqdm
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

## Load Reddit API Credentials

In [2]:
# Read the Reddit API credentials from the local JSON file.
with open("reddit_credentials.json", "r") as cred_file:
    creds = json.loads(cred_file.read())

In [3]:
# Confirm which credential fields were loaded without echoing their values:
# anything displayed here is saved with the notebook. The secret values that
# were previously rendered in this output should be treated as exposed and
# rotated.
sorted(creds)

['client_id', 'client_secret', 'password', 'user_agent', 'username']

In [4]:
# Build the PRAW client, forwarding exactly the five credential fields it needs.
_auth_fields = ("client_id", "client_secret", "user_agent", "username", "password")
reddit = praw.Reddit(**{field: creds[field] for field in _auth_fields})

## Helper Functions

In [5]:
def get_date(created, tz=None):
    """Convert a Unix epoch timestamp (in seconds) to a ``datetime``.

    Parameters
    ----------
    created : int or float
        Seconds since the Unix epoch.
    tz : datetime.tzinfo, optional
        Time zone of the returned value. With the default ``None`` the result
        is a naive datetime in the machine's local time zone (the original
        behaviour), so it differs between machines. Pass ``dt.timezone.utc``
        to get a timezone-aware, machine-independent value.

    Returns
    -------
    datetime.datetime
    """
    return dt.datetime.fromtimestamp(created, tz)

In [6]:
def fetch_comments(reddit_instance, postids):
    """Collect every comment of the given submissions into one DataFrame.

    Parameters
    ----------
    reddit_instance : object
        Client exposing ``submission(post_id)``; the returned object must
        provide ``comments.replace_more(limit=None)`` and ``comments.list()``.
    postids : str or iterable of str
        Submission id(s) to fetch. A single id may be passed as a bare string.

    Returns
    -------
    pandas.DataFrame
        One row per comment with the columns ``created``, ``comment_id``,
        ``author``, ``body``, ``parent_id``, ``submission_id``, ``score``,
        ``subreddit`` and ``subreddit_id``. The columns are present even when
        no comments were found.
    """
    columns = [
        "created",
        "comment_id",
        "author",
        "body",
        "parent_id",
        "submission_id",
        "score",
        "subreddit",
        "subreddit_id",
    ]

    # A bare string would otherwise be iterated one character at a time.
    if isinstance(postids, str):
        postids = [postids]

    records = []
    for postid in postids:
        submission = reddit_instance.submission(postid)
        # limit=None asks for every collapsed "more comments" stub to be
        # expanded before the tree is flattened; this can be slow on big threads.
        submission.comments.replace_more(limit=None)
        for comment in submission.comments.list():
            records.append({
                "created": comment.created_utc,
                "comment_id": comment.id,
                # NOTE(review): author/subreddit are stored exactly as the client
                # returns them (presumably client objects, not plain strings;
                # author may be None for deleted accounts) - confirm before
                # serialising the frame.
                "author": comment.author,
                "body": comment.body,
                "parent_id": comment.parent_id,
                "submission_id": postid,
                "score": comment.score,
                "subreddit": comment.subreddit,
                "subreddit_id": comment.subreddit_id,
            })

    return pd.DataFrame(records, columns=columns)

## Load Comments of a Post

In [7]:
# Submission ids whose full comment trees are downloaded for the analysis.
post_ids = ['9u948a']
comments_info = fetch_comments(reddit, post_ids)

In [8]:
# Turn the epoch-second `created` values into datetimes and attach them
# as a new `timestamp` column.
_timestamp = comments_info["created"].map(get_date)
comments_info = comments_info.assign(timestamp=_timestamp)

In [9]:
# Users mentioned in the comment, written as "/u/<name>".
comments_info['linked_users'] = comments_info['body'].apply(lambda x: re.findall(r'/u/[A-Za-z0-9_-]+', x))
# Subreddits mentioned in the comment. The leading \b stops the pattern from
# firing on an "r/" that merely ends another word or path segment
# (e.g. "for/against", "/bar/baz"), while "r/name" and "/r/name" still match.
comments_info['linked_subreddits'] = comments_info['body'].apply(lambda x: re.findall(r'\br/[A-Za-z0-9_-]+', x))

# Replace everything that is not an ASCII letter or '#' with a space.
# regex=True is required: the pattern is a character class, and newer pandas
# versions treat the pattern as a literal string unless told otherwise.
# NOTE(review): this also destroys URLs and HTML markup before the later
# process_html()/remove_urls() steps see the text - confirm that order is intended.
comments_info['processed_body'] = comments_info['body'].str.replace("[^a-zA-Z#]", " ", regex=True)

In [10]:
def _preprocess_body(text):
    """Run one comment through the reddit_text_preprocessing chain and return its text."""
    # NOTE(review): stopwords_remove() appears twice in the original chain; it is
    # kept as-is here because the helper is not visible - confirm whether the
    # second call is redundant.
    # NOTE(review): the displayed processed_body values contain fragments such as
    # "stretake careh" and "tinstant message", which suggests
    # replace_abbreviations() substitutes inside words - verify in the helper.
    return (
        reddit_text_preprocessing(text)
        .replace_abbreviations()
        .remove_short_words()
        .lower_case()
        .process_html()
        .remove_urls()
        .decode_text()
        .stopwords_remove()
        .stopwords_remove()
        .lemmatize()
        .text
    )


# Normalise the already letter-only text of every comment.
comments_info['processed_body'] = comments_info['processed_body'].map(_preprocess_body)

In [11]:
# Preview the first five comments with all derived columns.
comments_info.head(5)

Unnamed: 0,created,comment_id,author,body,parent_id,submission_id,score,subreddit,subreddit_id,timestamp,linked_users,linked_subreddits,processed_body
0,1541387000.0,e92pd71,TheLizardKing25,I’m republican and glad she lost but this is a...,t3_9u948a,9u948a,202,Republican,t5_2qndt,2018-11-04 22:06:03,[],[],republican glad lose terrible comparisignifica...
1,1541424000.0,e93ctfa,Russilito,This is a bit of a stretch...,t3_9u948a,9u948a,15,Republican,t5_2qndt,2018-11-05 08:22:01,[],[],stretake careh
2,1541393000.0,e92uzr2,TakenStankForever,Are you trying to tell me that UCF *aren't* na...,t3_9u948a,9u948a,4,Republican,t5_2qndt,2018-11-04 23:35:41,[],[],try tell national champ
3,1541456000.0,e94fu5d,bisemutum,"$100,000,000,000 Billion is 5 million times th...",t3_9u948a,9u948a,3,Republican,t5_2qndt,2018-11-05 17:20:07,[],[],billion million tinstant message illegal insta...
4,1541391000.0,e92tnh4,Poopsmith89,I dont completely understand the scoring syste...,t3_9u948a,9u948a,7,Republican,t5_2qndt,2018-11-04 23:12:27,[],[],dont completely understand score soonte-mail e...


In [12]:
## Disabled alternative: sentiment scoring with the Google Cloud Natural Language API.
## Everything in this cell is commented out; the notebook uses VADER instead.
## NOTE(review): `language.types` / `language.enums` look like the pre-2.0
## google-cloud-language interface - confirm against the installed version
## before re-enabling. The service-account file name below is hardcoded.

# def gc_sentiment(text, credentials):  
#     from google.cloud import language
    
#     client = language.LanguageServiceClient(credentials = credentials)
#     document = language.types.Document(
#             content=text,
#             type=language.enums.Document.Type.PLAIN_TEXT)
#     annotations = client.analyze_sentiment(document=document)
#     score = annotations.document_sentiment.score
#     magnitude = annotations.document_sentiment.magnitude
#     return score, magnitude

# import os
# from google.oauth2 import service_account
# credentials = service_account.Credentials.from_service_account_file('./ecbm4040-up2138-b37eacd8e36c.json')
# print('Credentials from environ: {}'.format(os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')))

# gc_results = [gc_sentiment(row, credentials) for row in tqdm(comments_info['processed_body'])]
# gc_score, gc_magnitude = zip(*gc_results) # Unpacking the result into 2 lists
# gc = list(zip(comments_info['processed_body'], gc_score, gc_magnitude))
# columns = ['text', 'score', 'magnitude']
# gc_df = pd.DataFrame(gc, columns = columns)

# gc_df

In [15]:
# Create the VADER analyzer. It needs the `vader_lexicon` NLTK resource; on a
# fresh environment that resource is missing and the constructor raises
# LookupError, so fetch it once and retry to keep Restart & Run All working.
try:
    sia = SIA()
except LookupError:
    nltk.download("vader_lexicon")
    sia = SIA()

In [60]:
# Score the raw body of the comment at row label 102 with VADER.
pol_score = sia.polarity_scores(comments_info.loc[102, "body"])

In [61]:
# Show the polarity dict for the scored comment (keys: neg, neu, pos, compound).
pol_score

{'neg': 0.178, 'neu': 0.791, 'pos': 0.03, 'compound': -0.8997}

In [55]:
# The original cell had an empty subscript (`comments_info.body[]`), which is a
# SyntaxError, and the row label it used is not recoverable. Select the comment
# by the opening words shown in the saved output instead.
_is_target = comments_info['body'].str.startswith("You realize I never said freeloaders")
comments_info.loc[_is_target, 'body'].iloc[0]

"You realize I never said freeloaders shouldn't be able to vote, right.  And you realize I corrected the previous poster on that point already, right.\n\nAnd you realize I'm focusing on addressing issues with my comments, right.  So the other points you reference are not germaine.  For that matter I could also start talking about the illegal immigrants issue and the annual $100,000,000,000 Billion price tag on that. And the chief culprit again being California."

In [59]:
# Full text of the comment at row label 102 (the one scored with VADER).
comments_info.loc[102, "body"]

'All your counterpoints just to rationalize your wild left zealotry and ideologies.  You can square it anyway you desire, but it will never be squared in our wallets and pocketbooks where it counts - in reality!  I guess we have to actually bankrupt ourselves before people wake up to these very cruel facts.'