# Text Analysis using Wordcloud

In [None]:
# Import relevant libraries
import pandas as pd

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator  # stopwords are added if user requires them

import praw
from praw.models import MoreComments

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the Reddit credentials from a local text file. The file must contain
# exactly four lines, in this order: password, client id, client secret, username.
with open('YOUR FILE PATH', 'r') as f:
    pw, client_id, secret, username = f.read().splitlines()

# Create the PRAW client from those credentials; the rest of the notebook
# talks to Reddit through this object.
reddit = praw.Reddit(
    username=username,
    password=pw,
    client_id=client_id,
    client_secret=secret,
    user_agent="FirstBotapp 1.1",
)

In [12]:
def wordcloud_generator(summarize=True, no_phrases=1):
    """
    Build and display a word cloud from the comments of one Reddit submission.

    The user is prompted for the submission URL. The comments of that
    submission are fetched through the module-level ``reddit`` client and
    their bodies are joined into a single text. When ``summarize`` is true,
    that text is first reduced with sumy's LSA summarizer and the word cloud
    is built from the summary; otherwise it is built from the full text.
    The resulting image is shown with matplotlib.

    Args:
        summarize: If true, summarize the comments before building the cloud.
        no_phrases: Passed to the summarizer as the number of sentences to
            keep. Only used when ``summarize`` is true. Defaults to 1.

    Raises:
        ValueError: If the submission yields no comment text to plot.
    """
    # Keep the URL as typed apart from surrounding whitespace: URL paths can
    # be case-sensitive, so lowercasing the input may corrupt the link.
    url = input('Please enter the url.').strip()
    submission = reddit.submission(url=url)  # obtain a submission object

    # Replace every "load more comments" placeholder with the real comments,
    # then flatten the comment tree and concatenate the comment bodies.
    submission.comments.replace_more(limit=None)
    text = ' '.join(comment.body for comment in submission.comments.list())
    if not text.strip():
        raise ValueError('The submission has no comment text to build a word cloud from.')

    if summarize:
        parser = PlaintextParser.from_string(text, Tokenizer("english"))
        summarizer = LsaSummarizer()
        # The summarizer returns sentence objects, while WordCloud.generate
        # needs one plain string, so convert and join them first.
        sentences = summarizer(parser.document, no_phrases)
        text = ' '.join(str(sentence) for sentence in sentences)

    wordcloud = WordCloud(background_color=None, mode="RGBA", min_word_length=3).generate(text)

    plt.figure(figsize=(15, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()

In [None]:
# Run the generator with the comments summarized down to 5 phrases;
# the function itself prompts for the Reddit submission url.
wordcloud_generator(no_phrases=5, summarize=True)