![reddit banner](https://cdn.dribbble.com/users/1761084/screenshots/3587716/reddit.gif)

In [13]:
# Importing important libraries
import praw
import pandas as pd
import configparser

In [14]:
# Load the Reddit API credentials from the local INI file.
# The file is opened explicitly and passed to read_file() rather than read():
# read() silently skips a missing file, so a misplaced credentials file would
# only surface later as a confusing NoSectionError on the first config.get().
config = configparser.ConfigParser()
with open('reddit_credentials.ini') as credentials_file:
    config.read_file(credentials_file)

# Storing credential info in local variables
user_agent = config.get('credentials', 'user_agent')
client_id = config.get('credentials', 'client_id')
client_secret = config.get('credentials', 'client_secret')
redirect_url = config.get('credentials', 'redirect_url')

In [15]:
# Creating read-only Reddit instance (no username/password is supplied).
# NOTE: PRAW's setting is named `redirect_uri`; the previous `redirect_url`
# keyword is not a recognised PRAW option, so the value never took effect.
reddit = praw.Reddit(user_agent = user_agent,
                    client_id = client_id,
                    client_secret = client_secret,
                    redirect_uri = redirect_url)

## Extracting Comments
For our project, we are going to use the top 3 most popular Reddit communities:
* Machine Learning - [r/MachineLearning](https://www.reddit.com/r/MachineLearning/)
* Artificial Intelligence - [r/artificial](https://www.reddit.com/r/Artificial/)
* Data Science - [r/DataScience](https://www.reddit.com/r/DataScience/)

We will extract the top 1000 posts of all time from each subreddit to create our dataset, along with some other useful information such as the post URL (& ID), the user who posted, the post title, number of comments, time created, upvote ratio, and score.

In [16]:
# Extracting the top 1000 posts of all time from each subreddit.
# Reddit listings are capped at roughly 1000 items, so a single combined
# 'a+b+c' listing with limit = 3000 would return only ~1000 posts in total
# (ranked across all three communities). Querying each subreddit on its own
# yields up to 1000 posts apiece.
subreddit_names = ['MachineLearning', 'artificial', 'datascience']

# Lazy, single-pass iterable: no request is made until it is iterated.
posts = (
    post
    for subreddit_name in subreddit_names
    for post in reddit.subreddit(subreddit_name).top(time_filter = 'all', limit = 1000)
)

In [None]:
# Collect one plain-dict record per post; building a list of records first and
# constructing the DataFrame once is cheaper than growing a frame row by row.
# NOTE: `posts` is a single-pass iterator, so re-running this cell without
# re-creating `posts` produces an empty list.
posts_df = []

for post in posts:
    posts_df.append({
        'post_id' : post.id,
        'subreddit' : post.subreddit,
        'time_created' : post.created_utc,
        'selftext' : post.selftext,
        # Fixed: this column previously stored post.url instead of the title.
        'post_title' : post.title,
        'link_flair_text' : post.link_flair_text,
        'score' : post.score,
        'comments' : post.num_comments,
        'upvote_ratio' : post.upvote_ratio,
        # URL kept as its own column (appended last so existing column
        # positions are unchanged).
        'post_url' : post.url
    })

In [None]:
# Turn the accumulated list of per-post records into a DataFrame
# (one row per post, one column per record key).
posts_df = pd.DataFrame(data = posts_df)

In [None]:
# Preview the first five rows as a quick sanity check of the dataset.
posts_df.head(n = 5)