In [18]:
import pandas as pd
import praw
import os
from datetime import datetime
from dotenv import load_dotenv

In [19]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# praw.Reddit keyword -> name of the environment variable that supplies it.
credential_env_vars = {
    'client_id': 'CLIENT_ID',
    'client_secret': 'CLIENT_SECRET',
    'user_agent': 'USER_AGENT',
}

# os.getenv returns None for an unset variable; that None is passed straight
# through to praw.Reddit, exactly as if it had been written inline.
reddit = praw.Reddit(
    **{keyword: os.getenv(env_name) for keyword, env_name in credential_env_vars.items()}
)

In [20]:
# Scrape configuration.
# Subreddits to sample; each name is also stored as the post's 'university' value.
subreddits = [
    'harvard',
    'mit',
    'stanford',
    'ucla',
    'berkeley',
    'uofm',
    'gatech',
    'nyu',
]
# num_post: limit passed to each subreddit's "hot" listing.
# comments_per_post: how many comments are kept for every submission.
num_post, comments_per_post = 100, 5

In [21]:
# Accumulator for the scraped posts: the scraping loop appends one dict per submission.
data = list()

In [22]:
# Collect up to `num_post` "hot" submissions from every subreddit in
# `subreddits`, keep up to `comments_per_post` comment bodies for each, and
# assemble the records into the DataFrame `df` (one row per submission).

# Start from an empty list every time this cell runs. Appending to a list that
# was created in a different cell would add a second copy of every post when
# this cell is re-executed on its own.
data = []

for subreddit in subreddits:
    subreddit_instance = reddit.subreddit(subreddit)
    for submission in subreddit_instance.hot(limit=num_post):
        # limit=0 removes the "load more comments" placeholders without
        # fetching them, so every remaining item has a `.body`.
        submission.comments.replace_more(limit=0)
        # .list() flattens the comment forest (replies included); keep only
        # the first `comments_per_post` entries.
        comments = [comment.body for comment in submission.comments.list()[:comments_per_post]]

        post_data = {
            'id': submission.id,
            'university': subreddit,
            'title': submission.title,
            # Empty string for link/image posts that have no self text.
            'content': submission.selftext,
            'comments': comments,
            # `author` is None when the account was deleted.
            'author': submission.author.name if submission.author else '[deleted]',
            # `created_utc` is a Unix timestamp in seconds. Converting with
            # unit='s' yields a (naive) UTC time, whereas
            # datetime.fromtimestamp() would shift it to the local timezone of
            # whichever machine runs the notebook, contradicting the column name.
            'created_utc': pd.to_datetime(submission.created_utc, unit='s'),
            'score': submission.score
        }
        data.append(post_data)

df = pd.DataFrame(data)

In [23]:
# Preview the first rows. As the last expression of the cell, the frame is
# shown with the notebook's rich table display; print() would emit plain text
# that wraps wide columns across several blocks.
df.head()

        id university                                              title  \
0  1fylsog    harvard  Register to vote by October 26th, and vote in ...   
1  1g0gvaa    harvard  Has the Harvard Crimson ever retracted/apologi...   
2  1g0dbjv    harvard              Harvard Alumnus Wins Chemistry Nobel    
3  1g0k72t    harvard  Americans Victor Ambros of UMass Chan and Gary...   
4  1g09rjw    harvard                                 History of Science   

                                             content  \
0  On November 5th, Massachusetts will vote not j...   
1  This is a very specific question, and it obvio...   
2                                                      
3                                                      
4  History of science majors/those of you familia...   

                                            comments             author  \
0                                                 []           EllieDai   
1  [College kids hold misguided views. More news ...    

In [24]:
# Persist the scraped posts as CSV in the current working directory.
# index=False leaves the DataFrame's integer row index out of the file.
output_csv = 'reddit-university-data.csv'
df.to_csv(output_csv, index=False)