In [1]:
import pandas as pd
import praw
import os
from datetime import datetime
from dotenv import load_dotenv

In [2]:
# Load environment variables (python-dotenv) before the credentials are read below.
load_dotenv()

# Build the PRAW client; each constructor option is read from the environment
# variable paired with it, so no credential is hardcoded in the notebook.
reddit = praw.Reddit(**{
    option: os.getenv(env_var)
    for option, env_var in (
        ('client_id', 'CLIENT_ID'),
        ('client_secret', 'CLIENT_SECRET'),
        ('user_agent', 'USER_AGENT'),
    )
})

In [3]:
# Subreddits to scrape; each name is also stored as the post's 'university' value.
subreddits = [
    'harvard',
    'mit',
    'stanford',
    'ucla',
    'berkeley',
    'uofm',
    'gatech',
    'nyu',
]

# Upper bound on submissions requested from each subreddit's "hot" listing.
num_post = 50

# Upper bound on comment bodies kept per submission.
comments_per_post = 5

In [4]:
# Accumulator for the scraped posts: one dict per submission.
data = list()

In [5]:
# Collect up to `num_post` hot submissions from every subreddit in `subreddits`,
# keep up to `comments_per_post` comment bodies for each, and build `df` with
# one row per submission.
#
# Start from an empty list every time this cell runs. Previously `data` was only
# initialised in a separate cell, so re-running this cell appended the same
# posts again and produced duplicate rows in `df`.
data = []

for subreddit in subreddits:
    subreddit_instance = reddit.subreddit(subreddit)
    for submission in subreddit_instance.hot(limit=num_post):
        # Per the PRAW docs, limit=0 removes the "load more comments" placeholders
        # without fetching them, so every remaining item exposes `.body`.
        submission.comments.replace_more(limit=0)
        # `.list()` flattens the comment forest; only the first few are kept.
        comments = [comment.body for comment in submission.comments.list()[:comments_per_post]]

        post_data = {
            'id': submission.id,
            'university': subreddit,
            'title': submission.title,
            'content': submission.selftext,
            'comments': comments,
            # `submission.author` is falsy when the account is gone; use a placeholder.
            'author': submission.author.name if submission.author else '[deleted]',
            'created_utc': submission.created_utc,
            'score': submission.score
        }
        data.append(post_data)

# Build the frame once from the full list of records.
df = pd.DataFrame(data)

In [7]:
# Preview the first rows. A bare last expression lets the notebook render the
# frame with its rich table display instead of the wrapped plain text of print().
df.head()

        id university                                              title  \
0  1fylsog    harvard  Register to vote by October 26th, and vote in ...   
1  1fyzjck    harvard                          Harvard mentioned in xkcd   
2  1fzl93q    harvard                         Historical Tour of Harvard   
3  1fzbmh1    harvard             Scenic Coastal/Lake Areas Near Harvard   
4  1fz275z    harvard  A Conversation with Kongjian Yu at the Museum ...   

                                             content  \
0  On November 5th, Massachusetts will vote not j...   
1                                                      
2  My family is coming to visit next month and I ...   
3  My 25th reunion is coming up and I want to gat...   
4  On Wednesday, October 23 join us for a convers...   

                                            comments              author  \
0                                                 []            EllieDai   
1  [I feel like the MIT comma would be a semicolo...  

In [9]:
# Write the collected posts to a CSV file in the working directory, omitting the
# DataFrame's row index.
# NOTE(review): the 'comments' column holds Python lists, which end up as plain
# strings in the CSV and must be parsed when the file is read back — confirm
# that is acceptable for downstream use.
df.to_csv(path_or_buf='reddit-university-data.csv', index=False)