In [2]:
import praw # python reddit api wrapper
from keys import client_name_scr, client_key_scr, client_name_web, client_key_web
import time
import datetime
import os

In [5]:
def get_political_comments_from_top_posts(subreddit_name, max_posts=100, time_filter="all"):
    """Fetches politically relevant comments from top posts in a subreddit."""
    print("_"*100)
    print(f"r/{subreddit_name}")
    
    # Define Indian political keywords for filtering
    political_keywords = [
        # Political parties and organizations
        'bjp', 'congress', 'aap', 'tmc', 'shiv sena', 'ncp', 'rss', 
        'left front', 'dmk', 'aiadmk', 'jdu', 'rjd', 'bsp', 'sp',
        
        # Politicians
        'modi', 'rahul gandhi', 'amit shah', 'kejriwal', 'mamata', 
        'yogi', 'sonia', 'kharge', 'fadnavis',
        
        # Government entities
        'parliament', 'lok sabha', 'rajya sabha', 'supreme court', 
        'high court', 'cabinet', 'ministry', 'pmo', 'cbi', 'ed',
        
        # Political issues
        'election', 'vote', 'democracy', 'constitution', 'policy', 
        'bill', 'act', 'law', 'legislation', 'scam', 'corruption',
        'protest', 'movement', 'reform', 'budget', 'tax', 'subsidy',
        
        # Government schemes
        'ayushman', 'swachh bharat', 'make in india', 'gst', 'aadhar',
        'ration', 'welfare', 'scheme', 'yojana',
        
        # Political ideologies
        'left wing', 'right wing', 'liberal', 'conservative', 'secular',
        'communist', 'socialist', 'capitalist', 'hindutva', 'nationalism'
    ]
    
    subreddit = reddit.subreddit(subreddit_name)
    political_posts = []
    
    for i, post in enumerate(subreddit.top(limit=max_posts, time_filter=time_filter)):
        time.sleep(1)  # to Avoid hitting rate limits
        
        # Check if title or post content contains political keywords
        title_lower = post.title.lower()
        text_lower = post.selftext.lower() if post.selftext else ""
        
        is_political_post = any(keyword in title_lower or keyword in text_lower for keyword in political_keywords)
        
        # Initialize flag for tracking if any comments are political
        has_political_comments = False
        
        post_data = {
            'title': post.title,
            'text': post.selftext.strip() if post.selftext else "",
            'created_utc': post.created_utc,
            'is_political_post': is_political_post,
            'political_comments': [],
            'num_comments': post.num_comments
        }
        
        # Print status with political indicator
        political_indicator = "[POLITICAL]" if is_political_post else "[NON-POLITICAL]"
        print(f"{i} | Fetching comments from: {political_indicator} {post.title}")
        
        # Fetch all comments (flat, no hierarchy)
        post.comments.replace_more(limit=None)  # Get all comments
        
        for comment in post.comments.list():
            comment_text = comment.body
            comment_lower = comment_text.lower()
            
            # Check if comment contains political keywords
            is_political_comment = any(keyword in comment_lower for keyword in political_keywords)
            
            if is_political_comment:
                has_political_comments = True
                post_data['political_comments'].append({
                    'text': comment_text,
                    'created_utc': comment.created_utc,
                })
        
        # Only add posts that are either political themselves or have political comments
        if is_political_post or has_political_comments:
            political_posts.append(post_data)
        print(f"---> {len(post_data['political_comments'])} political comments saved")
        time.sleep(1)  # to Avoid hitting rate limits
    
    print(f"Found {len(political_posts)} politically relevant posts out of {max_posts} checked")
    print("_"*100)
    
    return political_posts

In [7]:
def get_comments_from_top_posts(subreddit_name, max_posts=100, time_filter="all"):
    """Fetches comments from top posts in a subreddit."""
    print("_"*100)
    print(f"r/{subreddit_name}")
    
    subreddit = reddit.subreddit(subreddit_name)
    posts = []
    for i, post in enumerate(subreddit.hot(limit=max_posts, time_filter=time_filter)):
        print(f"{i} | Fetching comments from: {post.title}")

        post_data = {
            'title': post.title,
            'text': post.selftext.strip() if post.selftext else "",
            'created_utc': post.created_utc,
            'comments': []
        }
        
        # Fetch all comments (flat, no hierarchy)
        post.comments.replace_more(limit=None)  # Get all comments

        for comment in post.comments.list():
            post_data['comments'].append(comment.body)
        
        posts.append(post_data)
        time.sleep(1)  # to Avoid hitting rate limits
    print("_"*100)
    return posts

def save(main_dic):
    text = ""
    for post in main_dic:
        text += post['title']
        for comm in post['comments']:
            text += comm + "\n"
    
    with open("india.txt", "w", encoding = "utf-8") as f:
        f.write(text)

def scrap_info(main_dic):
    print(f"Top posts fetched : {len(main_dic)}")
    print(f"Total comments under all posts : {sum([len(x['comments']) for x in main_dic])}")

In [9]:
# Initialize Reddit API
reddit = praw.Reddit(
    client_id = client_name_web,
    client_secret = client_key_web,
    user_agent="testscript by u/Outside_Addition_647)"
)

In [6]:
#print(reddit.auth.limits)

{'remaining': None, 'reset_timestamp': None, 'used': None}


In [7]:
"""subreddit = reddit.subreddit("all")  # Public subreddit
for post in subreddit.hot(limit=5):
    print(post.title)"""

Canadian NHL Legend Wayne Gretzky posing in MAGA hat
Europe cannot be vassal of US, Macron says amid Trump's foreign policy shifts. French President Emmanuel Macron called upon Europe to "rediscover taste for risk, ambition and power"
She has claimed her human
Some Other Guy
Mind blowing revelation


In [20]:
"""try:
    print(reddit.user.me())  # Should print 'None' in read-only mode
    print("Authentication successful!")
except Exception as e:
    print(f"Auth error: {e}")"""

None
Authentication successful!


### r/ india- 2.5M
### r/ IndiaSpeaks - 1.0 M 
### r/ UnitedStatesofIndia - 470k 
### r/ Delhi - 1.0 M
### r/ Mumbai - 900 K
### r/ Bangaluru - 899 K
### r/ Pune

### Sentiment towards Goverment, Bueracracy and Opposition

In [None]:
india = get_political_comments_from_top_posts("india")

____________________________________________________________________________________________________
r/india
0 | Fetching comments from: [NON-POLITICAL] 'We've only been here a few hours and have seen half a dozen people die while they wait for treatment.' - Sky News ground report from Delhi
---> 1066 political comments saved
1 | Fetching comments from: [NON-POLITICAL] Today is my birthday and my parents bought me this. I am 27.
---> 449 political comments saved
2 | Fetching comments from: [POLITICAL] Neeraj Chopra Creates History !! Wins India's Second Ever Individual Gold Medal in the Olympics with an amazing throw of 87.58m !! A proud moment for every Indian .
---> 535 political comments saved
3 | Fetching comments from: [POLITICAL] How to not get raped in India
---> 651 political comments saved
4 | Fetching comments from: [POLITICAL] It's 2021 and India is still doing brown face instead of actually hiring darker skin actors.
---> 1478 political comments saved
5 | Fetching comments 

In [None]:
save(india)
scrap_info(india)

NameError: name 'india' is not defined

In [None]:
subr[sub] = get_comments_from_top_posts(subreddit_name)