In [1]:
import numpy as np
import pandas as pd
import json
import tweepy
import praw
from collections import defaultdict
from datetime import datetime

In [2]:
def loadKeys(key_file):
    with open(key_file) as f:
        key_dict = json.load(f)
    return key_dict['api_key'], key_dict['api_secret'], key_dict['token'], key_dict['token_secret']

In [3]:
KEY_FILE = 'twitterkeys.json'
api_key, api_secret, token, token_secret = loadKeys(KEY_FILE)
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(token, token_secret)
api = tweepy.API(auth)

### 1a) Keywords used - 
- Covid-19
- Coronavirus
- Pandemic
- WHO
- CDC
- Mask
- Lockdown

### 1b)

In [4]:
search_term = "#Covid-19 OR #Coronavirus OR #Pandemic OR #WHO OR #CDC OR #Mask OR #Lockdown" + "-filter:retweets"
no_of_pages = 100 
tweets_save = []
for page in tweepy.Cursor(api.search, q = search_term, lang="en",).pages(no_of_pages):
    for status in page:
        tweets_save.append(status.text)

In [5]:
tweets_df = pd.DataFrame({'col': tweets_save})

In [6]:
tweets_df.to_csv('pandemictweets.csv', encoding='utf-8')

### 2a)

In [7]:
user = ['WHO', 'UN']
tweets_dict = defaultdict(list)
no_of_pages = 5
for users in user:
    for page in tweepy.Cursor(api.user_timeline, id = users).pages(no_of_pages):
        for status in page:
            tweets_dict['user_handle'].append(users)
            tweets_dict['text'].append(status.text)
            tweets_dict['location'].append(status.user.location)
            tweets_dict['friends_count'].append(status.user.friends_count)
            tweets_dict['created_at'].append(status.created_at)
            tweets_dict['followers_count'].append(status.user.followers_count)

In [8]:
tweets_data = pd.DataFrame(tweets_dict)
tweets_data.head()

Unnamed: 0,user_handle,text,location,friends_count,created_at,followers_count
0,WHO,RT @DrTedros: My thoughts are with @captaintom...,"Geneva, Switzerland",1730,2021-01-31 22:13:38,8949368
1,WHO,"Through the elimination initiative, WHO is aim...","Geneva, Switzerland",1730,2021-01-31 21:54:44,8949368
2,WHO,Stopping discrimination and promoting inclusio...,"Geneva, Switzerland",1730,2021-01-31 16:42:43,8949368
3,WHO,"Untreated, #leprosy can cause progressive and ...","Geneva, Switzerland",1730,2021-01-31 12:22:06,8949368
4,WHO,#Leprosy is curable with multidrug therapy. Tr...,"Geneva, Switzerland",1730,2021-01-31 11:17:53,8949368


In [9]:
tweets_data.to_csv('WHOandUNtweets.csv')

### Reddit

### 3a) Subreddits:
- CoronavirusUS
- CoronavirusRecession
- COVID19
- WHO

### 3b)

In [10]:
reddit = praw.Reddit(client_id="***", 
                     client_secret="**",
                     password="***",
                     user_agent="***",
                     username="***",)

In [11]:
posts_dict = defaultdict(list)

In [12]:
l = ['CoronavirusUS', 'CoronavirusRecession', 'COVID19', 'Who']

In [13]:
for element in l:
    subreddit_1 = reddit.subreddit(element)
    for submission in subreddit_1.rising(limit=30):
        posts_dict["id"].append(submission.id)
        posts_dict["title"].append(submission.title)
        posts_dict["author"].append(submission.author)
        posts_dict["clicked"].append(submission.clicked)
        posts_dict["edited"].append(submission.edited)
        posts_dict["num_comments"].append(submission.num_comments)
        posts_dict["score"].append(submission.score)
        posts_dict["upvote_ratio"].append(submission.upvote_ratio)
        posts_dict["url"].append(submission.url)
        posts_dict["is_original_content"].append(submission.is_original_content)

In [14]:
news_data = pd.DataFrame(posts_dict)

In [15]:
news_data.head()

Unnamed: 0,id,title,author,clicked,edited,num_comments,score,upvote_ratio,url,is_original_content
0,l9odep,Some Experts Claim Covid-19 Reinfections Are ‘...,thinkB4WeSpeak,False,False,4,4,0.83,https://www.forbes.com/sites/leahrosenbaum/202...,False
1,l9ito1,6 US states’ Covid-19 deaths hit 1 in 500 mark,Silverseren,False,False,14,164,0.99,https://www.dhakatribune.com/world/north-ameri...,False
2,l9e3q2,U.S. Labor Department issues COVID-19 workplac...,thinkB4WeSpeak,False,False,18,276,1.0,https://www.reuters.com/article/us-usa-economy...,False
3,l9388a,"Even after being vaccinated, health experts ag...",altaccountfiveyaboi,False,False,55,609,0.98,/r/UnpopularFacts/comments/l92skl/even_after_b...,False
4,l9g7mn,How did West Virginia become a national leader...,gigivictoria,False,False,6,48,1.0,https://www.nbcnews.com/health/health-care/we-...,False


### 3c)

In [16]:
news_data.to_csv('pandemicReddit.csv', encoding='utf-8')

### 3d)

In [17]:
# One way to do it. You can also use intersect() or merge()
hot_1 = []
common_1 = []
for element in l:
    subreddit_1 = reddit.subreddit(element)
    for submission in subreddit_1.hot(limit=50):
        hot_1.append(submission.id)
    for submission in subreddit_1.rising(limit=50):
        if submission.id in hot_1:
            common_1.append(submission.title)
    print(f"Common tweets of {element}")
    print(common_1)
    print('---------------------------------------')

Common tweets of CoronavirusUS
['Some Experts Claim Covid-19 Reinfections Are ‘Not A Huge Problem’—But Nobody’s Tracking The Numbers', '6 US states’ Covid-19 deaths hit 1 in 500 mark', 'U.S. Labor Department issues COVID-19 workplace safety guidance', 'Even after being vaccinated, health experts agree you should keep wearing a mask until a large portion of the population is inoculated', 'How did West Virginia become a national leader in Covid vaccination?', 'Wisconsin’s No. 1 mink farming industry now seen as a COVID-19 risk', 'Anti-vaccine protesters temporarily shut down major coronavirus vaccine site at Dodger Stadium in Los Angeles', 'Computer-Shy Elderly Are Shouldered Aside in Vaccination Race', 'Essential workers get lost in the vaccine scrum as states prioritize the elderly', 'Scientists decode how coronavirus damages lung cells within hours', 'Some Health-Care Workers Are Still Saying No to a Covid-19 Vaccine', '30 Colorado schools, 6 colleges report new COVID-19 outbreaks in 

Common tweets of Who
['Some Experts Claim Covid-19 Reinfections Are ‘Not A Huge Problem’—But Nobody’s Tracking The Numbers', '6 US states’ Covid-19 deaths hit 1 in 500 mark', 'U.S. Labor Department issues COVID-19 workplace safety guidance', 'Even after being vaccinated, health experts agree you should keep wearing a mask until a large portion of the population is inoculated', 'How did West Virginia become a national leader in Covid vaccination?', 'Wisconsin’s No. 1 mink farming industry now seen as a COVID-19 risk', 'Anti-vaccine protesters temporarily shut down major coronavirus vaccine site at Dodger Stadium in Los Angeles', 'Computer-Shy Elderly Are Shouldered Aside in Vaccination Race', 'Essential workers get lost in the vaccine scrum as states prioritize the elderly', 'Scientists decode how coronavirus damages lung cells within hours', 'Some Health-Care Workers Are Still Saying No to a Covid-19 Vaccine', '30 Colorado schools, 6 colleges report new COVID-19 outbreaks in January', 

### Bonus Question

In [18]:
for element in l:
    subreddit = reddit.subreddit(element)
    print(f"Rules and Time of Subreddit: {element}")
    print('\n')
    for submission in subreddit.rules:
        print(submission, datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S'))
    print('-------------------------------------')

Rules and Time of Subreddit: CoronavirusUS


Posts must be specific to the USA/Canada 2020-02-13 04:32:40
No medical advice and no treatments until phase 1 clinical trials are completed (no new treatments) 2020-03-15 08:59:14
Be polite and respectful. 2020-02-13 04:30:16
No Reposts. 2020-02-13 04:38:17
No rants. Posts must have a point. 2020-03-13 01:44:47
No misinformation. Specific claims require credible sources. 2020-02-13 04:35:11
No personal anxiety posts. 2020-02-14 23:47:55
No sales posts of any type without permission. 2020-03-14 12:18:17
No Unconstructive Politics! We're all Americans. 2020-02-21 05:28:47
Media policy 2020-02-13 04:36:42
No memes 2020-03-19 03:32:10
-------------------------------------
Rules and Time of Subreddit: CoronavirusRecession


Be kind. 2020-03-19 12:26:50
Stay On-Topic. 2020-03-19 12:28:04
No Medical Advice. 2020-03-19 12:29:47
No Conspiracy theories. 2020-03-19 12:31:17
No Edited/Sensationalized article titles. 2020-03-22 02:04:29
----------------