In [1]:
# Installing dependencies
#!pip install praw
#!pip install psaw
#!pip install yfinance

In [2]:
import praw
from psaw import PushshiftAPI
import json
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

# Suppress warning messages
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the Reddit API settings (client_id, client_secret, user_agent) from info.json
with open('info.json') as f:
    info = dict(json.load(f))

In [4]:
# Create the Reddit client from the loaded settings, then hand it to PushshiftAPI
reddit = praw.Reddit(
    client_id=info["client_id"],
    user_agent=info["user_agent"],
    client_secret=info["client_secret"],
)
api = PushshiftAPI(reddit)

In [5]:
# Search r/stocks for up to 100 submissions matching "TWTR" and tabulate
# every attribute of each returned object, one row per submission
subm_dicts = [
    {k: getattr(praw_obj, k) for k in vars(praw_obj)}
    for praw_obj in api.search_submissions(
        subreddit='stocks',
        q="TWTR",
        filter=['url', 'author', 'title', 'subreddit'],
        limit=100,
    )
]
df = pd.DataFrame(subm_dicts)
df

Unnamed: 0,comment_limit,comment_sort,_reddit,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,...,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,_fetched,_comments_by_id,post_hint,preview
0,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[removed],t2_3u5upg0j,False,,0,...,https://www.reddit.com/r/stocks/comments/twykl...,3852890,1.649175e+09,0,,False,False,{},,
1,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,#Good morning traders and investors of the r/s...,t2_eaak0,False,,0,...,https://www.reddit.com/r/stocks/comments/twts1...,3852890,1.649162e+09,0,,False,False,{},self,{'images': [{'source': {'url': 'https://extern...
2,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[removed],t2_153n7s,False,,0,...,https://www.reddit.com/r/stocks/comments/tw47e...,3852890,1.649085e+09,0,,False,False,{},,
3,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[removed],,False,,0,...,https://www.reddit.com/r/stocks/comments/tw40p...,3852890,1.649084e+09,0,,False,False,{},,
4,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[removed],,False,,0,...,https://www.reddit.com/r/stocks/comments/tw363...,3852890,1.649082e+09,0,,False,False,{},,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,"**PsychoMarket Recap - Monday, April 26, 2021*...",t2_7gtjd4c0,False,,0,...,https://www.reddit.com/r/stocks/comments/mz8dh...,3852890,1.619472e+09,1,,False,False,{},,
96,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,"**PsychoMarket Recap - Thursday, April 22, 202...",t2_7gtjd4c0,False,,0,...,https://www.reddit.com/r/stocks/comments/mwegi...,3852890,1.619125e+09,1,,False,False,{},self,{'images': [{'source': {'url': 'https://extern...
97,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,"**PsychoMarket Recap - Friday, April 16, 2021*...",t2_7gtjd4c0,False,,0,...,https://www.reddit.com/r/stocks/comments/msbrm...,3852890,1.618605e+09,0,,False,False,{},self,{'images': [{'source': {'url': 'https://extern...
98,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[removed],t2_9n9zl,False,,0,...,https://www.reddit.com/r/stocks/comments/mlujt...,3852890,1.617771e+09,0,,False,False,{},,


In [6]:
# Download the historical price table for TWTR (with a progress bar) and show it
twtr = yf.download("TWTR", progress=True)
twtr

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-11-07,45.099998,50.090000,44.000000,44.900002,44.900002,117701600
2013-11-08,45.930000,46.939999,40.689999,41.650002,41.650002,27925300
2013-11-11,40.500000,43.000000,39.400002,42.900002,42.900002,16113900
2013-11-12,43.660000,43.779999,41.830002,41.900002,41.900002,6316700
2013-11-13,41.029999,42.869999,40.759998,42.599998,42.599998,8688300
...,...,...,...,...,...,...
2022-03-30,40.349998,40.349998,38.570000,39.000000,39.000000,15804300
2022-03-31,39.110001,39.230000,38.410000,38.689999,38.689999,13208300
2022-04-01,39.160000,39.849998,39.000000,39.310001,39.310001,12122600
2022-04-04,47.869999,51.369999,46.860001,49.970001,49.970001,268465400


## Processing Tickers

In [11]:
# At close, calculate the real and percent change since last close
def get_diff(ticker_data):
    """Compute close-to-close changes for a price table.

    Args:
        ticker_data: DataFrame with a "Close" column, one row per period in
            chronological order. The frame is not modified.

    Returns:
        A ``(real, percent)`` tuple of lists, each with one entry per row of
        ``ticker_data``:

        * ``real[i]`` is ``Close[i] - Close[i-1]``
        * ``percent[i]`` is ``real[i] / Close[i-1]`` (a fraction, not x100)

        The first entry of both lists is 0 because there is no previous close.
        Both lists are empty when ``ticker_data`` has no rows.
    """
    close = ticker_data["Close"]
    # shift(1) lines every row up with the previous row's close, so the whole
    # column is processed at once instead of one iloc lookup per row.
    previous_close = close.shift(1)
    real = close - previous_close
    percent = real / previous_close
    if len(close):
        # The first row has no predecessor; report "no change" rather than NaN.
        real.iloc[0] = 0
        percent.iloc[0] = 0
    return real.tolist(), percent.tolist()

In [12]:
# Get the reddit posts that mention a certain ticker n days before a large change in stock price
def get_pre_change_posts(ticker, ticker_gain, days=1, limit=100, subreddit="stocks"):
    """Collect submissions matching ``ticker`` posted shortly before each given date.

    For every index label of ``ticker_gain`` a search window is built that ends
    6 hours after that timestamp and begins ``days`` days before the window end.
    Each window is searched through the notebook-level ``api`` object.

    Args:
        ticker: Query string passed to the search (``q=ticker``).
        ticker_gain: DataFrame whose index holds the timestamps of interest
            (only the index is used).
        days: Length of each search window in days.
        limit: Maximum number of submissions requested per window.
        subreddit: Subreddit to search.

    Returns:
        A DataFrame with one row per submission found across all windows and
        one column per attribute of the returned objects. The frame is empty
        when nothing is found, including when ``ticker_gain`` has no rows.
    """
    records = []
    for change_date in ticker_gain.index:
        # NOTE(review): the epoch round-trip yields a naive datetime in the
        # machine's local time zone - confirm this is the intended reference.
        anchor = datetime.fromtimestamp(change_date.timestamp())
        start_date = anchor + timedelta(hours=6, days=-days)
        end_date = anchor + timedelta(hours=6)
        submissions = api.search_submissions(
            after=start_date,
            before=end_date,
            q=ticker,
            subreddit=subreddit,
            filter=['url', 'author', 'title', 'subreddit'],
            limit=limit,
        )
        records.extend(
            {k: getattr(praw_obj, k) for k in vars(praw_obj)}
            for praw_obj in submissions
        )
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # growing a frame inside a loop re-copies all previous rows each time.
    return pd.DataFrame(records)

In [15]:
# Generate information for a given ticker
def process_ticker(ticker, gain_cutoff=0.05, loss_cutoff=0.05, limit=100, days=1, subreddit="stocks"):
    """Find large price moves for a ticker and the posts that preceded them.

    Downloads the price history for ``ticker``, adds ``Real_Change`` and
    ``Percent_Change`` columns from ``get_diff``, selects the rows with large
    moves, and searches for posts before each of those rows.

    Args:
        ticker: Ticker symbol, used both for the download and as search query.
        gain_cutoff: Rows with ``Percent_Change`` strictly above this fraction
            count as gains.
        loss_cutoff: Rows with ``Percent_Change`` strictly below the negative
            of this fraction count as losses.
        limit: Maximum number of submissions requested per search window.
        days: Length in days of the search window before each move.
        subreddit: Subreddit searched for posts.

    Returns:
        ``(ticker_gain, ticker_loss, pre_gain, pre_loss)``: the gain rows, the
        loss rows, and the results of ``get_pre_change_posts`` for each.
    """
    ticker_data = yf.download(ticker, progress=False)
    # Use the data just downloaded for this ticker; this previously read the
    # notebook-global ``twtr`` frame regardless of the requested ticker.
    real, percent = get_diff(ticker_data)
    ticker_data["Real_Change"] = real
    ticker_data["Percent_Change"] = percent

    is_gain = ticker_data["Percent_Change"] > gain_cutoff
    is_loss = ticker_data["Percent_Change"] < -loss_cutoff
    ticker_gain = ticker_data[is_gain]
    ticker_loss = ticker_data[is_loss]

    pre_gain = get_pre_change_posts(ticker, ticker_gain, days=days, limit=limit, subreddit=subreddit)
    pre_loss = get_pre_change_posts(ticker, ticker_loss, days=days, limit=limit, subreddit=subreddit)
    return ticker_gain, ticker_loss, pre_gain, pre_loss

In [16]:
# Run the full pipeline for TWTR with the default cutoffs and window length
gain, loss, pre_gain, pre_loss = process_ticker(ticker="TWTR", limit=100)

In [17]:
# Price rows whose Percent_Change exceeded the gain cutoff
gain

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Real_Change,Percent_Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-12-04,41.270000,43.919998,41.270000,43.689999,43.689999,11014900,2.320000,0.056079
2013-12-09,45.590000,49.840000,45.020000,49.139999,49.139999,17366600,4.189999,0.093215
2013-12-10,48.900002,52.580002,48.700001,51.990002,51.990002,25792000,2.850002,0.057998
2013-12-12,52.200001,55.869999,50.689999,55.330002,55.330002,23446900,2.990002,0.057127
2013-12-13,56.200001,59.410000,55.450001,59.000000,59.000000,38979600,3.669998,0.066329
...,...,...,...,...,...,...,...,...
2022-02-09,36.500000,37.919998,36.139999,37.830002,37.830002,24473500,1.850002,0.051418
2022-02-24,31.299999,35.070000,31.299999,34.980000,34.980000,22551000,2.220001,0.067766
2022-03-09,33.549999,34.529999,33.299999,34.369999,34.369999,16886000,1.689999,0.051714
2022-03-17,35.209999,37.709999,34.889999,37.299999,37.299999,30853100,1.930000,0.054566


In [18]:
# Price rows whose Percent_Change fell below the negative loss cutoff
loss

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Real_Change,Percent_Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-11-08,45.930000,46.939999,40.689999,41.650002,41.650002,27925300,-3.250000,-0.072383
2013-11-18,43.500000,43.950001,40.849998,41.139999,41.139999,12810600,-2.840000,-0.064575
2013-12-27,70.099998,71.250000,63.689999,63.750000,63.750000,60418700,-9.559998,-0.130405
2013-12-30,60.270000,63.709999,58.570000,60.509998,60.509998,55538300,-3.240002,-0.050824
2014-01-07,67.669998,67.730003,61.389999,61.459999,61.459999,31748400,-4.830002,-0.072862
...,...,...,...,...,...,...,...,...
2021-04-30,56.000000,57.630001,55.049999,55.220001,55.220001,88378800,-9.869995,-0.151636
2021-10-04,61.040001,61.160000,57.639999,58.389999,58.389999,17381300,-3.590000,-0.057922
2021-10-27,60.049999,60.160000,54.790001,54.810001,54.810001,48107700,-6.619999,-0.107765
2022-01-21,36.900002,37.080002,34.799999,34.820000,34.820000,25674100,-2.459999,-0.065987


In [19]:
# Submissions found in the search window before each row of `gain`
pre_gain

Unnamed: 0,comment_limit,comment_sort,_reddit,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,...,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,_fetched,_comments_by_id,link_flair_template_id
0,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,,t2_3posu,False,,0,...,False,http://imgur.com/I3aGRGa,3852921,1391720000.0,0,{'oembed': {'provider_url': 'http://imgur.com'...,False,False,{},
1,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,,t2_f16nu,False,,0,...,False,http://www.theeconomicdaily.com/2014/02/06/ana...,3852921,1391694000.0,0,,False,False,{},
2,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[deleted],,False,,0,...,False,https://www.reddit.com/r/stocks/comments/2uw9p...,3852921,1423161000.0,0,,False,False,{},
3,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,,t2_unhrn,False,,0,...,False,https://www.reddit.com/r/stocks/comments/458ld...,3852921,1455195000.0,0,,False,False,{},
4,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,"Realistically the prices can't get much lower,...",t2_mwro0,False,,0,...,False,https://www.reddit.com/r/stocks/comments/4hpzc...,3852921,1462307000.0,0,,False,False,{},
5,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,On $aapl and $twtr . Apple is looking like a g...,t2_vn0bl,False,,0,...,False,https://www.reddit.com/r/stocks/comments/4hkur...,3852921,1462237000.0,0,,False,False,{},
6,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,"$GRPN, $TWTR, $YHOO... who is getting bought u...",t2_4hpkx,False,,0,...,False,https://www.reddit.com/r/stocks/comments/4nw6h...,3852921,1465832000.0,0,,False,False,{},
7,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,Twitter Inc (NYSE:TWTR) stock continues to gai...,t2_qk4a6,False,,0,...,False,https://www.reddit.com/r/stocks/comments/55tqm...,3852921,1475591000.0,0,,False,False,{},
8,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,#Good morning traders of the r/stocks sub! Hap...,t2_eaak0,False,,0,...,False,https://www.reddit.com/r/stocks/comments/78ne1...,3852922,1508935000.0,0,,False,False,{},
9,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,"So a month or so ago, I invested $5200 in the ...",t2_k4wrm2r,False,,0,...,False,https://www.reddit.com/r/stocks/comments/7ssnr...,3852922,1516847000.0,0,,False,False,{},


In [20]:
# Submissions found in the search window before each row of `loss`
pre_loss

Unnamed: 0,comment_limit,comment_sort,_reddit,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,...,subreddit_subscribers,created_utc,num_crossposts,media,is_video,_fetched,_comments_by_id,post_hint,preview,link_flair_template_id
0,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,Hello!\n\nI'm completely new to investing and ...,t2_gaedg,False,,0,...,3852927,1398374000.0,0,,False,False,{},,,
1,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[TWTR](https://www.google.com/finance?q=NYSE%3...,t2_9mc26,False,,0,...,3852928,1430256000.0,0,,False,False,{},self,{'images': [{'source': {'url': 'https://extern...,
2,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,https://investor.twitterinc.com/releasedetail....,t2_hsofa,False,,0,...,3852928,1430250000.0,0,,False,False,{},self,{'images': [{'source': {'url': 'https://extern...,
3,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[deleted],,False,,0,...,3852928,1438129000.0,0,,False,False,{},,,
4,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,Had a lot of movement today. This made some po...,t2_qva3e,False,,0,...,3852928,1446067000.0,0,,False,False,{},self,{'images': [{'source': {'url': 'https://extern...,
5,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,There is a rumor about the deal with venture c...,t2_p54sk,False,,0,...,3852929,1454338000.0,0,,False,False,{},,,
6,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,[removed],,False,,0,...,3852930,1454587000.0,0,,False,False,{},,,
7,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,Seems like TWTR is in a real trouble.,t2_p54sk,False,,0,...,3852931,1461702000.0,0,,False,False,{},,,
8,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,Yet another shitty quarter from a shitty compa...,t2_j2syo,False,,0,...,3852933,1469564000.0,0,,False,False,{},,,
9,2048,confidence,<praw.reddit.Reddit object at 0x00000256469C5A00>,,stocks,,t2_b4ivl,False,,0,...,3852933,1475713000.0,0,,False,False,{},,,
