# Get tweets data

In [216]:
import GetOldTweets3 as got
import pandas as pd
import numpy as np
import time
import re
from itertools import combinations 

In [98]:
# Ticker symbols, kept in two lists of 15 (split as labelled by the variable names).
tickers_democratic = [
    'EXC', 'F', 'APTV', 'STZ', 'EL',
    'SPWR', 'KO', 'WMT', 'HD', 'NEE',
    'CSX', 'MCD', 'SPG', 'FSLR', 'NSC',
]
tickers_republic = [
    'HON', 'COP', 'MRO', 'C', 'CRM',
    'GILD', 'CVX', 'MRK', 'PYPL', 'AXP',
    'V', 'AMZN', 'GOOG', 'QCOM', 'FB',
]

# Full universe: the democratic list followed by the republic list.
tickers = [*tickers_democratic, *tickers_republic]

In [107]:
# Every unordered pair of distinct tickers, in the order itertools yields them.
combination = [pair for pair in combinations(tickers, 2)]
i = 0  # counter incremented once per processed pair by the scraping loops

In [113]:
# Fetch tweets from 2016-10-01 to 2016-12-31 that match "$A $B" for every
# ticker pair. The accumulated result is rewritten to df_2016.csv after each
# pair, so an interrupted run keeps everything fetched so far.
max_tweets = 5000  # per-query cap passed to GetOldTweets3
df_2016 = pd.DataFrame(columns=['date','source','target','text','hashtags'])
frames_2016 = []

for combo in combination:
    tweetCriteria = (
        got.manager.TweetCriteria()
        .setQuerySearch('$' + combo[0] + ' $' + combo[1])
        .setSince("2016-10-01")
        .setUntil("2016-12-31")
        .setMaxTweets(max_tweets)
    )
    tweet = got.manager.TweetManager.getTweets(tweetCriteria)

    date = [t.date for t in tweet]
    # Hitting the cap means this pair's result may be truncated: report it and
    # stop the whole run (the pair itself is not stored).
    if len(date) == max_tweets:
        print('Max reached for {}'.format(combo))
        break

    pair_frame = pd.DataFrame({
        'date': date,
        'source': combo[0],
        'target': combo[1],
        'text': [t.text for t in tweet],
        'hashtags': [t.hashtags for t in tweet],
    })
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. Empty results are skipped so they cannot affect dtypes.
    if not pair_frame.empty:
        frames_2016.append(pair_frame)
        df_2016 = pd.concat(frames_2016, ignore_index=True)

    df_2016.to_csv('df_2016.csv')  # checkpoint after every pair
    i += 1
    time.sleep(10)  # pause between queries

In [121]:
# Same collection as for 2016, repeated for the Oct-Dec window of each listed
# year. Each year is written to its own df_<year>.csv, rewritten after every
# pair so a partial run is still usable.
years = ['2015', '2012', '2011']
for year in years:
    i = 0
    # Named df_year (not df_2016) so this loop no longer overwrites the real
    # 2016 frame held in the kernel with another year's data.
    df_year = pd.DataFrame(columns=['date','source','target','text','hashtags'])
    frames_year = []

    for combo in combination:
        tweetCriteria = (
            got.manager.TweetCriteria()
            .setQuerySearch('$' + combo[0] + ' $' + combo[1])
            .setSince(year + '-10-01')
            .setUntil(year + '-12-31')
            .setMaxTweets(5000)
        )
        tweet = got.manager.TweetManager.getTweets(tweetCriteria)

        pair_frame = pd.DataFrame({
            'date': [t.date for t in tweet],
            'source': combo[0],
            'target': combo[1],
            'text': [t.text for t in tweet],
            'hashtags': [t.hashtags for t in tweet],
        })
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported replacement. Empty results are skipped.
        if not pair_frame.empty:
            frames_year.append(pair_frame)
            df_year = pd.concat(frames_year, ignore_index=True)

        df_year.to_csv('df_' + year + '.csv')  # checkpoint after every pair
        i += 1
        time.sleep(10)  # pause between queries

# Building network

In [267]:
# Reload the scraped tweets from disk, parsing the 'date' column as datetimes.
csv_paths = ('df_2016.csv', 'df_2015.csv', 'df_2012.csv', 'df_2011.csv')
df_2016, df_2015, df_2012, df_2011 = (
    pd.read_csv(path, parse_dates=['date']) for path in csv_paths
)

In [268]:
def generate_network(df):
    """Build a symmetric, row-normalised, log-scaled co-mention matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per tweet, with at least the columns ``source`` and
        ``target``. All other columns are ignored. ``df`` is not modified.

    Returns
    -------
    pandas.DataFrame
        Matrix indexed by ``source`` with ``target`` columns. Entry (s, t) is
        ``log(1 + n_st / sum_t n_st)``, where ``n_st`` counts the rows linking
        ``s`` and ``t`` in either direction. Pairs without rows are 0. Tickers
        that never occur in ``df`` get no row or column.
    """
    # Only the pair columns are needed; selecting them also leaves out extra
    # columns such as an index column saved by to_csv.
    edges = df[['source', 'target']]

    # Swap source and target and duplicate, so every edge counts both ways.
    mirrored = edges.rename(columns={'target': 'source', 'source': 'target'})
    edges = pd.concat([edges, mirrored], ignore_index=True)

    connections = edges.groupby(['source', 'target']).size().reset_index(name='N')

    # Pivot to matrix form. Keyword arguments are required by pandas >= 2.0.
    connections = connections.pivot(index='source', columns='target', values='N').fillna(0)
    # Normalise each row by its sum.
    connections = connections.div(connections.sum(axis=1), axis=0)

    return np.log1p(connections)

In [269]:
# One connection matrix per year, built from the matching tweet frame.
connections_2016, connections_2015, connections_2012, connections_2011 = map(
    generate_network, (df_2016, df_2015, df_2012, df_2011)
)

APTV and PYPL are missing from the 2011 and 2012 data, while only APTV is missing from the 2015 and 2016 data.

In [270]:
# Element-wise change of each connection from 2015 to 2016 (aligned on labels).
delta_2016 = connections_2016.sub(connections_2015)

In [273]:
# Row for ticker 'C': its 2015 -> 2016 change against every target.
delta_2016.loc['C', :]

target
AMZN    0.092596
AXP     0.018732
C       0.000000
COP     0.000387
CRM    -0.009201
CSX     0.017196
CVX    -0.029316
EL     -0.000970
EXC     0.008873
F       0.023801
FB      0.056968
FSLR   -0.009862
GILD   -0.047769
GOOG    0.010134
HD     -0.025976
HON    -0.007011
KO     -0.009770
MCD    -0.014495
MRK    -0.000916
MRO    -0.006881
NEE     0.001505
NSC     0.000084
PYPL   -0.000496
QCOM   -0.001779
SPG     0.000418
SPWR   -0.000910
STZ     0.002825
V      -0.028120
WMT    -0.055300
Name: C, dtype: float64

# test

In [259]:
# Scratch input: `df` is bound to the same object as df_2016 (no copy is made).
df = df_2016

In [260]:
# Scratch version of generate_network's counting step, stopping before the
# row normalisation so `connections` holds raw pair counts.
# NOTE: `df` is rebound to the doubled frame, so re-running this cell without
# re-running the previous one doubles the counts again.
# (The former `df.drop(...)` call discarded its result and has been removed.)
df = pd.concat([df, df.rename(columns={'target':'source','source':'target'})], ignore_index=True) #Swap source and target and duplicate

connections = df.groupby(['source','target']).size().reset_index()
connections.columns = ["source", "target", "N"]

# Keyword arguments are required by DataFrame.pivot in pandas >= 2.0.
connections = connections.pivot(index="source", columns="target", values="N").fillna(0)

In [266]:
# Log-scale the raw counts; the +1 keeps zero counts at 0.
np.log(connections.add(1))

target,AMZN,AXP,C,COP,CRM,CSX,CVX,EL,EXC,F,...,MRO,NEE,NSC,PYPL,QCOM,SPG,SPWR,STZ,V,WMT
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AMZN,0.0,4.174387,5.746203,4.127134,5.823046,3.931826,5.379897,1.386294,2.944439,5.308268,...,2.302585,2.564949,1.791759,4.691348,5.365976,2.890372,2.397895,4.007333,5.429346,7.142827
AXP,4.174387,0.0,4.553877,3.367296,1.791759,0.693147,5.666427,0.0,3.135494,3.178054,...,0.693147,3.135494,1.098612,3.637586,2.639057,1.791759,0.0,0.0,5.572154,5.220356
C,5.746203,4.553877,0.0,3.73767,3.89182,3.970292,4.248495,0.693147,3.044522,4.94876,...,2.639057,2.197225,2.397895,3.295837,3.433987,2.079442,1.098612,2.639057,4.382027,3.496508
COP,4.127134,3.367296,3.73767,0.0,2.302585,0.693147,5.866468,0.0,3.465736,4.060443,...,4.248495,3.583519,0.0,0.693147,3.401197,1.791759,0.0,0.0,2.833213,2.833213
CRM,5.823046,1.791759,3.89182,2.302585,0.0,3.218876,2.484907,1.386294,1.386294,3.091042,...,2.302585,0.0,1.94591,3.044522,3.688879,1.098612,0.0,3.465736,2.639057,4.430817
CSX,3.931826,0.693147,3.970292,0.693147,3.218876,0.0,1.609438,1.609438,0.693147,2.079442,...,2.079442,0.693147,4.219508,1.386294,2.890372,0.693147,0.693147,0.693147,2.079442,2.564949
CVX,5.379897,5.666427,4.248495,5.866468,2.484907,1.609438,0.0,0.693147,3.433987,3.78419,...,4.043051,3.988984,1.098612,2.70805,3.332205,1.94591,1.098612,2.944439,4.890349,5.384495
EL,1.386294,0.0,0.693147,0.0,1.386294,1.609438,0.693147,0.0,0.0,1.386294,...,1.098612,0.0,0.0,0.693147,1.098612,0.693147,0.693147,1.098612,1.098612,0.0
EXC,2.944439,3.135494,3.044522,3.465736,1.386294,0.693147,3.433987,0.0,0.0,2.890372,...,1.386294,3.970292,0.0,0.693147,2.484907,2.302585,1.098612,0.0,1.791759,1.609438
F,5.308268,3.178054,4.94876,4.060443,3.091042,2.079442,3.78419,1.386294,2.890372,0.0,...,2.302585,2.772589,0.693147,2.564949,4.867534,1.791759,1.94591,1.609438,4.394449,3.610918
