# Pulling Tweets from WHO

In [61]:
# Import libraries
import os, sys, subprocess
import json
import pandas as pd
import numpy as np
from langdetect import detect
import pickle

# Show the full contents of every cell when a frame is displayed.
# `None` means "no limit"; the former sentinel -1 has been deprecated since
# pandas 1.0 and is no longer accepted by pandas 2.x.
pd.set_option('display.max_colwidth', None)

# Render up to 1000 rows before pandas truncates the display.
pd.set_option('display.max_rows', 1000)
#pd.set_option('display.max_columns', 500)

In [62]:
# Load the project configuration stored at the root of the git repository.

# Ask git for the repository root. Using subprocess (already imported above)
# instead of the IPython `!` shell escape keeps this cell valid Python, and
# check=True raises CalledProcessError when git fails instead of letting
# git's error text be used as a path.
terminal_call = subprocess.run(
    ['git', 'rev-parse', '--show-toplevel'],
    capture_output=True,
    text=True,
    check=True,
).stdout.splitlines()
repo_path = terminal_call[0]
project_config_path = os.path.join(repo_path, 'project_config.json')

with open(project_config_path, 'r') as fp:
    project_config = json.load(fp)

# Display the loaded configuration as the cell output.
project_config

{'project_module_relative_path': 'src'}

In [63]:
# Make the project's own package importable, then import the custom scraper.

# Directory holding the project modules, as configured in project_config.
module_path = os.path.join(repo_path, project_config['project_module_relative_path'])
# Register the directory only once so re-running this cell does not pile up
# duplicate entries on sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

import scraper
from scraper import tweet_scraper

# reload() lets edits to tweet_scraper be picked up without restarting the kernel.
from importlib import reload

In [88]:
# Re-execute the tweet_scraper module so edits made to its source since the
# initial import take effect in this kernel.
reload(tweet_scraper)

<module 'scraper.tweet_scraper' from '/Users/vivianpeng/git/tweet_analysis/src/scraper/tweet_scraper.py'>

## Pull Tweets for a year

In [78]:
# Fetch tweets matching the search phrase over a one-year window.
# The two dates are presumably the start and end of the window; confirm
# against tweet_scraper.query_tweets.
search_phrase = 'global public good'
date_from, date_to = '2019-06-01', '2020-06-01'
df = tweet_scraper.query_tweets(search_phrase, date_from, date_to)

In [79]:
# Give the integer-labelled columns (0, 1, 2) descriptive names.
# The result is rebound instead of using inplace=True: pandas discourages
# in-place mutation, and the rebinding form can be chained.
df = df.rename(columns={0: "date", 1: "text", 2: "user"})

In [80]:
# Normalise dtypes: the two free-text columns become plain strings and the
# date column is parsed into datetimes.
for string_column in ("text", "user"):
    df[string_column] = df[string_column].astype(str)
df["date"] = pd.to_datetime(df["date"])

In [81]:
# Count the non-null entries per user, then list the 20 users with the
# highest "text" counts.
rows_per_user = df.groupby("user").count()
rows_per_user.sort_values("text", ascending=False).head(20)

Unnamed: 0_level_0,date,text
user,Unnamed: 1_level_1,Unnamed: 2_level_1
FAKADIMABOYA,41,41
KataraGPDNet,23,23
GYamey,12,12
SciTechDiploHub,10,10
euroscot1,9,9
sanjaygreddy,8,8
FondationBotnar,8,8
geosociety,7,7
ISC,7,7
JeremyFarrar,7,7


In [82]:
# (number of rows, number of columns) of the frame at this point.
df.shape

(2418, 3)

In [83]:
# Preview the first few entries of the text column before any cleaning.
df["text"].head()

0    if you missed it, read or hear ftok @mariangelasimao @elenhoeg @ellenthoen @EmbCelsoAmorim achal prabhala on politics of #COVID19 #vaccines as #global public good.                                                                                                                    
1    Common public good should be a global geographical equation to solve the variables of universal entrepreneurial safety nets formed by individual basic needs in creative innovative group efforts towards goals A Soul non profit effort needs enterprise leadership without monitizing
2    Why - and how - we should make Covid-19 vaccines a global public good Globalizing the Fight Against the Pandemic by Carlos Alvarado Quesada, et al @ProSyn                                                                                                                             
3    One final thing, public money should be used to produce a public good. We have a chance to do that with agricultural reform. To create value

## Pickle for later use

In [29]:
# Disabled: uncomment to snapshot the frame as it stands at this point.
# The context manager makes sure the file handle is closed after the dump.
# with open("../data/gpg.pkl", "wb") as fp:
#     pickle.dump(df, fp)

## Clean Tweets

In [23]:
# Reload package when I make updates to function
# reload(tweet_scraper)

In [89]:
# Clean tweet column
# NOTE(review): judging by the before/after previews in this notebook,
# clean_tweets lower-cases the text and strips hashtags (the tagged word is
# removed too); confirm against the scraper module.
# `df` is overwritten here, so re-running this cell feeds already-cleaned
# text back into clean_tweets.
df = tweet_scraper.clean_tweets(df, 'text')

In [90]:
# Preview the first few entries of the text column after cleaning.
df["text"].head()

0    if you missed it, read or hear ftok @mariangelasimao @elenhoeg @ellenthoen @embcelsoamorim achal prabhala on politics of as public good.                                                                                                                                               
1    common public good should be a global geographical equation to solve the variables of universal entrepreneurial safety nets formed by individual basic needs in creative innovative group efforts towards goals a soul non profit effort needs enterprise leadership without monitizing
2    why - and how - we should make covid-19 vaccines a global public good globalizing the fight against the pandemic by carlos alvarado quesada, et al @prosyn                                                                                                                             
3    one final thing, public money should be used to produce a public good. we have a chance to do that with agricultural reform. to create value

## Write to CSV

In [91]:
# Keep only the rows whose text contains the exact search phrase.
# regex=False matches the phrase literally instead of compiling it as a
# regular expression, and na=False counts missing text as "no match" so the
# boolean mask never holds NaN (which would make the row selection raise).
df = df[df["text"].str.contains("global public good", regex=False, na=False)]

In [92]:
# Save the frame as CSV, leaving out the row index.
output_csv = "../data/gpg.csv"
df.to_csv(output_csv, index=False)