### Step 1: Accept a search term from the user and download the last 100 tweets with that term.

In [1]:
# The python-twitter module is used because it has a RESTful API that returns
# pre-parsed objects instead of raw JSON to make working with data easier.

import twitter
import json

# The twitter library uses an API object authenticated with app keys to access the API.
# For privacy reasons, these keys are loaded from a local JSON file not included in this repo.

with open('twitter_keys.json') as keystore:
    keys = json.load(keystore)

api = twitter.Api(consumer_key=keys['consumer_key'],
                  consumer_secret=keys['consumer_secret'],
                  access_token_key=keys['access_token_key'],
                  access_token_secret=keys['access_token_secret'])

In [2]:
# This is a function to accept a search term and fetch the tweets with that term.
def fetchTestData(search_string):
    try:
        tweets_fetched = api.GetSearch(search_string, count=100)
        print("Fetched",str(len(tweets_fetched)),"tweets with the term",search_string)
        return [{'text':status.text,'label':None} for status in tweets_fetched]
    except:
        print("Failed to fetch tweets with the term",search_string)
        return None

In [3]:
search_string = input("Hello. Please enter a search term: ")
testData = fetchTestData(search_string)

Hello. Please enter a search term: apple
Fetched 87 tweets with the term apple


In [4]:
# First 10 tweets that were fetched.
for tweet in testData[0:9]:
    print(tweet['text'])

Next year, Netflix turns 20. Amazon turns 23. Apple turns 41. Facebook turns 13. Twitter turns 11. Google turns 19.
My NEW song #BeautifulDistraction is now available on iTunes Download it now!!! https://t.co/iZ8er1fM9H
[#MONSTA_X] #몬스타엑스 #2017골든디스크 #음반후보 투표참여안내! 
많은 참여 부탁드립니다💞
👉🏻Google play : https://t.co/YetbxH99iI
👉🏻Apple Store : https://t.co/YnZEPW3IHB
business: The Fed raised interest rates. So, what should you do? https://t.co/v8vvphFsdK https://t.co/p9i6WgYdZC
https://t.co/pYFMQgyuvD  https://t.co/vdKQJvQ6CT Off-Grid Travel — Setting Up a… https://t.co/ywkcRscBq1… https://t.co/FKp13R6nXx
@sheeraddicted c'est le cancer de l'humanité parce qu'apple répare rien
RT @thegoodperry: Ion want yo apple pie mama
RT @applegiveaway16: Apple Computer GIVEAWAY!!!
RT to Win and stay active!!
Notifications must be on to win!! https://t.co/9tJMLfO6jq
RT @applesfera: Apple amplía los descuentos de sus accesorios y lanza el monitor LG UltraFine 5K en su tienda online https://t.co/WP3MYoTPd…


### Step 2: Classify these tweets as positive or negative.

In [None]:
# This requires downloading a corpus of tweet data. However, twitter only allows
# tweet ID's to be shared, and not the tweets themselves. The API can be used to
# cross-reference the corpus, but considering that Niek Sanders corpus contains
# 5000 tweets and twitter limits API pulls to 180/15 mins, it will take several 
# hours to complete the pull.

def createTrainingCorpus(corpusFile,tweetFile):
    import csv
    corpus=[]
    with open(corpusFile,'r') as csvfile:
        lineReader = csv.reader(csvfile,delimiter=',',quotechar='"')
        for row in lineReader:
            corpus.append({'tweet_id':row[2],'label':row[1],'topic':row[0]})
    # The tweet pull is self-throttled to work around twitter's rate limit.
    import time
    rate_limit=180
    sleep_time=900/180 # 15 minutes / limit
    trainingData=[]
    downloadCount = 1
    for tweet in corpus:
        try:
            status = api.GetStatus(tweet['tweet_id'])
            tweet['text'] = status.text
            print('fetched tweet',str(downloadCount),'of',str(len(corpus)))
            downloadCount += 1
            trainingData.append(tweet)
            time.sleep(sleep_time) # to avoid rate limit
        except:
            continue
    with open(tweetFile,'w') as csvfile:
        lineWriter = csv.writer(csvfile,delimiter=',',quotechar='"')
        for tweet in trainingData:
            lineWriter.writerow([tweet['tweet_id'],tweet['text'],tweet['label'],tweet['topic']])
    return trainingData

In [None]:
createTrainingCorpus('./corpus.csv','./tweets.csv')

fetched tweet 0 of 5513
fetched tweet 1 of 5513
fetched tweet 2 of 5513
fetched tweet 3 of 5513
fetched tweet 4 of 5513
fetched tweet 5 of 5513
fetched tweet 6 of 5513
fetched tweet 7 of 5513
fetched tweet 8 of 5513
fetched tweet 9 of 5513
