### Step 1: Accept a search term from the user and download the last 100 tweets with that term.

In [1]:
# The python-twitter module is used because it has a RESTful API that returns
# pre-parsed objects instead of raw JSON to make working with data easier.

import twitter
import json

# The twitter library uses an API object authenticated with app keys to access the API.
# For privacy reasons, these keys are loaded from a local JSON file not included in this repo.

with open('twitter_keys.json') as keystore:
    keys = json.load(keystore)

api = twitter.Api(consumer_key=keys['consumer_key'],
                  consumer_secret=keys['consumer_secret'],
                  access_token_key=keys['access_token_key'],
                  access_token_secret=keys['access_token_secret'])

In [2]:
# This is a function to accept a search term and fetch the tweets with that term.
def fetchTestData(search_string):
    try:
        tweets_fetched = api.GetSearch(search_string, count=100)
        print("Fetched",str(len(tweets_fetched)),"tweets with the term",search_string)
        return [{'text':status.text,'label':None} for status in tweets_fetched]
    except:
        print("Failed to fetch tweets with the term",search_string)
        return None

In [3]:
search_string = input("Hello. Please enter a search term: ")
testData = fetchTestData(search_string)

Hello. Please enter a search term: apple
Fetched 99 tweets with the term apple


In [4]:
# First 10 tweets that were fetched.
for tweet in testData[0:9]:
    print(tweet['text'])

Next year, Netflix turns 20. Amazon turns 23. Apple turns 41. Facebook turns 13. Twitter turns 11. Google turns 19.
My NEW song #BeautifulDistraction is now available on iTunes Download it now!!! https://t.co/iZ8er1fM9H
[#MONSTA_X] #몬스타엑스 #2017골든디스크 #음반후보 투표참여안내! 
많은 참여 부탁드립니다💞
👉🏻Google play : https://t.co/YetbxH99iI
👉🏻Apple Store : https://t.co/YnZEPW3IHB
#RADIO #90s #juice Now Playing #Tom's Dinner #DNA - Suzanne Vega #APPS Apple https://t.co/gsMf7U2vzO Google https://t.co/idypaT7UqA
Apple releases iOS 10.2.1 beta 2 for developers - 9 to 5 Mac https://t.co/aCGwvS0gk6 #iOS
My body is just not fw food though. I need to go get some honey crisp apple slices from HEB
KEEP yourself UP in attitude, gratitude &amp; motivation!#pocketcheerleader  https://t.co/YJmsQdo5je https://t.co/077CJXusl7
Apple AirPods review: wireless that wows, earbuds that don't - The Verge https://t.co/x0QjbTNqOf #news https://t.co/33Rf1srNly
RT @AttackDetection: #Apple #macOS #SSL handshake #OCSP #MiTM and #DoS
CVE-

### Step 2: Classify these tweets as positive or negative.

In [9]:
# This requires downloading a corpus of tweet data. However, twitter only allows
# tweet ID's to be shared, and not the tweets themselves. The API can be used to
# cross-reference the corpus, but considering that Niek Sanders corpus contains
# 5000 tweets and twitter limits API pulls to 180/15 mins, it will take several 
# hours to complete the pull.

def createTrainingCorpus(corpusFile,tweetFile):
    import csv
    corpus=[]
    with open(corpusFile,'r') as csvfile:
        lineReader = csv.reader(csvfile,delimiter=',',quotechar='"')
        for row in lineReader:
            corpus.append({'tweet_id':row[2],'label':row[1],'topic':row[0]})
    # The tweet pull is self-throttled to work around twitter's rate limit.
    import time
    rate_limit=180
    sleep_time=900/180 # 15 minutes / limit
    trainingData=[]
    downloadCount = 1
    for tweet in corpus:
        try:
            status = api.GetStatus(tweet['tweet_id'])
            tweet['text'] = status.text
            print('fetched tweet',str(downloadCount),'of',str(len(corpus)))
            downloadCount += 1
            trainingData.append(tweet)
            time.sleep(sleep_time) # to avoid rate limit
        except:
            continue
    with open(tweetFile,'w') as csvfile:
        lineWriter = csv.writer(csvfile,delimiter=',',quotechar='"')
        for tweet in trainingData:
            # Ignore failures on line write so the trainingData doesn't get lost due to a single encoding error.
            try:
                lineWriter.writerow([tweet['tweet_id'],tweet['text'],tweet['label'],tweet['topic']])
            except:
                continue
    return trainingData

In [10]:
trainingData = createTrainingCorpus('./corpus.csv','./tweets.csv')

fetched tweet 1 of 5513
fetched tweet 2 of 5513
fetched tweet 3 of 5513
fetched tweet 4 of 5513
fetched tweet 5 of 5513
fetched tweet 6 of 5513
fetched tweet 7 of 5513
fetched tweet 8 of 5513
fetched tweet 9 of 5513
fetched tweet 10 of 5513
fetched tweet 11 of 5513
fetched tweet 12 of 5513
fetched tweet 13 of 5513
fetched tweet 14 of 5513
fetched tweet 15 of 5513
fetched tweet 16 of 5513
fetched tweet 17 of 5513
fetched tweet 18 of 5513
fetched tweet 19 of 5513
fetched tweet 20 of 5513
fetched tweet 21 of 5513
fetched tweet 22 of 5513
fetched tweet 23 of 5513
fetched tweet 24 of 5513
fetched tweet 25 of 5513
fetched tweet 26 of 5513
fetched tweet 27 of 5513
fetched tweet 28 of 5513
fetched tweet 29 of 5513
fetched tweet 30 of 5513
fetched tweet 31 of 5513
fetched tweet 32 of 5513
fetched tweet 33 of 5513
fetched tweet 34 of 5513
fetched tweet 35 of 5513
fetched tweet 36 of 5513
fetched tweet 37 of 5513
fetched tweet 38 of 5513
fetched tweet 39 of 5513
fetched tweet 40 of 5513
fetched t