# Example: Ingest Data and Sentiments From A Twitter Feed into iguazio Stream & Time-Series DB 

## Initialization 
Install the required packages and set the environment variables (database address and credentials, Twitter app credentials).

In [None]:
!pip install textblob
!pip install twython

In [None]:
# iguazio DB path & credentials 
%env V3IO_PASSWORD <V3IO-Password>
%env V3IO_USER <V3IO-Username>
%env V3IO_ADDRESS <address of V3IO API end point>

# Twitter credentials, fill with your App credentials
%env app_key=<..>
%env app_secret=<..>  
%env oauth_token=<..> 
%env oauth_token_secret=<..>

### Twitter stream handling class

In [36]:
from twython import TwythonStreamer

# Twitter stream handler  
class MyStreamer(TwythonStreamer):
    """Twitter stream handler that hands each tweet to a user callback.

    Every received message counts against ``limit``; once the limit is
    used up the streamer disconnects.
    """

    def __init__(self, name, **kw):
        """Create a named streamer.

        name -- label passed as the first argument to the callback
        kw   -- keyword arguments forwarded unchanged to TwythonStreamer
        """
        self.name = name
        # Safe defaults so on_success() does not raise AttributeError if a
        # message arrives before start() has been called.
        self.cb = None
        self.limit = 10
        super().__init__(**kw)

    def start(self, cb, limit=10, **kw):
        """Start filtering the status stream.

        cb    -- callable invoked as cb(name, record) for every message that
                 has a 'text' field; a falsy value disables the callback
        limit -- number of received messages after which to disconnect
        kw    -- filter arguments forwarded to self.statuses.filter()
        """
        self.cb = cb
        self.limit = limit
        self.statuses.filter(**kw)

    def on_success(self, data):
        """Handle one stream message: forward tweets, then update the limit."""
        if 'text' in data:
            record = {'text': data['text'],
                      'user': '@' + data['user']['screen_name'],
                      'id': data['id'],
                      'created_at': data['created_at'],
                      }
            if self.cb:
                self.cb(self.name, record)

        # The counter is decremented for every message, including those
        # without a 'text' field.
        self.limit -= 1
        if self.limit <= 0:
            self.disconnect()

    def on_error(self, status_code, data, headers=None):
        """Report a stream error by printing its status code.

        ``headers`` is optional so the handler works both when it is called
        with (status_code, data) and when the response headers are passed as
        a third argument.
        """
        print(status_code)

        # Want to stop trying to get data because of the error?
        # Uncomment the next line!
        # self.disconnect()
        
  

### Our event handler (executes every time there is a valid tweet)

In [37]:
import json
import re

from textblob import TextBlob

def process_event(name, record):
    """Enrich a tweet record with sentiment metadata and write it to the stream.

    name   -- label of the originating streamer (only used in the printed log line)
    record -- dict with at least a 'text' key; it is updated in place with the
              'cleaned', 'polarity' and 'subjectivity' keys

    Relies on the notebook-level names ``TextBlob`` and ``v3`` being defined
    before the first event is processed.
    """
    # Blank out @mentions, URLs and every character that is not alphanumeric,
    # a space or a tab, then collapse runs of whitespace.
    # Raw string: the pattern contains regex escapes (\w, \S) that are invalid
    # escape sequences in a normal string literal.
    clean = ' '.join(
        re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", record['text']).split()
    )

    # enrich the record with natural language metadata
    blob = TextBlob(clean)
    record['cleaned'] = clean
    record['polarity'] = blob.sentiment.polarity
    record['subjectivity'] = blob.sentiment.subjectivity

    # Write the record into a stream and Time-series DB
    print(name, json.dumps(record))
    resp = v3.putrecords('mystream', [json.dumps(record)])
    print(resp.text)

    # Todo TSDB

### Main code section

In [38]:
import v3io
import os
# Twitter app credentials, read from the environment variables set in the
# initialization cell (os.getenv returns None for any variable that is unset).
oauth = {
    'app_key' : os.getenv('app_key'),
    'app_secret' : os.getenv('app_secret'),
    'oauth_token' : os.getenv('oauth_token'),
    'oauth_token_secret' : os.getenv('oauth_token_secret'),
}

stream = MyStreamer('GOOG', **oauth)

# `v3` is the client that process_event() uses to write records, so it must
# exist before the stream starts delivering events.
v3 = v3io.v3io(os.getenv('V3IO_ADDRESS'),os.getenv('V3IO_USER'),os.getenv('V3IO_PASSWORD'), 'bigdata')

# Process messages matching '@Google' and disconnect after 2 of them.
stream.start(process_event, 2, track='@Google', lang='en')

{"text": "@Google @googlechrome can you explain why nothing I do turns safe search off? https://t.co/GtfEUZT6CD", "cleaned": "can you explain why nothing I do turns safe search off", "user": "@ashleigh_guynn", "id": 1050489695921233921, "created_at": "Thu Oct 11 20:53:55 +0000 2018", "polarity": 0.5, "subjectivity": 0.5}
{ "FailedRecordCount":0,"Records": [{ "SequenceNumber":27,"ShardId":0 } ] }
{"text": "RT @AMJones7: Wow! @Google's free Applied Digital Skills curriculum includes more than 120 hours of lessons to help teachers of any subject\u2026", "cleaned": "RT Wow s free Applied Digital Skills curriculum includes more than 120 hours of lessons to help teachers of any subject", "user": "@_bradfischer", "id": 1050489812883517440, "created_at": "Thu Oct 11 20:54:23 +0000 2018", "polarity": 0.16666666666666669, "subjectivity": 0.5266666666666666}
{ "FailedRecordCount":0,"Records": [{ "SequenceNumber":28,"ShardId":0 } ] }
