In [1]:
import json
import emoji
from datetime import datetime, timedelta
from timezonefinder import TimezoneFinder
import pytz
import re
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from geopy.exc import GeocoderUnavailable
import ssl
import certifi
import geopy.geocoders
from time import sleep
from nltk import bigrams
import sys
import time 

ctx = ssl.create_default_context(cafile=certifi.where())
geopy.geocoders.options.default_ssl_context = ctx


class FilteredTweet:  # Class to store the clean Tweet data.
    def __init__(self, tweet_dict, list_sleep_words, list_sleep_bigrams, list_stress_words):
        self.index = None
        localtime = parseDate(tweet_dict)
        self.tweet_weekday = localtime.weekday()
        self.tweet_day = localtime.day
        self.tweet_hour = localtime.hour
        self.tweet_year = localtime.year
        self.orig_text = tweet_dict['text']
        self.filtered_text = parseTweetText(tweet_dict, list_sleep_words, list_sleep_bigrams, list_stress_words)

        self.user_ID = tweet_dict['user']['id']
        self.user_verified = tweet_dict['user']['verified']
        self.user_followers_count = tweet_dict['user']['followers_count']
        self.user_friends_count = tweet_dict['user']['friends_count']
        self.user_listed_count = tweet_dict['user']['listed_count']
        self.user_statuses_count = tweet_dict['user']['statuses_count']

        user_creation_time = datetime.strptime(tweet_dict['user']['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
        self.user_creation_year = user_creation_time.year
        self.user_creation_month = user_creation_time.month
        self.user_creation_day = user_creation_time.day

        self.place_name = tweet_dict['place']['name']

        city_and_province = tweet_dict['place']['full_name'].split(",")
        if len(city_and_province) > 1:
            self.place_province = city_and_province[1].strip()
        else:
            self.place_province = None

        self.place_country = tweet_dict['place']['country']
        self.place_longitude = float(tweet_dict['place']['bounding_box']['coordinates'][0][0][0])
        self.place_latitude = float(tweet_dict['place']['bounding_box']['coordinates'][0][0][1])
        
        tweet_date = datetime.strptime(tweet_dict["created_at"], '%a %b %d %H:%M:%S +0000 %Y')
        elapsed_time = tweet_date - user_creation_time
        self.elapsed_time_in_days = elapsed_time/timedelta(minutes=1)/60/24

            
        self.label = None 

    def __str__(self):  # Represent the tweet as a dictionary.
        return str(self.__dict__)


    def setIndex(
            self, the_index):  # Method will set the index.  Called when new clean tweet is written.
        self.index = the_index


def filterCountry(tweet_dict, country):  # Filter tweet object based on country.  Takes arguments of a standard tweet
    # dictionary and country (as string).
    if 'place' in tweet_dict.keys():
        if tweet_dict['place'] is not None:
            if 'country' in tweet_dict['place'].keys():
                if tweet_dict['place']['country'] == country:
                    return True
                else:
                    return False
            else:
                return False
        else:
            return False
    else:
        return False


def filterTrackable(tweet_dict):  # Filter tweet based on whether it is trackable or not.
    if parseTimeZone(tweet_dict) is not None:
        return True
    else:
        return False


def filterLanguage(tweet_dict, language):  # Filter tweet object based on language.
    if 'lang' in tweet_dict.keys():
        if tweet_dict['lang'] is not None:
            if tweet_dict['lang'] == language:
                return True
            else:
                return False
        else:
            return False
    else:
        return False


def filterOriginal(tweet_dict):  # Method to filter out tweets that are quotes or re-tweets.
    if 'is_quote_status' in tweet_dict.keys():
        if tweet_dict['is_quote_status']:
            return False
    if tweet_dict['retweeted']:
        return False
    else:
        return True


def parseDate(tweet_dict):  # Method to parse the date/time of a tweet into the tweeter's local time.
    date_info = tweet_dict['created_at']
    orig_date = datetime.strptime(date_info, '%a %b %d %H:%M:%S +0000 %Y')
    fmt = '%a, %b %d %Y %H:%M:%S'
    new_date = datetime.strptime(datetime.strftime(orig_date, fmt), fmt)
    tz = parseTimeZone(tweet_dict)
    localized_time = new_date.astimezone(pytz.timezone(tz))
    offset = int(str(localized_time)[-6:-3])
    delta = timedelta(hours=offset)
    localized_time = new_date + delta
    return localized_time


def parseTimeZone(tweet_dict):  # Method to retrieve the timezone based on the user coordinates.
    tf = TimezoneFinder()
    if 'place' in tweet_dict.keys():
        if tweet_dict['place'] is not None:
            if 'bounding_box' in tweet_dict['place'].keys():
                if 'coordinates' in tweet_dict['place']['bounding_box'].keys():
                    if len(tweet_dict['place']['bounding_box']['coordinates']) > 0:
                        longitude = float(tweet_dict['place']['bounding_box']['coordinates'][0][0][0])
                        latitude = float(tweet_dict['place']['bounding_box']['coordinates'][0][0][1])
                        timezone = tf.timezone_at(lat=latitude, lng=longitude)
                        if timezone is None:
                            timezone = tf.closest_timezone_at(lat=latitude, lng=longitude)
                        return timezone
                    else:
                        return None
                else:
                    return None
            else:
                return None
        else:
            return None
    else:
        return None


def filterTweet(tweet_dict, language, country):  # Apply language, original, country, and trackable filters to a tweet.
    return (filterCountry(tweet_dict, country) and filterLanguage(tweet_dict, language)
            and filterOriginal(tweet_dict) and filterTrackable(tweet_dict))


def is_emoji(s):  # Method to check if a string is an emoji.
    return s in emoji.UNICODE_EMOJI


def add_space(text):  # Method to add a space between word-emoji pairs.
    result = ''
    for char in text:
        if is_emoji(char):
            char = 'emoji_'+ char + " "
            result += ' '
        result += char
    return result.strip()


def parseTweetText(tweetDict, list_sleep_words, list_sleep_bigrams, list_stress_words):  # Method to demojize tweet text, URLs, remove punctuation, and move to lowercase.
    tweet_text = tweetDict["text"]
    tweet_text = tweet_text + " ."
    tweet_text = tweet_text.replace('&amp;', " and ")
    tweet_text = tweet_text.replace('+', " ")
    tweet_text = tweet_text.replace('=', " ")
    tweet_text = tweet_text.replace('\n', " ")
    tweet_text = tweet_text.replace('@', " AT_")
    tweet_text = tweet_text.replace('#', " ")
    tweet_text = tweet_text.replace('-', " ")
    tweet_text = tweet_text.replace('\'', "")
    tweet_text = add_space(tweet_text)
    tweet_text = emoji.demojize(tweet_text)
    
    filtered_words_1 = []
    clean_words_1 = []
    
    for word in re.split(' +', tweet_text):
        if word.startswith('https'):
            continue
        else:
            word = word.lower()
            word = word.replace("\\", " and ")
            word = word.replace("\/", " and ")
            word = re.sub(r'[^a-z0-9\s_]', ' ', word)
            word = word.strip()
            clean_words_1.append(word)
    clean_sentence_1 = ' '.join(clean_words_1)
    clean_words_2 = re.split(' +', clean_sentence_1)
    for word in clean_words_2:
        while (word.startswith(" ") or word.endswith(" ")):
            word = word.strip()
        while (word.startswith("_") or word.endswith("_")):
            word = word.strip("_")
    
    if (checkSleepWords(clean_words_2, list_sleep_words, list_sleep_bigrams) or checkStressWords(clean_words_2, list_stress_words)):
        for word in re.split(' +', tweet_text):
            if word.startswith('https'):
                continue
            else:
                word = word.lower()
                word = word.replace("\\", " and ")
                word = word.replace("\/", " and ")
                word = re.sub(r'[^a-z0-9\s.,:;!?_]', '', word)
                word = word.strip()
                filtered_words_1.append(word)
        filtered_sentence_1 = ' '.join(filtered_words_1)
        filtered_words_2 = []
        filtered_sentence_2 = negation_sub(filtered_sentence_1)
        for word in re.split(' +', filtered_sentence_2):
            word = re.sub(r'[.,:;!?]', ' ', word)
            while (word.startswith(" ") or word.endswith(" ")):
                word = word.strip()
            while (word.startswith("_") or word.endswith("_")):
                word = word.strip("_")
            if (word != ""):
                filtered_words_2.append(word)
        last_filtered =  ' '.join(filtered_words_2)
        last_filtered = last_filtered.replace("emoji_ ", "emoji_")
        return last_filtered
    else:
        return None 


def negation_sub(text):
    transformed = re.sub(r'\b(?:not|no|never|aint|doesnt|havent|lacks|none|mightnt|shouldnt|'
                         r'cannot|dont|neither|nor|mustnt|wasnt|cant|hadnt|isnt|neednt|without|'
                         r'darent|hardly|lack|nothing|oughtnt|wouldnt|didnt|hasnt|lacking|nobody|'
                         r'nowhere|shant)\b[\w\s]+[.,:;!?]',
                         lambda match: re.sub(r'(\s+)(\w+)', r'\1NEG_\2', match.group(0)),
                         text,
                         flags=re.IGNORECASE)
    return transformed


def checkSleepWords(list_words_to_check, list_sleep_words,
                    list_sleep_bigrams):  # Method to check for existence of sleep keywords/bigrams in filtered text.
    # Takes a list of words to check, list of sleep words, and list of sleep bigrams to compare to.
    tweet_dict_bigrams = list(bigrams(list_words_to_check))
    for word in list_words_to_check:
        if word in list_sleep_words:
            return True

    for each_bigram in tweet_dict_bigrams:
        for sleep_bigram in list_sleep_bigrams:
            if sleep_bigram[0] == each_bigram[0] and sleep_bigram[1] == each_bigram[0]:
                return True
    return False


def checkStressWords(list_words_to_check,
                     list_stress_words):  # Method to check for existence of stress keywords in filtered text.
    # Takes a list of words to check and a list of stress words to compare to.
    for word in list_words_to_check:
        if word in list_stress_words:
            return True
    return False

list_sleep_words = ["bed", "sleep", "sack", "insomnia", "dodo", "zzz", "siesta", "tired", "nosleep",
                    "cantsleep", "rest", "asleep", "slept", "sleeping", "sleepy",
                    "ambien", "zolpidem", "lunesta", "intermezzo", "trazadone", "eszopiclone",
                    "zaleplon"]  # List of sleep words to check for.
list_sleep_bigrams = [["pass", "out"], ["get", "up"], ["wake", "up"],
                      ["power", "nap"]]  # List of sleep bigrams to compare tweet text to.
list_stress_words = ["heart", "control", "depression", "disease", "study", "studies", "life", "stressor", "body", "stress",
                     "anxiety", "health", "feel", "pain","social", "stressors", "pressure", "work", "risk", "stressful", "busy",
                     "depressed", "nervous", "university", "cancer", "marry", "wedding", "bride", "income", "salary", "rent", 
                     "hospital", "sick", "school", "holiday", "finals", "born", "baby", "life", "fired", "job", "lose", "cold",
                     "war", "quarrel", "argue", "question", "blame", "afraid", "baby", "pregnant", "mother-to-be", 
                     "revise", "habits", "smoke", "drink", "pass", "away", "RIP", "divorce", "ex-wife", "cry","surgery"]  # List of stress keywords to compare tweet text to.

class Logger(object):
    def __init__(self, file_name):
        self.terminal = sys.stdout
        self.log = open(file_name, "w")

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)  

    def flush(self):
        #this flush method is needed for python 3 compatibility.
        #this handles the flush command by doing nothing.
        #you might want to specify some extra behavior here.
        pass    

start_time = time.time()
   
    
sys.stdout = Logger("iterative_test_2.txt")
index_clean = 1
filename = "/home/cheunw/Group Project Folder/Project Raw Data Files/2.json"  # Name of .json dataset to pass.  Should have 1 tweet per line of file.
output_filename = "iterative_output_test_2.json"  # Name of clean .json file to write to.  Will have 1 clean tweet per line of file.

print("Beginning processing...")

with open(filename, 'r', encoding='utf8', errors='ignore') as f:  # Open input file
    with open(output_filename, 'w', encoding='utf8') as outputFile:  # Open output file
        for (i, line) in enumerate(f, 1):  # Iterate through each line in input file.
            tweet_dict = json.loads(line)  # Load the .json object into a dictionary.
            try:
                if filterTweet(tweet_dict, "en", "Canada"):  # Filter the tweet.
                    new_tweet = FilteredTweet(tweet_dict, list_sleep_words, list_sleep_bigrams, list_stress_words)  # Create a new filtered tweet.
                    if new_tweet.filtered_text is None:
                        continue
                    else:
                        new_tweet.setIndex(index_clean)  # Set the index.
                        new_tweet_json = json.dumps(
                            new_tweet.__dict__)  # Create a new json object from the clean tweet.
                        clean_tweet_json = json.dumps(new_tweet.__dict__, indent=4)
                        print(clean_tweet_json)
                        outputFile.write(new_tweet_json)  # Write the new json object to the output file.
                        outputFile.write('\n')  # Write a newline to separate tweets in output file.
                        print("Current tweets written from file:", index_clean, ", Total Tweets evaluated from file:", i)
                        index_clean += 1
            except:
                print("Encountered error in line", i, ": skipping this line.")
                continue


print("****************************************")
print("Total clean tweets written from file: ", index_clean-1)
print("Total tweets evaluated from file: ", i)
end_time = time.time()
print("Time of execution in seconds:", end_time - start_time)

Beginning processing...
{
    "index": 1,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 21,
    "tweet_year": 2018,
    "orig_text": "@thechadrsmith Hahaha too much salt in your diet is bad for your health you big bitch lol",
    "filtered_text": "at_thechadrsmith hahaha too much salt in your diet is bad for your health you big bitch lol",
    "user_ID": 104688908,
    "user_verified": false,
    "user_followers_count": 284,
    "user_friends_count": 659,
    "user_listed_count": 3,
    "user_statuses_count": 1988,
    "user_creation_year": 2010,
    "user_creation_month": 1,
    "user_creation_day": 14,
    "place_name": "Peterborough",
    "place_province": "Ontario",
    "place_country": "Canada",
    "place_longitude": -78.387994,
    "place_latitude": 44.252035,
    "elapsed_time_in_days": 3271.9699652777776,
    "label": null
}
Current tweets written from file: 1 , Total Tweets evaluated from file: 1188
{
    "index": 2,
    "tweet_weekday": 5,
    "tweet_day": 2

{
    "index": 10,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 18,
    "tweet_year": 2018,
    "orig_text": "@Brent_Ashby Negative Brent ... sadly this my last game live . Tsn for the rest . R u here ?",
    "filtered_text": "at_brent_ashby negative brent sadly this my last game live tsn for the rest r u here",
    "user_ID": 384606809,
    "user_verified": false,
    "user_followers_count": 280,
    "user_friends_count": 253,
    "user_listed_count": 3,
    "user_statuses_count": 888,
    "user_creation_year": 2011,
    "user_creation_month": 10,
    "user_creation_day": 4,
    "place_name": "Vancouver",
    "place_province": "British Columbia",
    "place_country": "Canada",
    "place_longitude": -123.224215,
    "place_latitude": 49.19854,
    "elapsed_time_in_days": 2644.0690509259257,
    "label": null
}
Current tweets written from file: 10 , Total Tweets evaluated from file: 12446
{
    "index": 11,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour"

{
    "index": 20,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 18,
    "tweet_year": 2018,
    "orig_text": "See our latest BC, Canada job and click to apply: Specialist I, Clinical Application - SCI - https://t.co/7DYkCWNe1n medicaldevices veterans",
    "filtered_text": "see our latest bc canada job and click to apply specialist i clinical application sci medicaldevices veterans",
    "user_ID": 256780228,
    "user_verified": false,
    "user_followers_count": 963,
    "user_friends_count": 82,
    "user_listed_count": 179,
    "user_statuses_count": 829,
    "user_creation_year": 2011,
    "user_creation_month": 2,
    "user_creation_day": 24,
    "place_name": "Bulkley-Nechako G",
    "place_province": "British Columbia",
    "place_country": "Canada",
    "place_longitude": -128.48587,
    "place_latitude": 53.660186,
    "elapsed_time_in_days": 2866.0058680555558,
    "label": null
}
Current tweets written from file: 20 , Total Tweets evaluated from file: 1904

{
    "index": 29,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 21,
    "tweet_year": 2018,
    "orig_text": "I love Canada \ud83c\udde8\ud83c\udde6! Here at an Italian Restaurant celebrating the wedding of a colleague honouring both Sudanese/Cana\u2026 https://t.co/oetBE196Xt",
    "filtered_text": "i love canada canada here at an italian restaurant celebrating the wedding of a colleague honouring both sudanesecana",
    "user_ID": 2305475125,
    "user_verified": false,
    "user_followers_count": 1026,
    "user_friends_count": 251,
    "user_listed_count": 9,
    "user_statuses_count": 1408,
    "user_creation_year": 2014,
    "user_creation_month": 1,
    "user_creation_day": 22,
    "place_name": "Toronto",
    "place_province": "Ontario",
    "place_country": "Canada",
    "place_longitude": -79.639319,
    "place_latitude": 43.403221,
    "elapsed_time_in_days": 1802.2102662037034,
    "label": null
}
Current tweets written from file: 29 , Total Tweets evaluat

{
    "index": 38,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 19,
    "tweet_year": 2018,
    "orig_text": "@MacLeodLisa Tired of the lies and bullshit, I have to wonder what your getting for all your lies and deceit?\nBecau\u2026 https://t.co/IJ1NeoeiCz",
    "filtered_text": "at_macleodlisa tired of the lies and bullshit i have to wonder what your getting for all your lies and deceit becau",
    "user_ID": 162600984,
    "user_verified": false,
    "user_followers_count": 1657,
    "user_friends_count": 2766,
    "user_listed_count": 75,
    "user_statuses_count": 98452,
    "user_creation_year": 2010,
    "user_creation_month": 7,
    "user_creation_day": 4,
    "place_name": "Sylvan Lake",
    "place_province": "Alberta",
    "place_country": "Canada",
    "place_longitude": -114.121165,
    "place_latitude": 52.282105,
    "elapsed_time_in_days": 3100.9095833333336,
    "label": null
}
Current tweets written from file: 38 , Total Tweets evaluated from file: 312

{
    "index": 48,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 20,
    "tweet_year": 2018,
    "orig_text": "@abprogressive @trishkatree @roy_maclellan @Cdn4LFC @jkenney @AndrewScheer Hahaha \u201c a bible school dropout\u201dOMG  the shame!!!!!",
    "filtered_text": "at_abprogressive at_trishkatree at_roy_maclellan at_cdn4lfc at_jkenney at_andrewscheer hahaha a bible school dropoutomg the shame",
    "user_ID": 831691644667781100,
    "user_verified": false,
    "user_followers_count": 305,
    "user_friends_count": 204,
    "user_listed_count": 2,
    "user_statuses_count": 17028,
    "user_creation_year": 2017,
    "user_creation_month": 2,
    "user_creation_day": 15,
    "place_name": "Hazlet",
    "place_province": "Saskatchewan",
    "place_country": "Canada",
    "place_longitude": -108.599102,
    "place_latitude": 50.396187,
    "elapsed_time_in_days": 682.9932175925925,
    "label": null
}
Current tweets written from file: 48 , Total Tweets evaluated from

{
    "index": 57,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 21,
    "tweet_year": 2018,
    "orig_text": "@andylassner @CarrlynBathe @TheEllenShow I feel your pain Andy. The Leafs are losing miserably too.  Sincerely Depressed Denise",
    "filtered_text": "at_andylassner at_carrlynbathe at_theellenshow i feel your pain andy the leafs are losing miserably too sincerely depressed denise",
    "user_ID": 27780929,
    "user_verified": false,
    "user_followers_count": 34,
    "user_friends_count": 189,
    "user_listed_count": 2,
    "user_statuses_count": 121,
    "user_creation_year": 2009,
    "user_creation_month": 3,
    "user_creation_day": 31,
    "place_name": "Grimsby",
    "place_province": "Ontario",
    "place_country": "Canada",
    "place_longitude": -79.649751,
    "place_latitude": 43.132582,
    "elapsed_time_in_days": 3561.0366435185188,
    "label": null
}
Current tweets written from file: 57 , Total Tweets evaluated from file: 49265
{
    "index

{
    "index": 66,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 21,
    "tweet_year": 2018,
    "orig_text": "My life feels a lot like bird box, making decisions blind folded in a sense \ud83d\udcaf",
    "filtered_text": "my life feels a lot like bird box making decisions blind folded in a sense emoji_hundred_points",
    "user_ID": 475163119,
    "user_verified": false,
    "user_followers_count": 215,
    "user_friends_count": 217,
    "user_listed_count": 1,
    "user_statuses_count": 20965,
    "user_creation_year": 2012,
    "user_creation_month": 1,
    "user_creation_day": 26,
    "place_name": "Lambton Shores",
    "place_province": "Ontario",
    "place_country": "Canada",
    "place_longitude": -82.339616,
    "place_latitude": 43.066005,
    "elapsed_time_in_days": 2529.299236111111,
    "label": null
}
Current tweets written from file: 66 , Total Tweets evaluated from file: 66331
{
    "index": 67,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_h

{
    "index": 75,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 19,
    "tweet_year": 2018,
    "orig_text": "@espenstrand @JMarshyBosco93 Gut feel",
    "filtered_text": "at_espenstrand at_jmarshybosco93 gut feel",
    "user_ID": 455140103,
    "user_verified": false,
    "user_followers_count": 322,
    "user_friends_count": 1091,
    "user_listed_count": 5,
    "user_statuses_count": 4412,
    "user_creation_year": 2012,
    "user_creation_month": 1,
    "user_creation_day": 4,
    "place_name": "Camrose County",
    "place_province": "Alberta",
    "place_country": "Canada",
    "place_longitude": -113.186658,
    "place_latitude": 52.489974,
    "elapsed_time_in_days": 2551.2824074074074,
    "label": null
}
Current tweets written from file: 75 , Total Tweets evaluated from file: 73794
{
    "index": 76,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 18,
    "tweet_year": 2018,
    "orig_text": "@thramp @rustlang The pain is still real. Specializa

{
    "index": 86,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 19,
    "tweet_year": 2018,
    "orig_text": "Fort McMurray Apartments for rent on https://t.co/rc0bRnWlUZ https://t.co/ecrstFNacd",
    "filtered_text": "fort mcmurray apartments for rent on",
    "user_ID": 31169027,
    "user_verified": false,
    "user_followers_count": 345,
    "user_friends_count": 0,
    "user_listed_count": 33,
    "user_statuses_count": 189569,
    "user_creation_year": 2009,
    "user_creation_month": 4,
    "user_creation_day": 14,
    "place_name": "Wood Buffalo",
    "place_province": "Alberta",
    "place_country": "Canada",
    "place_longitude": -114.001159,
    "place_latitude": 55.329481,
    "elapsed_time_in_days": 3546.3902893518516,
    "label": null
}
Current tweets written from file: 86 , Total Tweets evaluated from file: 76651
{
    "index": 87,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 19,
    "tweet_year": 2018,
    "orig_text": "Fort Saskatc

{
    "index": 97,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 20,
    "tweet_year": 2018,
    "orig_text": "Winnipeg Apartments for rent on https://t.co/rc0bRnWlUZ https://t.co/abuBqzPKQB",
    "filtered_text": "winnipeg apartments for rent on",
    "user_ID": 31169027,
    "user_verified": false,
    "user_followers_count": 345,
    "user_friends_count": 0,
    "user_listed_count": 33,
    "user_statuses_count": 189584,
    "user_creation_year": 2009,
    "user_creation_month": 4,
    "user_creation_day": 14,
    "place_name": "Winnipeg",
    "place_province": "Manitoba",
    "place_country": "Canada",
    "place_longitude": -97.350366,
    "place_latitude": 49.713679,
    "elapsed_time_in_days": 3546.390324074074,
    "label": null
}
Current tweets written from file: 97 , Total Tweets evaluated from file: 76782
{
    "index": 98,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 22,
    "tweet_year": 2018,
    "orig_text": "Shediac Apartments for rent

{
    "index": 107,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 21,
    "tweet_year": 2018,
    "orig_text": "North York Apartments for rent on https://t.co/rc0bRnWlUZ https://t.co/2FQez3NfcC",
    "filtered_text": "north york apartments for rent on",
    "user_ID": 31169027,
    "user_verified": false,
    "user_followers_count": 345,
    "user_friends_count": 0,
    "user_listed_count": 33,
    "user_statuses_count": 189599,
    "user_creation_year": 2009,
    "user_creation_month": 4,
    "user_creation_day": 14,
    "place_name": "Toronto",
    "place_province": "Ontario",
    "place_country": "Canada",
    "place_longitude": -79.639319,
    "place_latitude": 43.403221,
    "elapsed_time_in_days": 3546.3903472222223,
    "label": null
}
Current tweets written from file: 107 , Total Tweets evaluated from file: 76894
{
    "index": 108,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 21,
    "tweet_year": 2018,
    "orig_text": "Ottawa Houses for ren

{
    "index": 117,
    "tweet_weekday": 5,
    "tweet_day": 29,
    "tweet_hour": 18,
    "tweet_year": 2018,
    "orig_text": "Five minutes left in the second period and the Canadian party goes on! @5twisty5 got the pass close to the net and\u2026 https://t.co/Nd9z5aKwR6",
    "filtered_text": "five minutes left in the second period and the canadian party goes on at_5twisty5 got the pass close to the net and",
    "user_ID": 34895713,
    "user_verified": true,
    "user_followers_count": 144520,
    "user_friends_count": 383,
    "user_listed_count": 1677,
    "user_statuses_count": 21342,
    "user_creation_year": 2009,
    "user_creation_month": 4,
    "user_creation_day": 24,
    "place_name": "Vancouver",
    "place_province": "British Columbia",
    "place_country": "Canada",
    "place_longitude": -123.224215,
    "place_latitude": 49.19854,
    "elapsed_time_in_days": 3536.661145833333,
    "label": null
}
Current tweets written from file: 117 , Total Tweets evaluated from fi

KeyboardInterrupt: 