# Deep learning
This notebook is responsible for implementing a recurrent neural network using TensorFlow.

## Database credentials

In [2]:
# Connection settings. The host is fixed; user, password and database name are
# read from the first three lines of database_credentials.txt, in that order.
db_host = "localhost"
# Defaults keep the names bound even if the credentials file cannot be opened
db_user = db_pass = db_name = ""
with open("database_credentials.txt") as credentials_file:
    db_user, db_pass, db_name = (
        credentials_file.readline().strip() for _ in range(3)
    )

## Dataframe-ize tweets

In [3]:
import pymysql as pms
import numpy as np
import pandas as pd

In [4]:
# Pull every row of the search_tweets table into a DataFrame.
# `con` is bound to None up front so the `finally` clause never touches an
# unbound name (which would raise NameError and hide the real connection
# error) and never re-closes a stale connection left over from a previous
# execution of this cell.
con = None
try:
    con = pms.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)
    df = pd.read_sql("""SELECT * FROM search_tweets""", con)
finally:
    # Release the connection whether or not the query succeeded
    if con is not None:
        con.close()
df.head()

Unnamed: 0,id,message
0,1,Great meeting with @Cabinet at the @WhiteHouse...
1,2,Looking forward to 3:30 P.M. meeting today at ...
2,3,"Lowest rated Oscars in HISTORY. Problem is, we..."
3,4,"JOBS, JOBS, JOBS! #MAGA"
4,5,The U.S. is acting swiftly on Intellectual Pro...


## Data exploration

In [5]:
# Every tweet concatenated into one long space-delimited string
char_list = " ".join(list(df["message"]))

tweet_count = df.shape[0]

# Distinct tokens obtained by splitting on single spaces
unique_orderings = set(char_list.split(" "))
print("Unique space-separated character orderings: {}".format(len(unique_orderings)))
print("Tweets: {}".format(tweet_count))

# Rough sentence estimate: each ".", "?" or "!" counts as one sentence ending
terminator_total = sum(char_list.count(mark) for mark in (".", "?", "!"))
print("Average sentences per tweet: {}".format(terminator_total / float(tweet_count)))

Unique space-separated character orderings: 12837
Tweets: 2889
Average sentences per tweet: 2.7608168916580134


## Data preprocessing
### Lookup table
To create a word embedding, the words used in the tweets first need to be mapped to integer IDs. The two-way mapping (words → IDs and IDs → words) is generated below.

In [6]:
from string import punctuation
from collections import Counter

In [7]:
sample_text = "here's some sample text. hopefully this\nworks? ok - time to give it a shot!!"
def get_lookup_tables(text):
    """
    Gets the lookup tables mapping character orderings to their IDs and vice-versa.

    IDs are assigned in order of first appearance, starting at 0, so the same
    input always yields the same mapping. (Relying on dict/Counter iteration
    order is not reproducible on Python versions older than 3.7.)

    :param text: Text to create lookup tables from. Either one big string, which
                 is split on whitespace, or an iterable of already-separated
                 tokens (e.g. a list), which is used as-is.
    :return: A tuple of maps (vocab_to_int, int_to_vocab)
    """
    #If passed in text as big string, words separated by whitespace
    if isinstance(text, str):
        tokens = text.split()
    #Otherwise the input is treated as an iterable of ready-made tokens
    else:
        tokens = text

    #Create mappings; a token gets the next free ID the first time it is seen
    vocab_to_int = {}
    int_to_vocab = {}
    for token in tokens:
        if token not in vocab_to_int:
            token_id = len(vocab_to_int)
            vocab_to_int[token] = token_id
            int_to_vocab[token_id] = token
    return (vocab_to_int, int_to_vocab)
#get_lookup_tables(sample_text)

### Punctuation tokenizing
Splitting on spaces breaks the tweets up word by word. However, attached punctuation makes a neural network treat "dream" and "dream!" as two unrelated words. The tokenization mechanism required to map each punctuation character to its own token is defined below.

With this mapping mechanism, the dictionary will be used to tokenize the symbols and add a space around each one, making the symbol its own word. When punctuation marks act as their own words, the neural network can more easily incorporate them into the language it produces.

In [13]:
#Consider adding possessive/abbreviation for punctuation ... "'"
#Symbols that are pulled out of the text and treated as standalone tokens
rnn_punctuation = [".", ",", "\"", ";", "!", "?", "(", ")", "-", "\n", "|"]

#Placeholder word for each symbol above (same order), e.g. "." -> "~PERIOD~"
rnn_punctuation_words = [
    "~{}~".format(name.upper())
    for name in ["period", "comma", "quotation", "semicolon", "exclamation",
                 "question", "leftparen", "rightparen", "hyphen", "newline", "pipe"]
]

#Pair every symbol with its placeholder word
rnn_punctuation_map = dict(zip(rnn_punctuation, rnn_punctuation_words))
rnn_punctuation_map

{'\n': '~NEWLINE~',
 '!': '~EXCLAMATION~',
 '"': '~QUOTATION~',
 '(': '~LEFTPAREN~',
 ')': '~RIGHTPAREN~',
 ',': '~COMMA~',
 '-': '~HYPHEN~',
 '.': '~PERIOD~',
 ';': '~SEMICOLON~',
 '?': '~QUESTION~',
 '|': '~PIPE~'}

## Checking TensorFlow

In [16]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

ImportError: Traceback (most recent call last):
  File "C:\Users\unknown\AppData\Local\conda\conda\envs\fake_realdonaldtrump\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 18, in swig_import_helper
    return importlib.import_module(mname)
  File "C:\Users\unknown\AppData\Local\conda\conda\envs\fake_realdonaldtrump\lib\importlib\__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 985, in _gcd_import
  File "<frozen importlib._bootstrap>", line 968, in _find_and_load
  File "<frozen importlib._bootstrap>", line 957, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 666, in _load_unlocked
  File "<frozen importlib._bootstrap>", line 577, in module_from_spec
  File "<frozen importlib._bootstrap_external>", line 938, in create_module
  File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
ImportError: DLL load failed: The specified module could not be found.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\unknown\AppData\Local\conda\conda\envs\fake_realdonaldtrump\lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 41, in <module>
    from tensorflow.python.pywrap_tensorflow_internal import *
  File "C:\Users\unknown\AppData\Local\conda\conda\envs\fake_realdonaldtrump\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 21, in <module>
    _pywrap_tensorflow_internal = swig_import_helper()
  File "C:\Users\unknown\AppData\Local\conda\conda\envs\fake_realdonaldtrump\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 20, in swig_import_helper
    return importlib.import_module('_pywrap_tensorflow_internal')
  File "C:\Users\unknown\AppData\Local\conda\conda\envs\fake_realdonaldtrump\lib\importlib\__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
ImportError: No module named '_pywrap_tensorflow_internal'


Failed to load the native TensorFlow runtime.

See https://www.tensorflow.org/install/install_sources#common_installation_problems

for some common reasons and solutions.  Include the entire stack trace
above this error message when asking for help.