In [1]:
import numpy as np
import os
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter
nltk.download('wordnet')
nltk.download('punkt')
from nltk.stem import WordNetLemmatizer
import random
import logging
logging.getLogger().setLevel('INFO')

lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/teemuraitaluoto/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/teemuraitaluoto/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Activation

from tensorflow.keras.callbacks import TensorBoard

In [3]:
DIR = '/users/teemuraitaluoto/Downloads/tweet-data'

In [4]:
training_set = pd.read_csv(os.path.join(DIR, 'train.csv'))

In [5]:
training_set.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [6]:
def create_lexicon():
    lexicon = []
    all_words = training_set['text'].values
    for word in all_words:
        word = word.lower()
        tokenized_word = word_tokenize(word)
        for w in tokenized_word:
            lemmatized_word = lemmatizer.lemmatize(w)
            lexicon.append(lemmatized_word)
    return lexicon

lexicon = create_lexicon()
print(len(lexicon))

143758


In [7]:
word_counts = Counter(lexicon)

In [8]:
new_lexicon = []
for w in word_counts:
    if 1000 > word_counts[w] > 4:
        new_lexicon.append(w)

In [9]:
print(len(new_lexicon))

2723


In [15]:
def sample_handling(training_set,lexicon):

    featureset = []

    for word_index, row in enumerate(training_set.iterrows()):
        row = row[1]
        current_words = word_tokenize(row['text'].lower())
        current_words = [lemmatizer.lemmatize(i) for i in current_words]
        features = np.zeros(len(lexicon))
        for word in current_words:
            if word.lower() in lexicon:
                index_value = lexicon.index(word.lower())
                features[index_value] += 1

        #classification = np.zeros(2)
        #classification[row['target']] += 1
        
        
        
        features = list(features)
        featureset.append([features, row['target']])
        
    
        if word_index % 500 == 0:
            logging.info(word_index)
    
    return featureset

In [16]:
def create_feature_sets_and_labels(test_size = 0.1):
    features = []
    
    features += sample_handling(training_set,new_lexicon)
    logging.info('Shuffling')
    random.shuffle(features)
    features = np.array(features)

    testing_size = int(test_size*len(features))

    train_x = list(features[:,0][:-testing_size])
    train_y = list(features[:,1][:-testing_size])
    test_x = list(features[:,0][-testing_size:])
    test_y = list(features[:,1][-testing_size:])
    
    train_x = np.array(train_x)
    train_y = np.array(train_y)
    test_x = np.array(test_x)
    test_y = np.array(test_y)
    
    return train_x, train_y, test_x, test_y

train_x, train_y, test_x, test_y = create_feature_sets_and_labels()

INFO:root:0
INFO:root:500
INFO:root:1000
INFO:root:1500
INFO:root:2000
INFO:root:2500
INFO:root:3000
INFO:root:3500
INFO:root:4000
INFO:root:4500
INFO:root:5000
INFO:root:5500
INFO:root:6000
INFO:root:6500
INFO:root:7000
INFO:root:7500
INFO:root:Shuffling


In [20]:
dense_layers = [0, 1, 2]
layer_sizes = [32, 64, 128]
conv_layers = [1, 2, 3]

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            NAME = f'{conv_layer}-conv-{layer_size}-nodes-{dense_layer}-dense'
            logging.info(NAME)
            tensorboard = TensorBoard(log_dir=f'{DIR}/logs/{NAME}')
            model = Sequential()

            model.add(Activation('relu'))
            
            for l in range(conv_layer - 1):

                model.add(Activation('relu'))

            model.add(Flatten())
            for l in range(dense_layer):
                model.add(Dense(layer_size))
                model.add(Activation('relu'))

            model.add(Dense(1))
            model.add(Activation('sigmoid'))

            model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

            #y = np.asarray(train_y)
            
            model.fit(train_x, train_y, batch_size=32, validation_split=0.1, epochs=10, callbacks=[tensorboard])
            
            model.save(f'{NAME}.model')

INFO:root:1-conv-32-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-32-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-32-nodes-0-dense.model/assets
INFO:root:2-conv-32-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-32-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-32-nodes-0-dense.model/assets
INFO:root:3-conv-32-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-32-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-32-nodes-0-dense.model/assets
INFO:root:1-conv-64-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-64-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-64-nodes-0-dense.model/assets
INFO:root:2-conv-64-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-64-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-64-nodes-0-dense.model/assets
INFO:root:3-conv-64-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-64-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-64-nodes-0-dense.model/assets
INFO:root:1-conv-128-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-128-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-128-nodes-0-dense.model/assets
INFO:root:2-conv-128-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-128-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-128-nodes-0-dense.model/assets
INFO:root:3-conv-128-nodes-0-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-128-nodes-0-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-128-nodes-0-dense.model/assets
INFO:root:1-conv-32-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-32-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-32-nodes-1-dense.model/assets
INFO:root:2-conv-32-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-32-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-32-nodes-1-dense.model/assets
INFO:root:3-conv-32-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-32-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-32-nodes-1-dense.model/assets
INFO:root:1-conv-64-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-64-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-64-nodes-1-dense.model/assets
INFO:root:2-conv-64-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-64-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-64-nodes-1-dense.model/assets
INFO:root:3-conv-64-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-64-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-64-nodes-1-dense.model/assets
INFO:root:1-conv-128-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-128-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-128-nodes-1-dense.model/assets
INFO:root:2-conv-128-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-128-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-128-nodes-1-dense.model/assets
INFO:root:3-conv-128-nodes-1-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-128-nodes-1-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-128-nodes-1-dense.model/assets
INFO:root:1-conv-32-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-32-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-32-nodes-2-dense.model/assets
INFO:root:2-conv-32-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-32-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-32-nodes-2-dense.model/assets
INFO:root:3-conv-32-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-32-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-32-nodes-2-dense.model/assets
INFO:root:1-conv-64-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-64-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-64-nodes-2-dense.model/assets
INFO:root:2-conv-64-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-64-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-64-nodes-2-dense.model/assets
INFO:root:3-conv-64-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-64-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-64-nodes-2-dense.model/assets
INFO:root:1-conv-128-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 1-conv-128-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 1-conv-128-nodes-2-dense.model/assets
INFO:root:2-conv-128-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 2-conv-128-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 2-conv-128-nodes-2-dense.model/assets
INFO:root:3-conv-128-nodes-2-dense


Train on 6166 samples, validate on 686 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: 3-conv-128-nodes-2-dense.model/assets


INFO:tensorflow:Assets written to: 3-conv-128-nodes-2-dense.model/assets
