In [37]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import sys
import pickle

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import matplotlib.pyplot as plt
%matplotlib inline

## Loading Preprocessed Data

In [31]:
# Read the preprocessed splits back from disk. NOTE: pickle.load can execute
# arbitrary code, so only point this at pickle files you produced yourself.
with open(os.path.join('pickles', 'dataset.pickle'), mode='rb') as handle:
    dataset = pickle.load(handle)

# Unpack each split into its inputs (X_*) and targets (y_*).
X_train, y_train = dataset['X_train'], dataset['y_train']
X_valid, y_valid = dataset['X_valid'], dataset['y_valid']
X_test, y_test = dataset['X_test'], dataset['y_test']

In [8]:
# Load the pickled encoder object. Its `vocab_size` sizes the embedding layer
# and its `subwords` label the exported vectors later in the notebook.
# NOTE: as with any pickle, only load files from a trusted source.
with open(os.path.join('pickles', 'encoder.pickle'), mode='rb') as handle:
    encoder = pickle.load(handle)

## Building the Model

In [10]:
# Width of each learned embedding vector.
embedding_dim = 16

# Stack the layers one at a time:
#   embedding lookup -> average over the sequence axis -> single sigmoid unit.
model = keras.Sequential()
model.add(layers.Embedding(encoder.vocab_size, embedding_dim))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 16)          130960    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 17        
Total params: 130,977
Trainable params: 130,977
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Binary classifier: the sigmoid output is paired with binary cross-entropy.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# `patience` must be smaller than `epochs` for early stopping to be able to
# fire at all: the first epoch always counts as an improvement, so with
# patience equal to the epoch count the wait counter can never reach it.
# `restore_best_weights=True` rolls the model back to the epoch with the
# lowest validation loss when training is stopped early.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

# Train on the training split and track loss/accuracy on the validation split
# after every epoch; `history.history` holds the per-epoch metrics.
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stop]
)

Train on 7000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [40]:
# Learned embedding matrix of the first layer (the Embedding layer):
# one row per token id, `embedding_dim` columns.
weights = model.layers[0].get_weights()[0]
vect_file = os.path.join('embeddings', 'vecs.tsv')
meta_file = os.path.join('embeddings', 'meta.tsv')

# Write two parallel TSV files: line k of meta.tsv is the subword whose vector
# is on line k of vecs.tsv. The 'embeddings' directory must already exist.
with open(vect_file, 'w') as out_vect, open(meta_file, 'w') as out_meta:
    for i, word in enumerate(encoder.subwords):
        # Index with the loop variable (the previous code referenced an
        # undefined name here). The +1 offset skips row 0 — presumably id 0 is
        # reserved for padding by the encoder; confirm against its id scheme.
        vec = weights[i + 1]
        out_meta.write(word + '\n')
        out_vect.write('\t'.join(str(w) for w in vec) + '\n')