In [1]:
import preprocessing

from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
# Load the model inputs, targets and embedding matrix from the project's preprocessing module.
# NOTE(review): the meaning of the 20000 argument is defined inside
# preprocessing.nn_preprocess (not visible here) — presumably a size cap; confirm.
preprocessed = preprocessing.nn_preprocess(20000)
X_balanced, y_balanced, embeddings_matrix = preprocessed

In [3]:
# Hold out 20% of the data for the final evaluation; the fixed random_state
# makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_balanced,
    y_balanced,
    test_size=0.20,
    random_state=42,
)

In [4]:
# Feed-forward regressor stacked on a frozen embedding layer that is
# initialised from embeddings_matrix (presumably pretrained vectors — confirm
# against preprocessing.nn_preprocess).
model = Sequential()

# The embedding layer needs a fixed input length; use the longest sequence
# found in the balanced data set.
sequence_length = max(len(sent) for sent in X_balanced)

embedding_layer = Embedding(
    embeddings_matrix.shape[0],  # input_dim: number of rows in the matrix
    embeddings_matrix.shape[1],  # output_dim: width of each embedding vector
    weights=[embeddings_matrix],
    input_length=sequence_length,
    trainable=False,  # keep the supplied vectors fixed during training
)
model.add(embedding_layer)

# Flatten the (sequence_length, embedding_dim) activations into one vector
# per sample so they can feed the dense layers.
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(32, activation='relu'))
# NOTE(review): a ReLU on the single regression output clamps predictions to
# >= 0 and gives zero gradient whenever the pre-activation is negative; a
# linear output is the usual choice for MSE regression. Left unchanged here
# because switching it would alter the trained model — confirm intent.
model.add(Dense(1, activation='relu'))

model.compile(optimizer='adam', loss='mean_squared_error')

# summary() prints the table itself and returns None, so wrapping it in
# print() emitted a stray "None" line after the table.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1363, 300)         18079500  
                                                                 
 flatten (Flatten)           (None, 408900)            0         
                                                                 
 dense (Dense)               (None, 32)                13084832  
                                                                 
 dense_1 (Dense)             (None, 32)                1056      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 31,165,421
Trainable params: 13,085,921
Non-trainable params: 18,079,500
_________________________________________________________________
None


In [5]:
# Training hyper-parameters.
BATCH_SIZE = 1024
EPOCHS = 8

# Fit on the training split; validation_split=0.2 sets aside a fifth of
# X_train / y_train for per-epoch validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    validation_split=0.2,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [6]:
# Score the trained model on the held-out test split using mean squared error.
predictions = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
print("MSE", mse)

MSE 1.9492503389219396
