In [0]:
import os
import json
import numpy as np
from random import randrange

Importing all the packages

In [0]:
def load_data(link):
  """Collect texts and sentiment labels from the JSON files in a directory.

  Every regular file in `link` is parsed with `json.load` and must contain a
  'text' mapping and a 'sentiment' mapping; the values of each mapping are
  appended, in mapping order, to the returned lists.

  Args:
    link: path of the directory holding the JSON files.

  Returns:
    A `(text, sentiment)` tuple of two lists.

  Raises:
    KeyError: if a file lacks the 'text' or 'sentiment' key.
    json.JSONDecodeError: if a file is not valid JSON.
  """
  text = []
  sentiment = []

  # os.listdir gives entries in arbitrary order; sort so that the loaded
  # order is the same on every run and every machine.
  for filename in sorted(os.listdir(link)):
    file_path = os.path.join(link, filename)
    # Skip the notebook checkpoint entry and anything that is not a regular
    # file (open() would fail on a sub-directory).
    if filename == '.ipynb_checkpoints' or not os.path.isfile(file_path):
      continue

    # NOTE(review): 'unicode_escape' is kept from the original code; confirm
    # the data files really need it rather than plain UTF-8.
    with open(file_path, encoding='unicode_escape') as f:
      dic = json.load(f)

    text.extend(dic['text'].values())
    sentiment.extend(dic['sentiment'].values())
  return text, sentiment

In [0]:
# Read every sample from the 'sample_data' directory.
# NOTE: the name `text` is rebound to the keras.preprocessing.text module by a
# later import cell, so this cell must be re-run before re-running the split.
text, sentiment = load_data('sample_data')

**Loading the data and splitting into train and test**

In [0]:
def _sentiment_to_class(label):
  """Return the class id for a sentiment label, or None if it is unknown."""
  if label in ['Positive', 'positive']:
    return 2
  if label in ['Neutral', 'neutral']:
    return 1
  if label in ['Negative', 'negative']:
    return 0
  return None


def _split_features_labels(pairs):
  """Turn [text, label] pairs into two aligned lists (texts, class ids)."""
  xs = []
  ys = []
  for sample, label in pairs:
    class_id = _sentiment_to_class(label)
    if class_id is None:
      # Report the bad label and drop the whole sample; keeping the text
      # without a label would shift every following x/y pair.
      print('error in sentiment label {}'.format(label))
      continue
    xs.append(sample)
    ys.append(class_id)
  return xs, ys


def preprocess_data(text, sentiment, test_fraction=0.2, seed=None):
  """Randomly split samples into train/test sets and encode the labels.

  Labels are encoded as 0 (negative), 1 (neutral) and 2 (positive). A sample
  whose label is not recognised is reported with a printed message and left
  out of the result, so texts and labels always stay aligned.

  Args:
    text: sequence of input texts.
    sentiment: sequence of sentiment labels, paired with `text` by position.
    test_fraction: share of the samples moved to the test set (default 0.2).
    seed: optional seed for a reproducible split; when None the
      module-level random generator is used.

  Returns:
    `(x_train, y_train), (x_test, y_test)`, each element being a list.

  Raises:
    ValueError: if `test_fraction` is outside [0, 1].
  """
  if not 0 <= test_fraction <= 1:
    raise ValueError('test_fraction must be between 0 and 1')

  combo = [list(i) for i in zip(text, sentiment)]

  if seed is None:
    pick = randrange
  else:
    import random
    pick = random.Random(seed).randrange

  # Move randomly chosen pairs out of `combo` until the test set is full;
  # whatever remains in `combo` is the training set.
  test_size = int(len(combo) * test_fraction)
  combo_test = []
  while len(combo_test) < test_size:
    combo_test.append(combo.pop(pick(len(combo))))

  return _split_features_labels(combo), _split_features_labels(combo_test)

In [0]:
# Random train/test split with sentiment labels already encoded as integers.
train_split, test_split = preprocess_data(text, sentiment)
x_train, y_train = train_split
x_test, y_test = test_split

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools
import os

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.preprocessing import text, sequence
from keras import utils

**Taking max number of words and tokenizing them**

In [0]:
# Vocabulary size handed to the tokenizer; it is also the width of the
# model's input layer further down.
max_words = 1000

# Word-level (not character-level) tokenizer.
tokenize = text.Tokenizer(
    num_words=max_words,
    char_level=False,
)

In [0]:
# The vocabulary is learned from the training texts only, then both splits
# are converted to matrices with that same vocabulary.
tokenize.fit_on_texts(x_train)
x_trains, x_tests = (
    tokenize.texts_to_matrix(split) for split in (x_train, x_test)
)

In [0]:
# Fit the label encoder on the training labels and apply it to both splits.
# NOTE: y_trains / y_tests assigned here are reassigned by the to_categorical
# cell that follows, which works from y_train / y_test directly.
encoder = LabelEncoder().fit(y_train)
y_trains, y_tests = encoder.transform(y_train), encoder.transform(y_test)

In [0]:
# preprocess_data encodes sentiment as 0 (negative), 1 (neutral) and
# 2 (positive), so the number of classes is fixed. Deriving it from
# np.max(y_train) undercounts whenever the random training split happens to
# contain no positive sample, which breaks the one-hot encoding of y_test and
# shrinks the model's output layer.
num_classes = 3

# One-hot encode the integer class ids for categorical cross-entropy.
y_trains = utils.to_categorical(y_train, num_classes)
y_tests = utils.to_categorical(y_test, num_classes)

**Checking the size of train and test**

In [0]:
# Report the dimensions of the vectorised inputs and one-hot targets.
for label, matrix in (('x_train', x_trains),
                      ('x_test', x_tests),
                      ('y_train', y_trains),
                      ('y_test', y_tests)):
    print(label + ' shape:', matrix.shape)

x_train shape: (69, 1000)
x_test shape: (17, 1000)
y_train shape: (69, 3)
y_test shape: (17, 3)


***Initializing batch size and epochs and running the model***

In [0]:
# Training hyperparameters. These are kept equal to the values that the
# model.fit call below actually passes (batch size 64, 10 epochs); the
# previous values (32 and 2) contradicted the training run.
batch_size = 64
epochs = 10

# Feed-forward classifier: one hidden ReLU layer with dropout on top of the
# max_words-wide input, followed by a softmax over the sentiment classes.
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [0]:
# Train the classifier, holding out 20% of the training data for validation.
# NOTE(review): batch size and epoch count are passed as literals here rather
# than through the `batch_size` / `epochs` variables defined with the model;
# keep the two in sync.
# NOTE(review): per the Keras docs, validation_split takes the held-out
# samples from the end of the arrays before shuffling — confirm that is
# acceptable for this data.
fit_options = dict(
    batch_size=64,
    epochs=10,
    verbose=1,
    validation_split=0.2,
)
history = model.fit(x_trains, y_trains, **fit_options)

Train on 55 samples, validate on 14 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [0]:
# Class probabilities for every test sample.
y_pred = model.predict(x_tests)

# Collapse one-hot targets and predicted probabilities to class ids.
true_classes = y_tests.argmax(axis=1)
predicted_classes = y_pred.argmax(axis=1)

# Pass the label list explicitly so the matrix always has one row/column per
# class, in class-id order, even when a class is absent from the small test
# split and from the predictions.
confusion_matrix(true_classes, predicted_classes,
                 labels=list(range(num_classes)))



array([[6, 0, 0],
       [1, 1, 0],
       [1, 0, 8]])