In [None]:
!wget https://www.dropbox.com/s/pdhwlpi2yeie0ol/movie-reviews-dataset.zip

In [2]:
!unzip -q "/content/movie-reviews-dataset.zip"

In [1]:
from tensorflow.keras.preprocessing import text_dataset_from_directory
from tensorflow.strings import regex_replace
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, RNN, SimpleRNNCell, Embedding, Dropout

In [2]:
def prepareData(dir):
  data = text_dataset_from_directory(dir)
  return data.map(
    lambda text, label: (regex_replace(text, '<br />', ' '), label),
  )

In [3]:
train_data = prepareData('movie-reviews-dataset/train')
test_data = prepareData('movie-reviews-dataset/test')

for text_batch, label_batch in train_data.take(1):
  print(text_batch.numpy()[0])
  print(label_batch.numpy()[0])

Found 25000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.
b"(spoilers)Wow, this is a bad one. I did a double take when watching an old Star Trek episode the other day-it was the one where everyone gets infected with that space sickness and then go a bit nuts-and there was Stewart Moss, a.k.a the unlikable 'hero' of It Lives by Night! He played the first crewmember infected, who dies from terminal depression. All I could think was that he'd watched his own movie too many times, that's what caused the depression. This movie is full of truly unlikable people. There is no redeeming character in the film, not one. It's very hard to feel bad about Dr. Beck's turning into a bat(or whatever he actually turned into), because you just don't like him. And you don't like his shrill, bony wife, or the nasty sleazy Sgt. Ward, or Dr. Mustache Love...So why would you invest any time or energy in this movie? Where there is no empathy with the characters, there is no reason to 

In [4]:
model = Sequential()

In [5]:
model.add(Input(shape=(1,), dtype="string"))

In [6]:
max_tokens = 1000
max_len = 100
vectorize_layer = TextVectorization(
  max_tokens=max_tokens,
  output_mode="int",
  output_sequence_length=max_len,
)

In [7]:
train_texts = train_data.map(lambda text, label: text)
vectorize_layer.adapt(train_texts)

model.add(vectorize_layer)

In [8]:
model.add(Embedding(max_tokens + 1, 128))

rnn = RNN(SimpleRNNCell(64) , return_sequences=False,return_state=False)
model.add(rnn)
model.add(Dense(64, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

In [9]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [10]:
model.fit(train_data, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff418d92b70>

In [11]:
model.evaluate(test_data)



[0.7133914828300476, 0.5574399828910828]

In [12]:
text = "I loved the movie !"

In [13]:
model.predict([text])

array([[0.58057064]], dtype=float32)