<a href="https://colab.research.google.com/github/yeee457984/RNN-Reuters_contentclassifier/blob/main/Textual_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import subprocess

# Fetch the HappyML helper repository once; later runs reuse the local copy.
HAPPYML_REPO = "https://github.com/cnchi/HappyML.git"

if not os.path.isdir("HappyML"):
    try:
        # Argument list instead of a shell string; output is captured so that a
        # failure can be shown in the notebook rather than being lost.
        clone = subprocess.run(
            ["git", "clone", HAPPYML_REPO],
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # Raised when the git executable itself cannot be started.
        print(f"Could not run git: {exc}")
    else:
        if clone.returncode != 0:
            # Report the failure instead of silently ignoring the exit status.
            print(f"git clone failed (exit code {clone.returncode}):\n{clone.stderr}")

In [None]:
from tensorflow.keras.datasets import reuters

# Vocabulary cap passed to the loader as `num_words`.
most_freq_words = 10000

# load_data returns a (features, labels) pair for each of the two splits.
train_split, test_split = reuters.load_data(num_words=most_freq_words)
X_train, Y_train = train_split
X_test, Y_test = test_split

# Peek at the first training sample: a list of word indices and its class id.
for caption, sample in (("X_train[0]:", X_train[0]), ("Y_train[0]:", Y_train[0])):
    print(caption, sample)

X_train[0]: [1, 2, 2, 8, 43, 10, 447, 5, 25, 207, 270, 5, 3095, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 4579, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]
Y_train[0]: 3


In [None]:
# Build the inverse of the word -> index vocabulary so indices map back to words.
word_index = reuters.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}

# Each stored index is shifted down by 3 before the lookup; anything that has no
# vocabulary entry after the shift is rendered as the placeholder 'NiD'.
decoded_tokens = (reverse_word_index.get(token - 3, 'NiD') for token in X_train[0])
decoded_news = ' '.join(decoded_tokens)

print("Decoded text:", decoded_news)

Decoded text: NiD NiD NiD said as a result of its december acquisition of space co it expects earnings per share in 1987 of 1 15 to 1 30 dlrs per share up from 70 cts in 1986 the company said pretax net should rise to nine to 10 mln dlrs from six mln dlrs in 1986 and rental operation revenues to 19 to 22 mln dlrs from 12 5 mln dlrs it said cash flow per share this year should be 2 50 to three dlrs reuter 3


Preprocessing

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

max_len = 500     # every article is padded or truncated to this many tokens
num_classes = 46  # width of the one-hot label vectors

# Shared padding options: short articles get zeros in front ("pre"), articles
# longer than max_len lose their tail ("post").
pad_options = dict(maxlen=max_len, padding="pre", truncating="post", value=0)

X_train = pad_sequences(sequences=X_train, **pad_options)
X_test = pad_sequences(sequences=X_test, **pad_options)

# One-hot encode the labels only while they are still 1-D class ids. Without
# this guard, re-running the cell would feed the already one-hot matrix back
# into to_categorical and produce (n, 46, 46) targets that no longer match the
# model output.
if Y_train.ndim == 1:
    Y_train = to_categorical(Y_train, num_classes=num_classes)
if Y_test.ndim == 1:
    Y_test = to_categorical(Y_test, num_classes=num_classes)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

# Embedding -> GRU -> softmax classifier over the 46 classes.
model = Sequential()

# Declare the input shape with an explicit Input layer instead of the
# Embedding's deprecated `input_length` argument, so the model is still built
# (and summary() can show output shapes) before training.
model.add(layers.Input(shape=(max_len,), dtype="int32"))

# mask_zero=True marks index 0 (the value used for padding in this notebook) as
# padding, so the recurrent layer skips padded time steps instead of treating
# them as real tokens. Dropout passes the mask through unchanged.
model.add(layers.Embedding(input_dim=most_freq_words, output_dim=128, mask_zero=True))
model.add(layers.Dropout(0.2))

# NOTE(review): a non-default GRU activation rules out the cuDNN kernel per the
# Keras GRU docs — confirm that relu (rather than the default tanh) is intended.
model.add(layers.GRU(40, activation="relu"))
model.add(layers.Dropout(0.2))

# One output unit per class; softmax yields a probability distribution.
model.add(layers.Dense(46, activation="softmax"))

model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["acc"])
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 500, 128)          1280000   
                                                                 
 dropout_5 (Dropout)         (None, 500, 128)          0         
                                                                 
 gru_3 (GRU)                 (None, 40)                20400     
                                                                 
 dropout_6 (Dropout)         (None, 40)                0         
                                                                 
 dense_2 (Dense)             (None, 46)                1886      
                                                                 
Total params: 1302286 (4.97 MB)
Trainable params: 1302286 (4.97 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
%load_ext tensorboard

!rm -rf ./logs/

import os
from datetime import datetime
from tensorflow.keras.callbacks import TensorBoard

logdir = os.path.join("logs", datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = TensorBoard(logdir, histogram_freq=1)

In [None]:
# Open TensorBoard on the `logs` directory, the parent of the per-run directory
# that the TensorBoard callback writes to.
%tensorboard --logdir logs

In [None]:
# Train for 20 epochs, holding out 20% of the training data for validation and
# logging the run through the TensorBoard callback.
fit_options = dict(validation_split=0.2, epochs=20, callbacks=[tensorboard_callback])
model.fit(X_train, Y_train, **fit_options)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7aeee443e0e0>

In [None]:
# Retrain from freshly initialised weights so this 6-epoch run is independent of
# the 20-epoch run above. Calling fit() again on the same model would simply
# continue training it (26 epochs in total when the notebook is run top to
# bottom).
from tensorflow.keras.models import clone_model

model = clone_model(model)  # same architecture, newly created weights
model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["acc"])

model.fit(X_train, Y_train, validation_split=0.2, epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.src.callbacks.History at 0x7d2c3ec51600>

In [None]:
# Score the model on the test split; evaluate() yields the loss followed by the
# compiled accuracy metric.
test_metrics = model.evaluate(X_test, Y_test)
test_loss, test_acc = test_metrics

for caption, score in (("Loss of Test Set:", test_loss), ("Accuracy of Test Set:", test_acc)):
    print(caption, score)

Loss of Test Set: 1.6193408966064453
Accuracy of Test Set: 0.5992876291275024
