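## Building a Text Classification Model

This notebook builds a small binary sentiment classifier: the example texts are cleaned with NLTK, encoded with a Keras `Tokenizer`, and used to train an embedding-based model. The steps are: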

1. Preparing Text Data
2. Creating the Model
3. Compiling and Training the Model
4. Making Predictions


In [54]:
import tensorflow as tf
import numpy as np

from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense

from nltk import pos_tag

import nltk

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

## Preparing Text Data

In [55]:
# Dataset and stopwords

# Fetch the NLTK data this cell depends on, so that it also runs on a fresh
# environment: without these packages stopwords.words(), word_tokenize() and
# pos_tag() raise LookupError. Both the classic and the newer ("_tab" / "_eng")
# package names are requested because the name that is looked up depends on
# the installed NLTK version.
NLTK_RESOURCES = (
    'stopwords',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
)
for resource in NLTK_RESOURCES:
    nltk.download(resource, quiet=True)

stop_words = set(stopwords.words('english'))

# Example text data and labels

texts = ["I love this movie!", "This movie is terrible.", "Great book!", "Awful experience."]

labels = np.array([1, 0, 1, 0])  # Positive sentiment: 1, Negative sentiment: 0

# Clean every text: lower-case, tokenize, POS-tag, then keep only the
# non-stop-word tokens whose tag starts with 'JJ' or 'NN'.
# NOTE(review): the padded output recorded in this notebook shows a single
# surviving token for "I love this movie!", so "love" appears to be removed by
# the tag filter - confirm whether dropping such sentiment words is intended.
processed_texts = []
for text in texts:
    # Tokenization
    tokens = word_tokenize(text.lower())
    # POS tagging
    tagged_tokens = pos_tag(tokens)
    # Remove stop words and retain relevant POS tags
    filtered_tokens = [word for word, tag in tagged_tokens if word not in stop_words and tag.startswith(('JJ', 'NN'))]
    # Join filtered tokens back into a sentence
    processed_text = ' '.join(filtered_tokens)
    processed_texts.append(processed_text)

In [56]:


# Tokenization

# Build the word -> integer index vocabulary from the cleaned texts.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=processed_texts)

# Encode each cleaned text as a list of word indices.
sequences = tokenizer.texts_to_sequences(texts=processed_texts)

In [57]:


# Padding

# Every sequence is padded to the length of the longest one, so the model
# receives a rectangular integer array.
max_len = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_len)

# Display the padded array as the cell output.
padded_sequences

array([[0, 1],
       [1, 2],
       [3, 4],
       [5, 6]])

## Creating the Model

In [58]:
# Model architecture

# Seed the NumPy and TensorFlow global generators before any weights are
# created, so that re-running the notebook from a fresh kernel gives
# repeatable results instead of a different model on every run.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = Sequential([
    # One embedding row per word index plus one extra row for index 0,
    # which is the value used for padding.
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=16, input_length=max_len),
    # Average the token embeddings into a single vector per text.
    GlobalAveragePooling1D(),
    # Single sigmoid unit: the output is read as the probability of label 1.
    Dense(units=1, activation='sigmoid'),
])

model.summary()


Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 2, 16)             112       
                                                                 
 global_average_pooling1d_6   (None, 16)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_6 (Dense)             (None, 1)                 17        
                                                                 
Total params: 129
Trainable params: 129
Non-trainable params: 0
_________________________________________________________________


## Compiling and Training the Model

In [59]:
# Compiling the model
# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Training the model
# One sample per gradient step, ten passes over the data.
model.fit(x=padded_sequences, y=labels, batch_size=1, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x25a7f181e10>

## Making Predictions

In [60]:
# Example new texts for prediction
# These sentences are not part of the training texts defined earlier.
new_texts = [
    "This movie is amazing!",
    "I didn't like the book.",
]

In [61]:
# Preprocess the new texts
# The cleaning steps mirror the ones applied to the training texts:
# lower-case, tokenize, POS-tag, drop stop words, keep 'JJ*' / 'NN*' tags.
# NOTE(review): the negation in "didn't" presumably does not survive this
# filter (stop-word removal plus the adjective/noun restriction) - confirm.
processed_new_texts = []
for text in new_texts:
    tokens = word_tokenize(text.lower())
    tagged_tokens = pos_tag(tokens)
    filtered_tokens = [
        word
        for word, tag in tagged_tokens
        if tag.startswith(('JJ', 'NN')) and word not in stop_words
    ]
    processed_text = ' '.join(filtered_tokens)
    processed_new_texts.append(processed_text)

# Tokenize and pad the new texts
# The tokenizer fitted on the training texts is reused, and sequences are
# padded to the same max_len the model was built with.
# NOTE(review): the tokenizer was created without an oov_token, so words not
# seen during fitting are presumably dropped here - confirm.
new_sequences = tokenizer.texts_to_sequences(processed_new_texts)
new_padded_sequences = pad_sequences(new_sequences, maxlen=max_len)

In [62]:
# Make predictions
# Run the trained model on the padded new sequences; the sigmoid output is
# one score per input text.
predictions = model.predict(x=new_padded_sequences)



In [63]:
# Print the predictions
# Scores above 0.5 are reported as positive, everything else as negative.
for text, prediction in zip(new_texts, predictions):
    if prediction > 0.5:
        sentiment = 'Positive'
    else:
        sentiment = 'Negative'
    print(f'Text: {text}\nSentiment: {sentiment}\n')


Text: This movie is amazing!
Sentiment: Positive

Text: I didn't like the book.
Sentiment: Positive

