In [31]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [32]:
import json
import random

import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import set_random_seed
# Fetch the NLTK resources used below: tokenizer models for
# nltk.word_tokenize and the WordNet corpus for WordNetLemmatizer.
# Newer NLTK releases (3.9+) load the tokenizer from 'punkt_tab' instead of
# 'punkt'; both are requested because the install above is not version-pinned.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vasanth\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\vasanth\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [33]:
# Load the intents data from bot2.json.
# The encoding is given explicitly so that parsing does not depend on the
# platform's default text encoding.
with open('bot2.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [34]:
# WordNet lemmatizer shared by the preprocessing and training cells
lemmatizer = WordNetLemmatizer()

# Empty accumulators for the vocabulary tokens, the intent tags and the
# (tokens, tag) documents, plus the punctuation tokens to leave out of the
# vocabulary
words, classes, documents = [], [], []
ignore_words = ['?', '!']

In [35]:
# Tokenize every pattern and collect the raw vocabulary tokens, the intent
# tags and the (tokens, tag) documents.
# The accumulators are rebuilt from scratch here so that re-running this cell
# does not append the same patterns a second time.
words = []
classes = []
documents = []

for intent in data['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # Split the pattern into word tokens
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        # Remember which tag this tokenized pattern belongs to
        documents.append((tokens, tag))
        # Record each tag once, in order of first appearance
        if tag not in classes:
            classes.append(tag)

In [36]:
# Build the vocabulary: skip the ignored punctuation tokens, lower-case and
# lemmatize the rest, and keep each distinct form once in sorted order
unique_lemmas = {
    lemmatizer.lemmatize(token.lower())
    for token in words
    if token not in ignore_words
}
words = sorted(unique_lemmas)
# De-duplicate the intent tags and sort them as well
classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique lemmatized words", words)


179 documents
39 classes ['My age is 20s', 'My age is 25s', 'My age is 30', 'My age is 35', 'My age is 40', 'My age is 50_plus', 'aps_workout', 'arms_workout', 'general_fitness', 'goodbye', 'greeting', 'gym_workouts_for_elderly', 'handball_info', 'high_calorie_foods', 'low_calorie_foods', 'masturbation workout disadvantages', 'my age is 10s', 'my age is 15s', 'my age is 6s', 'no_equipment_glutes_workouts', 'no_equipment_hamstrings_workouts', 'no_equipment_lats_workouts', 'no_equipment_quads_workouts', 'no_equipment_traps_workouts', 'non_veg_fitness', 'nutrients_for_fitness', 'pregnancy_gym_workouts', 'pregnancy_workouts', 'shoulder_workout', 'spa_benefits', 'terms_and_conditions', 'vitamins_for_fitness', 'weight_gain_diet', 'weight_gain_workouts', 'weight_loss_diet', 'weight_loss_workouts', 'workout_nutrition', 'workouts_for_elderly', 'wrestling_evaluation']
211 unique lemmatized words ['10', '15', '20', '25', '30', '35', '40', '50_plus', '6', '70', 'a', 'activity', 'advantage', 'advic

In [40]:
# Save words and classes to pickle files for later use
import pickle

# Context managers close (and flush) each file even if pickling raises,
# instead of leaving the handles to be cleaned up by the garbage collector.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)


In [38]:
# Build the bag-of-words training set, then define, compile and train the
# intent classifier.

# Seed Python's `random`, NumPy and TensorFlow so that the shuffle order and
# the weight initialisation are repeatable across kernel restarts.
SEED = 42
set_random_seed(SEED)

# Create training data
training = []
output_empty = [0] * len(classes)

for doc in documents:
    # Lower-cased, lemmatized tokens of this pattern; a set makes the
    # membership test below a constant-time lookup
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}
    # Bag of words: 1 where the vocabulary word occurs in the pattern, else 0
    bag = [1 if w in pattern_words else 0 for w in words]

    # Output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Shuffle the training data
random.shuffle(training)

# Separate features and labels
train_x = np.array([sample[0] for sample in training])
train_y = np.array([sample[1] for sample in training])

# Define and compile the model: two ReLU hidden layers with dropout, and a
# softmax output with one unit per class
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.summary()

# Train the model
hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 128)               27136     
                                                                 
 dropout_8 (Dropout)         (None, 128)               0         
                                                                 
 dense_13 (Dense)            (None, 64)                8256      
                                                                 
 dropout_9 (Dropout)         (None, 64)                0         
                                                                 
 dense_14 (Dense)            (None, 39)                2535      
                                                                 
Total params: 37927 (148.15 KB)
Trainable params: 37927 (148.15 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/200
Epoc

In [39]:
# Write the trained network to disk and confirm on stdout
model_path = 'GYM_model.h5'
model.save(model_path)
print("Model created and saved successfully!")

Model created and saved successfully!
