# In [1]:
# Import necessary libraries
import json
import time
from threading import Thread
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from torch.utils.data import Dataset, DataLoader
import pickle
import os

# Make the project directory the current working directory (this changes the
# process cwd, not sys.path) so the relative paths below resolve against it.
# NOTE(review): the location looks like a Colab Google Drive mount — confirm
# the drive is mounted before this cell runs, otherwise os.chdir raises.
project_path = '/content/drive/My Drive/ChatBotProject'
os.chdir(project_path)

# Paths (all relative to project_path, see the os.chdir call above)
intents_path = 'data/intents.json'  # base intents file read by load_intents
user_generated_intents_path = 'data/user_generated_intents.json'  # optional extra intents
model_path = 'models/intent_classifier.pth'  # model state_dict written by retrain_model
label_encoder_path = 'models/label_encoder.pkl'  # pickled LabelEncoder
vectorizer_path = 'models/vectorizer.pkl'  # pickled CountVectorizer

# Function to load intents
def load_intents(base_path=None, user_path=None):
    """Load the base intents and append any user-generated intents.

    Args:
        base_path: JSON file holding the base intents under the ``'intents'``
            key. Defaults to the module-level ``intents_path``.
        user_path: Optional JSON file with additional intents under the same
            key. Defaults to the module-level ``user_generated_intents_path``.

    Returns:
        The list of base intents followed by the user-generated intents. When
        the user-generated file is missing or is not valid JSON, only the base
        intents are returned.

    Raises:
        FileNotFoundError, json.JSONDecodeError, KeyError: if the base file is
            missing, malformed, or lacks the ``'intents'`` key.
    """
    # Resolve defaults at call time so the module-level paths stay the
    # single source of truth for callers that pass nothing.
    if base_path is None:
        base_path = intents_path
    if user_path is None:
        user_path = user_generated_intents_path

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(base_path, 'r', encoding='utf-8') as f:
        intents = json.load(f)['intents']

    try:
        with open(user_path, 'r', encoding='utf-8') as f:
            user_generated_intents = json.load(f)['intents']
    except (FileNotFoundError, json.JSONDecodeError):
        # The user-generated file is optional: a missing or corrupt file
        # simply means there is nothing extra to add.
        return intents

    intents.extend(user_generated_intents)
    return intents

# Function to preprocess data
def preprocess_data(intents):
    """Flatten intents into parallel lists of patterns and their tags.

    Each intent is a mapping with a ``'tag'`` and a sequence of
    ``'patterns'``; every pattern yields one entry in each returned list,
    in the order the intents and patterns appear.

    Returns:
        A ``(patterns, tags)`` tuple of two equally long lists.
    """
    labelled = [
        (example, entry['tag'])
        for entry in intents
        for example in entry['patterns']
    ]
    patterns = [example for example, _ in labelled]
    tags = [label for _, label in labelled]
    return patterns, tags

# Function to retrain the model
def retrain_model():
    """Train the intent classifier from scratch and save it to disk.

    Loads the intents with ``load_intents``, flattens them with
    ``preprocess_data``, fits a ``LabelEncoder`` on the tags and a binary
    bag-of-words ``CountVectorizer`` on the patterns, trains a
    one-hidden-layer network for 100 epochs, then writes the model
    ``state_dict`` to ``model_path`` and pickles the encoder and vectorizer
    to ``label_encoder_path`` and ``vectorizer_path``.

    When there are no training patterns nothing is trained or written, so
    previously saved artifacts are left untouched.
    """
    intents = load_intents()
    patterns, tags = preprocess_data(intents)

    if not patterns:
        # Fitting the vectorizer on an empty corpus would raise; there is
        # nothing to learn from, so keep whatever was saved before.
        print('No training patterns found; skipping retraining.')
        return

    # Convert labels to integers
    label_encoder = LabelEncoder()
    encoded_tags = label_encoder.fit_transform(tags)

    # Convert text to binary bag-of-words vectors over whitespace tokens.
    # The tokenizer is stored on the vectorizer and pickled with it below;
    # a function defined inside this function cannot be pickled, so use the
    # built-in ``str.split``, which tokenizes identically and pickles by
    # reference.
    vectorizer = CountVectorizer(tokenizer=str.split, binary=True)
    X = vectorizer.fit_transform(patterns).toarray()
    y = encoded_tags

    # Create custom dataset
    class IntentDataset(Dataset):
        """Pairs each feature row with its encoded label."""

        def __init__(self, X, y):
            # Convert once here instead of on every __getitem__ call.
            self.X = torch.as_tensor(X, dtype=torch.float32)
            self.y = torch.as_tensor(y, dtype=torch.long)

        def __len__(self):
            return len(self.y)

        def __getitem__(self, idx):
            return self.X[idx], self.y[idx]

    dataset = IntentDataset(X, y)
    dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

    # Define the model
    class IntentClassifier(nn.Module):
        """Linear -> ReLU -> Linear classifier producing raw logits."""

        def __init__(self, input_dim, hidden_dim, output_dim):
            super().__init__()
            self.fc1 = nn.Linear(input_dim, hidden_dim)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            x = self.fc1(x)
            x = self.relu(x)
            x = self.fc2(x)
            return x

    input_dim = X.shape[1]
    hidden_dim = 128
    # One output per class known to the encoder, so logits and encoded
    # labels are guaranteed to line up.
    output_dim = len(label_encoder.classes_)

    model = IntentClassifier(input_dim, hidden_dim, output_dim)

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    epochs = 100
    for epoch in range(epochs):
        total_loss = 0
        for inputs, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if (epoch+1) % 10 == 0:
            # Report the mean batch loss every tenth epoch.
            print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(dataloader)}')

    # Save the model
    torch.save(model.state_dict(), model_path)
    print(f'Model saved as {model_path}')

    # Save label encoder and vectorizer for future use
    with open(label_encoder_path, 'wb') as f:
        pickle.dump(label_encoder, f)
    with open(vectorizer_path, 'wb') as f:
        pickle.dump(vectorizer, f)
    print('Label encoder and vectorizer saved.')

# Function to run retrain_model periodically
def retrain_periodically(interval=600):
    """Call ``retrain_model`` in an endless loop, sleeping between runs.

    Never returns. A failure in one retraining run is reported with its
    traceback and the loop continues, so a transient problem (for example a
    half-written intents file) does not permanently stop retraining.

    Args:
        interval: Seconds to sleep after each retraining attempt.
    """
    import traceback

    while True:
        try:
            retrain_model()
        except Exception:
            # This is the top-level boundary of the worker loop: an escaped
            # exception would end the thread and retraining would silently
            # stop. Report it and try again after the usual pause.
            print('Retraining failed; will retry after the next interval.')
            traceback.print_exc()
        time.sleep(interval)

# Run the periodic retraining loop on a background thread (every 600 seconds)
retrain_thread = Thread(target=retrain_periodically, kwargs={'interval': 600})
retrain_thread.start()
