In [None]:
import random
import torch
from transformers import pipeline
from nltk import word_tokenize
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy

# Download the tokenizer data that word_tokenize() relies on.
import nltk
# Older NLTK releases load the Punkt model from the "punkt" package, while
# NLTK 3.9 and later look for "punkt_tab" instead. Fetching both keeps the
# script working regardless of the installed NLTK version; packages that are
# already present are not downloaded again.
nltk.download('punkt')
nltk.download('punkt_tab')

# Canned replies, keyed by intent label. Each value is a list of alternative
# phrasings; one of them is picked at random when the intent is matched.
# "default" holds the fallback reply for questions the bot cannot answer.
RESPONSES = {
    "greeting": [
        "Hello! How can I help you?",
        "Hi there! What can I do for you today?",
    ],
    "courses": [
        "We offer BTech, MTech, MBA, BBA, and PhD programs.",
    ],
    "departments": [
        "We have departments like Computer Science, Electronics, Mechanical, Civil, and more.",
    ],
    "admission": [
        "You can apply through our official website under the 'Admissions' section.",
    ],
    "facilities": [
        "We offer hostels, libraries, labs, sports facilities, and more.",
    ],
    "contact": [
        "You can reach us at +91-1234567890 or email us at info@srmramapuram.edu.",
    ],
    "default": [
        "I'm sorry, I don't have information on that. Could you ask something else?",
    ],
}

# Example utterances for intent classification, grouped by intent label.
_EXAMPLES_BY_INTENT = {
    "greeting": ("hello", "hi"),
    "courses": ("what courses do you offer", "tell me about the courses"),
    "departments": ("which departments are there", "what departments do you have"),
    "admission": ("how can I apply for admission", "admission process"),
    "facilities": ("what facilities do you provide", "tell me about the facilities"),
    "contact": ("how can I contact the college", "contact details"),
}

# Flat list of (utterance, intent) pairs in the shape the classifier
# training code expects; dict insertion order keeps the pairs grouped by
# intent in the order listed above.
TRAINING_DATA = [
    (utterance, intent)
    for intent, utterances in _EXAMPLES_BY_INTENT.items()
    for utterance in utterances
]

# Feature extraction function for the classifier
def extract_features(sentence):
    """Convert a sentence into a bag-of-words feature dictionary.

    The sentence is lower-cased and split with NLTK's ``word_tokenize``;
    every distinct token becomes a key whose value is ``True``.

    Args:
        sentence: The text to convert.

    Returns:
        A dict mapping each distinct token to ``True``.
    """
    tokens = word_tokenize(sentence.lower())
    return dict.fromkeys(tokens, True)

# Turn every labelled training utterance into a (features, label) pair,
# which is the input format NaiveBayesClassifier.train() takes.
feature_sets = [
    (extract_features(utterance), intent_label)
    for utterance, intent_label in TRAINING_DATA
]

# Fit the Naive Bayes intent classifier on the labelled examples.
classifier = NaiveBayesClassifier.train(feature_sets)

# GPT-2 text-generation pipeline used to produce free-form replies when no
# canned response applies.
qa_pipeline = pipeline(task="text-generation", model="gpt2")

# Chatbot function
def chatbot_response(user_input, min_margin=0.1):
    """Return the chatbot's reply to one user message.

    The message is classified into an intent. If the classifier cannot
    clearly separate the best intent from the runner-up (or there is nothing
    to classify at all), the canned "default" reply is returned instead of
    an arbitrary intent's answer. Intents that have no canned reply are
    answered by the text-generation model.

    Args:
        user_input: Raw text typed by the user.
        min_margin: Minimum difference between the probabilities of the best
            and second-best intent for the classification to be trusted.

    Returns:
        The reply text as a string.
    """
    features = extract_features(user_input)
    if not features:
        # Empty / whitespace-only message: there is nothing to classify.
        return random.choice(RESPONSES["default"])

    # classify() always returns one of the trained labels, even when the
    # message gives it no evidence, so off-topic questions used to receive
    # an unrelated canned answer. Inspect the probability distribution and
    # only trust the top intent when it clearly beats the alternative.
    distribution = classifier.prob_classify(features)
    ranked = sorted(distribution.samples(), key=distribution.prob, reverse=True)
    intent = ranked[0]
    runner_up_prob = distribution.prob(ranked[1]) if len(ranked) > 1 else 0.0
    if distribution.prob(intent) - runner_up_prob < min_margin:
        return random.choice(RESPONSES["default"])

    if intent in RESPONSES:
        return random.choice(RESPONSES[intent])

    # Recognised intent without a canned reply: let the language model
    # answer. max_new_tokens bounds only the generated part (max_length
    # also counts the prompt, so long questions left no room for a reply),
    # and return_full_text=False keeps the user's own words out of the reply.
    generated = qa_pipeline(
        user_input,
        max_new_tokens=50,
        num_return_sequences=1,
        return_full_text=False,
    )
    reply = generated[0]['generated_text'].strip()
    # Guard against an empty generation so the user always gets some answer.
    return reply or random.choice(RESPONSES["default"])

# Conversation loop: read a line, answer it, repeat until the user quits.
print("Welcome to the SRM Ramapuram College Chatbot! Type 'quit' to exit.")
while True:
    try:
        # Strip surrounding whitespace so that e.g. "quit " still exits.
        user_input = input("> ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C or a closed stdin ends the session cleanly
        # instead of crashing with a traceback.
        print("\nGoodbye!")
        break
    if user_input.lower() == 'quit':
        print("Goodbye!")
        break
    if not user_input:
        # Nothing was typed; prompt again rather than classifying "".
        continue
    response = chatbot_response(user_input)
    print(response)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vsri6\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


Welcome to the SRM Ramapuram College Chatbot! Type 'quit' to exit.
> hi
Hi there! What can I do for you today?
> tell me about college
We offer hostels, libraries, labs, sports facilities, and more.
> admission process
You can apply through our official website under the 'Admissions' section.
