## Reading the Dataset

In [38]:
# pandas is imported here so this cell does not rely on a later cell having been run first
import pandas as pd

question = []
answer = []
# each usable line of dialogs.txt is read as "question<TAB>answer"
with open("dialogs.txt", 'r', encoding='utf-8') as f:
    for line in f:
        # drop the line terminator so answers do not carry a trailing "\n"
        fields = line.rstrip('\n').split('\t')
        if len(fields) < 2:
            # blank or tab-less line: there is no answer to pair with, so skip it
            continue
        question.append(fields[0])
        answer.append(fields[1])
# sanity check: both lists must have the same number of entries
print(len(question) == len(answer))

# one row per dialog line, with columns "question" and "answer"
result = pd.DataFrame({"question": question, "answer": answer})

True


---

## Supervised Learning Approach 
This approach trains a scikit-learn pipeline of CountVectorizer, TfidfTransformer, and RandomForestClassifier on the first 100 question–answer pairs. Each answer is treated as a class label, so the model predicts one of the known answers for a given question.

In [35]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

# use the first 100 question/answer rows as the training set
df = result.head(100)

# build the pipeline: token counts -> TF-IDF weighting -> random forest.
# random_state is fixed so that re-running this cell trains the same forest
# and the bot gives the same replies for the same input
pipeline = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', RandomForestClassifier(random_state=42))
])

# fit on the question text, using each answer string as the class label
pipeline.fit(df['question'], df['answer'])

# interactive test: keep answering until the user types 'quit' (any letter case)
# or interrupts the kernel
try:
    while True:
        user_input = input('You: ')
        if user_input.lower() == 'quit':
            break
        # predict() takes a list of documents and returns one label per document
        response = pipeline.predict([user_input])[0]
        print('Bot:', response)
except KeyboardInterrupt:
    # an interrupt anywhere in the loop ends the session with a short notice
    print("Interrupted by user")

You: hi, how are you doing?
Bot: i'm fine. how about yourself?

You: i'm pretty good. thanks for asking.
Bot: no problem. so how have you been?

You: i've been good. i'm in school right now.
Bot: what school do you go to?

Interrupted by user
You: hi


---

## Unsupervised Learning Approach
This code builds a simple retrieval chatbot that uses TF-IDF (term frequency–inverse document frequency) vectors to calculate the cosine similarity between the user's input and the questions in the dataset. It reuses the question–answer DataFrame loaded above, initializes a lemmatizer to normalize the text, tokenizes and lemmatizes the text, fits a TfidfVectorizer to the questions, transforms them into a TF-IDF matrix, and creates a dictionary of question–answer pairs. For each input, the bot replies with the answer paired with the most similar question.

In [39]:
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# lemmatizer instance shared by the tokenizer defined below
lemmatizer = WordNetLemmatizer()

# this section works on the full question/answer table
df = result

# tokenize and lemmatize the text
def tokenize_and_lemmatize(text):
    """Lower-case ``text``, split it with ``nltk.word_tokenize`` and return the lemmatized tokens as a list."""
    return [
        lemmatizer.lemmatize(word)
        for word in nltk.word_tokenize(text.lower())
    ]

# TF-IDF vectorizer that delegates tokenization to tokenize_and_lemmatize
vectorizer = TfidfVectorizer(tokenizer=tokenize_and_lemmatize)

# learn the vocabulary and idf weights from the questions and build their
# tf-idf matrix (one row per question) in a single call
tfidf_matrix = vectorizer.fit_transform(df['question'])

# map each question text to its answer
qa_dict = {q: a for q, a in zip(df['question'], df['answer'])}

# define a function to generate responses
def generate_response(user_input):
    """Return the answer paired with the dataset question most similar to ``user_input``.

    The input is vectorized with the fitted ``vectorizer`` and compared by
    cosine similarity against every row of ``tfidf_matrix``.

    Args:
        user_input: the user's message as a string.

    Returns:
        The ``answer`` value of the best-matching row of ``df``.
    """
    # transform the user input to a tf-idf vector
    user_tfidf = vectorizer.transform([user_input])
    # one row of similarities: the single query against every dataset question
    similarities = cosine_similarity(user_tfidf, tfidf_matrix)
    # with a single query row, the flat argmax is the row position in df;
    # when every similarity is equal (e.g. no known words) this is position 0
    index = similarities.argmax()
    # read the answer from the matched row itself; looking it up by question
    # text in a dict would return the last answer stored for a repeated
    # question instead of the answer on this row
    response = df.iloc[index]['answer']
    return response

# interactive test: keep answering until the user types 'quit' (any letter case)
# or interrupts the kernel
try:
    while True:
        user_input = input('You: ')
        if user_input.lower() == 'quit':
            break
        response = generate_response(user_input)
        print('Bot:', response)
except KeyboardInterrupt:
    # an interrupt anywhere in the loop ends the session with a short notice
    print("Interrupted by user")

You: hi, how are you doing?
Bot: i'm fine. how about yourself?

You: i'm pretty good. thanks for asking.	
Bot: no problem. so how have you been?

Interrupted by user
You: Chat ended.


## Thank you for your time!