<a href="https://colab.research.google.com/github/vidhya2324/Website_code/blob/main/Chatbot_perfume.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
import json

from zipfile import ZipFile
import pandas as pd

In [1]:
import json
kaggle_dictionary=json.load(open("kaggle.json"))

In [4]:
#setup AIP credential
os.environ["KAGGLE_USERNAME"] = kaggle_dictionary["username"]
os.environ["KAGGLE_KEY"] = kaggle_dictionary["key"]

In [21]:
!kaggle datasets download -d kanchana1990/perfume-e-commerce-dataset-2024

Dataset URL: https://www.kaggle.com/datasets/kanchana1990/perfume-e-commerce-dataset-2024
License(s): ODC Attribution License (ODC-By)
Downloading perfume-e-commerce-dataset-2024.zip to /content
  0% 0.00/106k [00:00<?, ?B/s]
100% 106k/106k [00:00<00:00, 53.6MB/s]


In [22]:
#unzip the file
!unzip \*.zip && rm *.zip

Archive:  perfume-e-commerce-dataset-2024.zip
  inflating: ebay_mens_perfume.csv   
  inflating: ebay_womens_perfume.csv  


In [9]:
!ls

final_perfume_data.csv	kaggle.json  sample_data


In [15]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
import re
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

In [23]:
class PerfumeAnalyzer:
    def __init__(self, data_path):
        """Initialize with data loading and preprocessing"""
        self.df = pd.read_csv('/content/ebay_mens_perfume.csv')
        self.preprocess_data()

    def preprocess_data(self):
        """Clean and prepare the dataset"""
        # Fill NaN values
        self.df['price'] = pd.to_numeric(self.df['price'].str.replace('$', ''), errors='coerce')
        self.df['rating'] = pd.to_numeric(self.df['rating'], errors='coerce')
        self.df['votes'] = pd.to_numeric(self.df['votes'].str.replace(',', ''), errors='coerce')

        # Create combined features for recommendation
        self.df['combined_features'] = self.df['name'].fillna('') + ' ' + \
                                     self.df['brand'].fillna('') + ' ' + \
                                     self.df['main_accords'].fillna('') + ' ' + \
                                     self.df['notes'].fillna('')

    def get_top_brands(self, n=10):
        """Get top brands by number of perfumes"""
        return self.df['brand'].value_counts().head(n)

    def get_price_statistics(self):
        """Get price statistics"""
        return self.df['price'].describe()

    def plot_price_distribution(self):
        """Plot price distribution"""
        plt.figure(figsize=(10, 6))
        sns.histplot(data=self.df, x='price', bins=50)
        plt.title('Price Distribution of Perfumes')
        plt.xlabel('Price ($)')
        plt.ylabel('Count')
        plt.show()

    def plot_top_brands(self, n=10):
        """Plot top brands"""
        plt.figure(figsize=(12, 6))
        self.get_top_brands(n).plot(kind='bar')
        plt.title(f'Top {n} Perfume Brands')
        plt.xlabel('Brand')
        plt.ylabel('Number of Perfumes')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

    def analyze_common_notes(self, n=10):
        """Analyze most common notes in perfumes"""
        all_notes = ' '.join(self.df['notes'].dropna()).lower()
        words = re.findall(r'\w+', all_notes)
        return Counter(words).most_common(n)

In [17]:
class PerfumeRecommender:
    def __init__(self, df):
        """Initialize the recommender system"""
        self.df = df
        self.create_vectors()

    def create_vectors(self):
        """Create TF-IDF vectors for similarity matching"""
        self.tfidf = TfidfVectorizer(stop_words='english')
        self.tfidf_matrix = self.tfidf.fit_transform(self.df['combined_features'])

    def get_recommendations(self, query, n=5):
        """Get perfume recommendations based on query"""
        query_vec = self.tfidf.transform([query.lower()])
        similarity = cosine_similarity(query_vec, self.tfidf_matrix)
        similar_indices = similarity[0].argsort()[-n:][::-1]
        return self.df.iloc[similar_indices]

    def get_similar_perfumes(self, perfume_name, n=5):
        """Get similar perfumes to a given perfume"""
        perfume_idx = self.df[self.df['name'].str.lower() == perfume_name.lower()].index
        if len(perfume_idx) == 0:
            return None

        perfume_vec = self.tfidf_matrix[perfume_idx[0]]
        similarity = cosine_similarity(perfume_vec, self.tfidf_matrix)
        similar_indices = similarity[0].argsort()[-n-1:][::-1][1:]  # Exclude the perfume itself
        return self.df.iloc[similar_indices]

In [18]:
class PerfumeChatbot:
    def __init__(self, analyzer, recommender):
        """Initialize chatbot with analyzer and recommender"""
        self.analyzer = analyzer
        self.recommender = recommender

    def process_query(self, user_input):
        """Process user input and generate response"""
        user_input = user_input.lower()

        if "analyze" in user_input:
            if "brands" in user_input:
                top_brands = self.analyzer.get_top_brands()
                response = "Top 10 perfume brands:\n"
                for brand, count in top_brands.items():
                    response += f"{brand}: {count} perfumes\n"

            elif "prices" in user_input:
                stats = self.analyzer.get_price_statistics()
                response = "Price statistics:\n"
                response += f"Average price: ${stats['mean']:.2f}\n"
                response += f"Minimum price: ${stats['min']:.2f}\n"
                response += f"Maximum price: ${stats['max']:.2f}\n"

            elif "notes" in user_input:
                common_notes = self.analyzer.analyze_common_notes()
                response = "Most common perfume notes:\n"
                for note, count in common_notes:
                    response += f"{note}: {count} occurrences\n"

            else:
                response = "I can analyze brands, prices, or notes. Please specify what you'd like to analyze."

        elif "recommend" in user_input or "suggest" in user_input:
            recommendations = self.recommender.get_recommendations(user_input)
            response = "Here are some perfumes you might like:\n"
            for _, perfume in recommendations.iterrows():
                response += f"- {perfume['name']} by {perfume['brand']}"
                if pd.notna(perfume['price']):
                    response += f" (${perfume['price']:.2f})"
                response += "\n"

        elif "similar to" in user_input:
            perfume_name = user_input.split("similar to")[-1].strip()
            similar_perfumes = self.recommender.get_similar_perfumes(perfume_name)
            if similar_perfumes is not None:
                response = f"Perfumes similar to {perfume_name}:\n"
                for _, perfume in similar_perfumes.iterrows():
                    response += f"- {perfume['name']} by {perfume['brand']}"
                    if pd.notna(perfume['price']):
                        response += f" (${perfume['price']:.2f})"
                    response += "\n"
            else:
                response = f"I couldn't find the perfume '{perfume_name}' in my database."

        elif "help" in user_input:
            response = """I can help you with:
1. Analysis:
   - "Analyze brands" - See top perfume brands
   - "Analyze prices" - Get price statistics
   - "Analyze notes" - See most common perfume notes
2. Recommendations:
   - "Recommend floral perfumes" - Get perfume suggestions
   - "Similar to [perfume name]" - Find similar perfumes
3. Visualizations:
   - "Show price distribution" - See price distribution chart
   - "Show top brands" - See top brands chart
Type 'exit' to end the conversation."""

        elif "show" in user_input:
            if "price distribution" in user_input:
                self.analyzer.plot_price_distribution()
                response = "Showing price distribution plot..."
            elif "top brands" in user_input:
                self.analyzer.plot_top_brands()
                response = "Showing top brands plot..."
            else:
                response = "I can show price distribution or top brands. Please specify what you'd like to see."

        else:
            response = "I didn't understand that. Type 'help' to see what I can do!"

        return response

In [25]:
import pandas as pd

class PerfumeAnalyzer:
    def __init__(self, data_path):
        """Initialize with data loading and preprocessing"""
        self.df = pd.read_csv('/content/ebay_mens_perfume.csv')
        self.preprocess_data()

    def preprocess_data(self):
        """Clean and prepare the dataset"""
        # Fill NaN values
        # Convert the 'price' column to string type before using .str methods
        self.df['price'] = self.df['price'].astype(str).str.replace('[\$,]', '', regex=True)
        self.df['price'] = pd.to_numeric(self.df['price'], errors='coerce')
        self.df['rating'] = pd.to_numeric(self.df['rating'], errors='coerce')
        self.df['votes'] = pd.to_numeric(self.df['votes'].str.replace(',', ''), errors='coerce')

In [26]:
def main():
    # Initialize the system
    print("Loading perfume dataset and initializing systems...")
    analyzer = PerfumeAnalyzer('/content/ebay_mens_perfume.csv')  # Replace with your dataset path
    recommender = PerfumeRecommender(analyzer.df)
    chatbot = PerfumeChatbot(analyzer, recommender)

    print("\n🌸 Welcome to the Perfume Recommendation System! 🌸")
    print("This system can help you analyze perfume data, get recommendations, and visualize trends.")
    print("Type 'help' to see available commands or 'exit' to quit.")

    while True:
        user_input = input("\nYou: ").strip()

        if user_input.lower() == 'exit':
            print("Thank you for using the Perfume Recommendation System. Goodbye!")
            break

        response = chatbot.process_query(user_input)
        print("\nBot:", response)

if __name__ == "__main__":
    main()

Loading perfume dataset and initializing systems...


KeyError: 'rating'

**Its a simple perfume chatbot**

In [14]:
import nltk
import random
import re
from nltk.corpus import wordnet

# Download required NLTK data
nltk.download('punkt')
nltk.download('wordnet')

# Define a basic perfume recommendation chatbot
class PerfumeChatbot:
    def __init__(self):
        self.greetings = ["hello", "hi", "hey", "greetings", "what's up"]
        self.goodbyes = ["bye", "goodbye", "see you", "take care"]
        self.perfume_preferences = {
            "floral": ["Chanel No.5", "Dior J'adore", "Gucci Bloom"],
            "woody": ["Tom Ford Oud Wood", "Dolce & Gabbana Velvet Wood", "Gucci Guilty Oud"],
            "citrus": ["Jo Malone Lime Basil & Mandarin", "Versace Pour Homme", "Dolce & Gabbana Light Blue"],
            "spicy": ["Yves Saint Laurent Opium", "Tom Ford Noir", "Gucci Intense Oud"],
            "fresh": ["Davidoff Cool Water", "Issey Miyake L'eau D'issey", "Hermès Un Jardin Sur Le Nil"]
        }

    def greet(self, text):
        for word in text.split():
            if word.lower() in self.greetings:
                return random.choice(["Hello! How can I help you with perfumes?",
                                      "Hi there! Looking for a new scent?",
                                      "Hello! I’m here to help you find the perfect fragrance."])
        return None

    def recommend_perfume(self, preference):
        # Use regular expressions to detect perfume types
        preference = preference.lower()
        for category, perfumes in self.perfume_preferences.items():
            if category in preference:
                return f"I recommend you try {random.choice(perfumes)}. It has a lovely {category} scent!"
        return "I'm sorry, I didn't quite understand. Could you describe the type of scent you're looking for? (e.g., floral, woody, citrus)"

    def respond(self, text):
        # Check if the user is greeting the bot
        greeting = self.greet(text)
        if greeting:
            return greeting

        # Check if the user is saying goodbye
        for word in text.split():
            if word.lower() in self.goodbyes:
                return "Goodbye! I hope you find the perfect fragrance!"

        # If not a greeting or goodbye, try to recommend a perfume
        return self.recommend_perfume(text)

# Function to chat with the bot
def chat():
    bot = PerfumeChatbot()
    print("PerfumeBot: Hi! I’m here to help you find the perfect perfume. What type of scent are you interested in?")

    while True:
        user_input = input("You: ")
        if user_input.lower() in ["exit", "quit"]:
            print("PerfumeBot: Thank you for chatting! Have a great day!")
            break

        response = bot.respond(user_input)
        print("PerfumeBot:", response)

# Run the chatbot
chat()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


PerfumeBot: Hi! I’m here to help you find the perfect perfume. What type of scent are you interested in?
You: hi
PerfumeBot: Hello! How can I help you with perfumes?
You: i need a good perfume
PerfumeBot: I'm sorry, I didn't quite understand. Could you describe the type of scent you're looking for? (e.g., floral, woody, citrus)
You: tell about the inbult brand of perfume
PerfumeBot: I'm sorry, I didn't quite understand. Could you describe the type of scent you're looking for? (e.g., floral, woody, citrus)
You: Chanel No.5
PerfumeBot: I'm sorry, I didn't quite understand. Could you describe the type of scent you're looking for? (e.g., floral, woody, citrus)
You: exist
PerfumeBot: I'm sorry, I didn't quite understand. Could you describe the type of scent you're looking for? (e.g., floral, woody, citrus)
You: exit
PerfumeBot: Thank you for chatting! Have a great day!
