In [21]:
import pandas as pd

# Load your datasets
customers_df = pd.read_csv("customer_data_collection.csv")
products_df = pd.read_csv("product_recommendation_data.csv")

# Preview the data
print("📋 Customers Data")
display(customers_df.head())

print("📦 Products Data")
display(products_df.head())


📋 Customers Data


Unnamed: 0,Customer_ID,Age,Gender,Location,Browsing_History,Purchase_History,Customer_Segment,Avg_Order_Value,Holiday,Season,Unnamed: 10
0,C1000,28,Female,Chennai,"['Books', 'Fashion']","['Biography', 'Jeans']",New Visitor,4806.99,No,Winter,
1,C1001,27,Male,Delhi,"['Books', 'Fitness', 'Fashion']","['Biography', 'Resistance Bands', 'T-shirt']",Occasional Shopper,795.03,Yes,Autumn,
2,C1002,34,Other,Chennai,['Electronics'],['Smartphone'],Occasional Shopper,1742.45,Yes,Summer,
3,C1003,23,Male,Bangalore,['Home Decor'],['Wall Art'],Frequent Buyer,2023.16,No,Autumn,
4,C1004,24,Other,Kolkata,"['Fashion', 'Home Decor']","['Shoes', 'Lamp']",Frequent Buyer,794.76,No,Winter,


📦 Products Data


Unnamed: 0,Product_ID,Category,Subcategory,Price,Brand,Average_Rating_of_Similar_Products,Product_Rating,Customer_Review_Sentiment_Score,Holiday,Season,Geographical_Location,Similar_Product_List,Probability_of_Recommendation,Unnamed: 13,Unnamed: 14
0,P2000,Fashion,Jeans,1713,Brand B,4.2,2.3,0.26,No,Summer,Canada,"['Jeans', 'Shoes']",0.91,,
1,P2001,Beauty,Lipstick,1232,Brand C,4.7,2.1,0.21,Yes,Winter,India,"['Moisturizer', 'Lipstick', 'Lipstick']",0.26,,
2,P2002,Electronics,Laptop,4833,Brand B,3.5,2.4,0.74,Yes,Spring,Canada,"['Headphones', 'Headphones', 'Smartphone']",0.6,,
3,P2003,Books,Comics,4902,Brand D,3.4,4.2,0.8,No,Winter,Germany,"['Biography', 'Non-fiction', 'Comics']",0.36,,
4,P2004,Home Decor,Cushions,856,Brand C,3.2,3.3,0.08,Yes,Summer,USA,['Lamp'],0.37,,


In [23]:
import sqlite3

# Connect to SQLite (will create ecommerce.db if it doesn't exist)
conn = sqlite3.connect("ecommerce.db")

# Save DataFrames to SQLite tables
customers_df.to_sql("customers", conn, if_exists="replace", index=False)
products_df.to_sql("products", conn, if_exists="replace", index=False)

conn.commit()
conn.close()

print("✅ Data saved to SQLite successfully!")


✅ Data saved to SQLite successfully!


In [25]:
conn = sqlite3.connect("ecommerce.db")

# Fetch a few rows
print("🔎 Customers Table")
print(pd.read_sql("SELECT * FROM customers LIMIT 5", conn))

print("🔎 Products Table")
print(pd.read_sql("SELECT * FROM products LIMIT 5", conn))

conn.close()


🔎 Customers Table
  Customer_ID  Age  Gender   Location                 Browsing_History  \
0       C1000   28  Female    Chennai             ['Books', 'Fashion']   
1       C1001   27    Male      Delhi  ['Books', 'Fitness', 'Fashion']   
2       C1002   34   Other    Chennai                  ['Electronics']   
3       C1003   23    Male  Bangalore                   ['Home Decor']   
4       C1004   24   Other    Kolkata        ['Fashion', 'Home Decor']   

                               Purchase_History    Customer_Segment  \
0                        ['Biography', 'Jeans']         New Visitor   
1  ['Biography', 'Resistance Bands', 'T-shirt']  Occasional Shopper   
2                                ['Smartphone']  Occasional Shopper   
3                                  ['Wall Art']      Frequent Buyer   
4                             ['Shoes', 'Lamp']      Frequent Buyer   

   Avg_Order_Value Holiday  Season Unnamed: 10  
0          4806.99      No  Winter        None  
1           

In [27]:
import sqlite3

conn = sqlite3.connect("ecommerce.db")
cursor = conn.cursor()

cursor.execute('''
    CREATE TABLE IF NOT EXISTS customers (
        id TEXT PRIMARY KEY,
        name TEXT,
        age INTEGER,
        gender TEXT,
        location TEXT,
        purchase_history TEXT
    )
''')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS products (
        id TEXT PRIMARY KEY,
        name TEXT,
        category TEXT,
        price REAL,
        popularity INTEGER
    )
''')

conn.commit()
conn.close()


In [29]:
from sklearn.cluster import KMeans

# Reconnect to SQLite
conn = sqlite3.connect("ecommerce.db")

# Load customers data
customers_df = pd.read_sql_query("SELECT * FROM customers", conn)
conn.close()

# Convert 'purchase_history' into numerical features (Dummy Example)
customers_df["Purchase_Count"] = customers_df["Purchase_History"].apply(lambda x: len(x.split(',')))

# Perform K-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
customers_df["segment"] = kmeans.fit_predict(customers_df[["Age", "Purchase_Count"]])

print(customers_df.head())


  Customer_ID  Age  Gender   Location                 Browsing_History  \
0       C1000   28  Female    Chennai             ['Books', 'Fashion']   
1       C1001   27    Male      Delhi  ['Books', 'Fitness', 'Fashion']   
2       C1002   34   Other    Chennai                  ['Electronics']   
3       C1003   23    Male  Bangalore                   ['Home Decor']   
4       C1004   24   Other    Kolkata        ['Fashion', 'Home Decor']   

                               Purchase_History    Customer_Segment  \
0                        ['Biography', 'Jeans']         New Visitor   
1  ['Biography', 'Resistance Bands', 'T-shirt']  Occasional Shopper   
2                                ['Smartphone']  Occasional Shopper   
3                                  ['Wall Art']      Frequent Buyer   
4                             ['Shoes', 'Lamp']      Frequent Buyer   

   Avg_Order_Value Holiday  Season Unnamed: 10  Purchase_Count  segment  
0          4806.99      No  Winter        None        

In [31]:
from sklearn.preprocessing import OneHotEncoder

# Select features for recommendation system
features = [
    'Category', 'Subcategory', 'Price', 'Brand',
    'Average_Rating_of_Similar_Products',
    'Product_Rating', 'Customer_Review_Sentiment_Score',
    'Holiday', 'Season', 'Geographical_Location',
    'Probability_of_Recommendation'
]

product_data = products_df[features]

# Separate categorical and numerical columns
categorical_cols = ['Category', 'Subcategory', 'Brand', 'Holiday', 'Season', 'Geographical_Location']
numerical_cols = ['Price', 'Average_Rating_of_Similar_Products', 'Product_Rating', 'Customer_Review_Sentiment_Score', 'Probability_of_Recommendation']

# One-hot encode categorical columns
encoder = OneHotEncoder(sparse_output=False)
encoded_cat = encoder.fit_transform(product_data[categorical_cols])

# Combine with numerical features
import numpy as np

X = np.concatenate([encoded_cat, product_data[numerical_cols].values], axis=1)


In [33]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute similarity matrix
similarity_matrix = cosine_similarity(X)

# Function to recommend similar products
def recommend_products(product_id, top_n=5):
    # Find index of product
    index = products_df[products_df['Product_ID'] == product_id].index[0]
    
    # Get similarity scores
    scores = list(enumerate(similarity_matrix[index]))
    
    # Sort scores
    sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)
    
    # Get top N similar product indices (excluding the product itself)
    top_indices = [i[0] for i in sorted_scores[1:top_n+1]]
    
    return products_df.iloc[top_indices][['Product_ID', 'Brand', 'Category', 'Product_Rating', 'Probability_of_Recommendation']]

# 🔍 Example usage
recommend_products(product_id='P2000')  # Replace 'P101' with an actual Product_ID from your dataset


Unnamed: 0,Product_ID,Brand,Category,Product_Rating,Probability_of_Recommendation
7521,P9521,Brand B,Fashion,3.0,0.17
2381,P4381,Brand C,Fashion,2.3,0.62
8865,P10865,Brand B,Fashion,2.4,0.6
8047,P10047,Brand C,Fashion,2.8,0.97
3037,P5037,Brand B,Fashion,2.4,0.89


In [35]:
class CustomerAgent:
    def __init__(self, customer_id):
        self.customer_id = customer_id

    def get_customer_data(self):
        conn = sqlite3.connect("ecommerce.db")
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM customers WHERE id=?", (self.customer_id,))
        customer = cursor.fetchone()
        conn.close()
        return customer

class ProductAgent:
    def get_product_recommendations(self, customer_segment):
        conn = sqlite3.connect("ecommerce.db")
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM products WHERE category=?", (customer_segment,))
        products = cursor.fetchall()
        conn.close()
        return products


In [37]:
!ollama pull nomic-embed-text


[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ´ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â § [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‡ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â � [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 970aa74c0a90... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–� 274 MB                         [K
pulling c71d239df917... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–

In [38]:
import ollama

# Create the client
client = ollama.Client()

# Sample product description
product_description = "Wireless Bluetooth headphones with noise cancellation"

# Generate the embedding
response = client.embed(
    model='nomic-embed-text',
    input=[product_description]
)

# 🧠 The correct key is 'embeddings' — it's a list of vectors!
embedding_vector = response.embeddings[0]

# Preview the vector
print("🔢 First 10 values:", embedding_vector[:10])


🔢 First 10 values: [0.021164376, -0.03768223, -0.13226596, 0.007422421, 0.04549926, 0.0021966759, -0.022055058, -0.007950228, -0.018502984, 0.008697911]


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_product_reviews(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    reviews = soup.find_all("div", class_="review-text")
    return [review.text for review in reviews]

# Example
product_url = "https://www.example.com/product"
reviews = scrape_product_reviews(product_url)
print(reviews)



In [41]:
from bs4 import BeautifulSoup

html = """
<html>
  <body>
    <div class="review-text">Amazing product! Totally loved it.</div>
    <div class="review-text">Decent quality for the price.</div>
    <div class="review-text">Battery life could be better.</div>
  </body>
</html>
"""

soup = BeautifulSoup(html, "html.parser")
reviews = [r.text for r in soup.find_all("div", class_="review-text")]
print(reviews)


['Amazing product! Totally loved it.', 'Decent quality for the price.', 'Battery life could be better.']


In [43]:
!pip install textblob




In [44]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package punkt to C:\Users\Sudarshan
[nltk_data]     Dhongade\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Sudarshan
[nltk_data]     Dhongade\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
products_df = pd.read_csv('product_recommendation_data.csv')
products_df.head()


In [None]:
features = [
    'Price',
    'Product_Rating',
    'Average_Rating_of_Similar_Products',
    'Customer_Review_Sentiment_Score',
    'Probability_of_Recommendation'
]

# Handle missing values
products_df[features] = products_df[features].fillna(0)

# Normalize for cosine similarity
scaler = MinMaxScaler()
normalized_features = scaler.fit_transform(products_df[features])


In [None]:
similarity_matrix = cosine_similarity(normalized_features)


In [53]:
def recommend_products(product_id, top_n=5):
    # Find the index of the product
    if product_id not in products_df['Product_ID'].values:
        print(f"❌ Product ID {product_id} not found in dataset.")
        return

    index = products_df[products_df['Product_ID'] == product_id].index[0]
    
    # Get similarity scores
    scores = list(enumerate(similarity_matrix[index]))
    
    # Sort by similarity score (excluding self)
    sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    
    # Get product info for recommendations
    recommended_indices = [i[0] for i in sorted_scores]
    return products_df.iloc[recommended_indices][['Product_ID', 'Brand', 'Category', 'Product_Rating', 'Probability_of_Recommendation']]


In [17]:
import os
print(os.getcwd())


C:\Users\Sudarshan Dhongade\Shopping


In [49]:
import sqlite3

conn = sqlite3.connect("ecommerce.db")
cursor = conn.cursor()

cursor.execute("PRAGMA table_info(customers)")
columns = cursor.fetchall()

print("Customer Table Columns:")
for col in columns:
    print(col)

conn.close()


Customer Table Columns:
(0, 'Customer_ID', 'TEXT', 0, None, 0)
(1, 'Age', 'INTEGER', 0, None, 0)
(2, 'Gender', 'TEXT', 0, None, 0)
(3, 'Location', 'TEXT', 0, None, 0)
(4, 'Browsing_History', 'TEXT', 0, None, 0)
(5, 'Purchase_History', 'TEXT', 0, None, 0)
(6, 'Customer_Segment', 'TEXT', 0, None, 0)
(7, 'Avg_Order_Value', 'REAL', 0, None, 0)
(8, 'Holiday', 'TEXT', 0, None, 0)
(9, 'Season', 'TEXT', 0, None, 0)
(10, 'Unnamed: 10', 'REAL', 0, None, 0)


In [55]:
import sqlite3
import pandas as pd

# Load CSV
df = pd.read_csv("customer_data_collection.csv")

# Connect to SQLite DB
conn = sqlite3.connect("ecommerce.db")
cursor = conn.cursor()

# Drop and recreate customers table
cursor.execute("DROP TABLE IF EXISTS customers")
cursor.execute('''
    CREATE TABLE customers (
        id TEXT PRIMARY KEY,
        age INTEGER,
        gender TEXT,
        location TEXT,
        browsing_history TEXT,
        purchase_history TEXT,
        customer_segment TEXT,
        avg_order_value TEXT,
        holiday TEXT,
        season TEXT
    )
''')


# Use correct column names from the CSV
for _, row in df.iterrows():
    cursor.execute('''
        INSERT INTO customers (id, age, gender, location, browsing_history, purchase_history, customer_segment, avg_order_value, holiday, season  )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
        (
        row['Customer_ID'],  # or row['ID'] depending on your CSV
        row['Age'],
        row['Gender'],
        row['Location'],
        row['Browsing_History'],
        row['Purchase_History'],
        row['Customer_Segment'],
        row['Avg_Order_Value'],
        row['Holiday'],
        row['Season'],
    ))

conn.commit()
conn.close()

print("✅ Customer data inserted successfully.")


✅ Customer data inserted successfully.


In [7]:
import sqlite3
import pandas as pd

# Load CSV
df = pd.read_csv("product_recommendation_data.csv")

# Connect to SQLite DB
conn = sqlite3.connect("ecommerce.db")
cursor = conn.cursor()

# Drop and recreate the products table
cursor.execute("DROP TABLE IF EXISTS products")

cursor.execute('''
    CREATE TABLE products (
        product_id TEXT PRIMARY KEY,
        category TEXT,
        subcategory TEXT,
        price REAL,
        brand TEXT,
        average_rating_of_similar_products REAL,
        product_rating REAL,
        customer_review_sentiment_score REAL,
        holiday TEXT,
        season TEXT,
        geographical_location TEXT,
        similar_product_list TEXT,
        probability_of_recommendation REAL
    )
''')

# Insert product data
for _, row in df.iterrows():
    cursor.execute('''
        INSERT INTO products (
            product_id, category, subcategory, price, brand,
            average_rating_of_similar_products, product_rating,
            customer_review_sentiment_score, holiday, season,
            geographical_location, similar_product_list,
            probability_of_recommendation
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        row['Product_ID'],
        row['Category'],
        row['Subcategory'],
        row['Price'],
        row['Brand'],
        row['Average_Rating_of_Similar_Products'],
        row['Product_Rating'],
        row['Customer_Review_Sentiment_Score'],
        row['Holiday'],
        row['Season'],
        row['Geographical_Location'],
        row['Similar_Product_List'],
        row['Probability_of_Recommendation']
    ))

conn.commit()
conn.close()

print("✅ Product data inserted successfully.")


✅ Product data inserted successfully.
