In [24]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import hstack
from fuzzywuzzy import process

In [25]:
df=pd.read_csv(r"C:\Users\MY\Desktop\Capstone\skincare_preprocessed.csv")

In [26]:
# Step 1: TF-IDF Vectorization on 'title'
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Product_new'])

In [27]:
# Step 2: Normalize numerical features ('price', 'rating')
scaler = StandardScaler()
numeric_features = scaler.fit_transform(df[['Price', 'Rating']])

In [28]:
# Step 3: Encode categorical feature ('product_type')
encoder = OneHotEncoder()
categorical_features = encoder.fit_transform(df[['Product Type']])

In [29]:
# Step 4: Combine all features into a single feature matrix
combined_features = hstack([tfidf_matrix, numeric_features, categorical_features])

In [30]:
# Step 5: Dimensionality Reduction using PCA
pca = PCA(n_components=500)  # Reduce to 50 dimensions (adjust based on variance explained)
reduced_features = pca.fit_transform(combined_features.toarray())


In [31]:
# Step 6: Clustering with KMeans
num_clusters = 10  # Adjust based on dataset size and variety
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
clusters = kmeans.fit_predict(reduced_features)

# Add cluster labels to the DataFrame
df['Cluster'] = clusters


In [38]:

# Enhanced Recommendation System Function with Clustering
def recommend_products_with_clustering(product_title, num_recommendations=10):
    try:
        # Find the closest match for the product title using fuzzy matching
        #closest_match = process.extractOne(product_title, df['Product_new'])[0]
        closest_match=product_title
        product_idx = df[df['Product_new'] == closest_match].index[0]
        
        # Find the cluster of the queried product
        product_cluster = df.loc[product_idx, 'Cluster']
        
        # Filter products in the same cluster
        cluster_products = df[df['Cluster'] == product_cluster]
        
        # Compute cosine similarity within the cluster
        cluster_features = reduced_features[df['Cluster'] == product_cluster]
        query_features = reduced_features[product_idx].reshape(1, -1)
        cluster_sim_scores = cosine_similarity(query_features, cluster_features)
        
        # Rank products within the cluster by similarity
        sim_scores = list(enumerate(cluster_sim_scores[0]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        
        # Get the top N recommendations (excluding the queried product)
        recommended_indices = [cluster_products.index[idx] for idx, score in sim_scores if idx != product_idx][:num_recommendations]
        
        # Return the recommended products
        return df.loc[recommended_indices]
    except IndexError:
        return "Product title not found in the dataset."



In [41]:
# Test the recommendation system
product_to_search = "Fruit Brightening Brightens  Soften Skin 100 soap freeMen  Women Face Wash"
recommended_products = recommend_products_with_clustering(product_to_search)

print(recommended_products)

                                                Product  Price  Rating  \
271   DR.RASHEL VITAMIN C FACE WASH BRIGHTENS THE SK...    160     4.2   
50    BIOTIQUE Fruit Brightening | Ayurvedic and Org...    151     4.2   
1008  BIOTIQUE Fruit Brightening | Ayurvedic and Org...    151     4.2   
188      NIVEA Milk Delights Caring Rosewater Face Wash    207     4.2   
729   OZiva Bioactive VitaminC30 FaceCleanser for Sk...    199     4.3   
282   Leeford meglow Fairness  70g x 2 = 170 g Face ...    160     4.2   
608   Estonia Refining  With White Crystallites and ...    233     4.3   
888          acnestar Unisex  - (50 Gr, 3 Pc) Face Wash    266     4.2   
40                           BEARDO Ultraglow Face Wash    203     4.2   
208                            NIVEA All-In-1 Face Wash    220     4.3   

          Brand Product Type  Packing  \
271   DR.RASHEL    Face Wash      100   
50     BIOTIQUE    Face Wash      200   
1008   BIOTIQUE    Face Wash      150   
188       NIVEA    Fa

In [18]:
pca.explained_variance_.sum()

3.5342819390654885

In [19]:
pca.explained_variance_ratio_.sum()

0.9060733516853876