# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

def _find_column(df, wanted):
    """Return the label of the column in *df* that matches *wanted*.

    An exact match wins; otherwise the comparison ignores case and
    surrounding whitespace, so 'Product_Name' also finds 'Product_name'.

    Raises:
        KeyError: if no column matches, listing the columns that do exist.
    """
    if wanted in df.columns:
        return wanted
    target = wanted.strip().lower()
    for column in df.columns:
        if str(column).strip().lower() == target:
            return column
    raise KeyError(
        f"Column {wanted!r} not found; available columns: {list(df.columns)}"
    )


def _product_rows_and_prices(df, name_column, price_column, product_name):
    """Select the rows for *product_name* and parse their prices.

    Args:
        df: DataFrame holding one listing per row.
        name_column: Column with the product title.
        price_column: Column with the price (text such as '₹79,900' or numbers).
        product_name: Case-insensitive substring to look for in the title.

    Returns:
        A ``(rows, prices)`` tuple: the matching rows of *df*, and a float
        Series of their prices with unparseable entries dropped.

    Raises:
        KeyError: if either column is missing from *df*.
    """
    name_label = _find_column(df, name_column)
    price_label = _find_column(df, price_column)

    # astype(str) turns missing names into 'nan', so the mask is purely
    # boolean and safe to use with .loc.
    titles = df[name_label].astype(str)
    matches = titles.str.contains(product_name, case=False, regex=False)
    rows = df.loc[matches]

    # astype(str) lets the same cleaning work for text and numeric columns.
    price_text = rows[price_label].astype(str)
    digits_only = price_text.str.replace('[^0-9.]', '', regex=True)
    # Cells that clean to '' (or garbage like '1.2.3') become NaN and are
    # dropped instead of aborting the whole conversion.
    prices = pd.to_numeric(digits_only, errors='coerce').dropna().astype(float)
    return rows, prices


product = "iphone 14"

# Load data from Excel files
amazon_df = pd.read_excel('iphone_amazon_data.xlsx')  # Replace with the actual file name
flipkart_df = pd.read_excel('iphone_flipkart_data.xlsx')  # Replace with the actual file name

# Extract and preprocess prices for the product from the Amazon DataFrame
amazon_rows, amazon_prices = _product_rows_and_prices(
    amazon_df, 'Product_Name', 'Product_price', product
)

# Extract and preprocess prices for the product from the Flipkart DataFrame
flipkart_rows, flipkart_prices = _product_rows_and_prices(
    flipkart_df, 'Product Name', 'Price', product
)

# Listings are paired by position, so both price lists must have the same
# length before they can be subtracted or plotted against each other.
pair_count = min(len(amazon_prices), len(flipkart_prices))
if pair_count == 0:
    raise ValueError(
        f"No comparable prices for {product!r}: "
        f"{len(amazon_prices)} Amazon and {len(flipkart_prices)} Flipkart listings found"
    )
if len(amazon_prices) != len(flipkart_prices):
    print(
        f"Note: {len(amazon_prices)} Amazon vs {len(flipkart_prices)} Flipkart "
        f"listings; comparing the first {pair_count} of each."
    )
amazon_prices = amazon_prices.iloc[:pair_count]
flipkart_prices = flipkart_prices.iloc[:pair_count]

# Calculate price differences
price_differences = amazon_prices.to_numpy() - flipkart_prices.to_numpy()

# Reshape the price differences array for clustering (one feature per sample)
price_differences = price_differences.reshape(-1, 1)

# Apply K-Means clustering
num_clusters = 3  # You can adjust the number of clusters as needed
# KMeans rejects n_clusters larger than the number of samples.
num_clusters = min(num_clusters, pair_count)
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(price_differences)
cluster_labels = kmeans.labels_

# Create a scatter plot to visualize the clusters
plt.scatter(amazon_prices, flipkart_prices, c=cluster_labels, cmap='viridis')
plt.xlabel('Amazon Prices')
plt.ylabel('Flipkart Prices')
plt.title('Price Comparison for iPhone 14 (Clustered)')
plt.show()
