# Amazon Customer Segmentation Project
---
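This project demonstrates customer segmentation intended for Amazon product review data. When the real dataset is not available, the notebook loads a placeholder sample customer CSV and simulates the income and spending-score features used for clustering.
Tools: Python (Pandas, NumPy, Scikit-learn, Matplotlib, Seaborn), SQLite for database integration.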

In [None]:
!pip install pandas scikit-learn matplotlib seaborn sqlite3

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
import sqlite3

In [None]:
# Load dataset
# A placeholder sample CSV is read straight from its URL; point `url` at the
# real dataset when it is available.
url = (
    'https://raw.githubusercontent.com/datablist/sample-csv-files/'
    'main/files/customers/customers-100.csv'
)
df = pd.read_csv(url)
df.head()

In [None]:
# Data Preprocessing
# Drop rows containing any missing value, then give the name columns
# identifiers without spaces.
df = df.dropna().rename(columns={'First Name': 'FirstName', 'Last Name': 'LastName'})

# The two clustering features are simulated only when the dataset does not
# already provide them, so real values are never overwritten.
np.random.seed(42)  # fixed seed keeps the simulated values reproducible
if 'AnnualIncome' not in df.columns:
    df['AnnualIncome'] = np.random.randint(30000, 120000, size=len(df))
if 'SpendingScore' not in df.columns:
    # randint's upper bound is exclusive, so simulated scores fall in 1..99.
    df['SpendingScore'] = np.random.randint(1, 100, size=len(df))
df.head()

In [None]:
# Feature Selection
# The clustering input is the income / spending-score pair of columns.
feature_columns = ['AnnualIncome', 'SpendingScore']
X = df[feature_columns]

In [None]:
# KMeans Clustering
from sklearn.preprocessing import StandardScaler

# KMeans groups points by Euclidean distance, so the features are standardised
# first. Without this, AnnualIncome (simulated in the tens of thousands) would
# swamp SpendingScore (simulated in 1..99) and the clusters would effectively
# be income bands only.
X_scaled = StandardScaler().fit_transform(X)

# n_init is passed explicitly so the number of restarts does not depend on the
# default of the installed scikit-learn version; random_state keeps the
# assignment reproducible.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
df['Cluster'] = kmeans.fit_predict(X_scaled)
df.head()

In [None]:
# Visualization
# Scatter the two clustering features and colour each point by its cluster.
figure_size = (8, 5)
plt.figure(figsize=figure_size)
scatter_options = dict(
    x='AnnualIncome',
    y='SpendingScore',
    hue='Cluster',
    palette='viridis',
)
sns.scatterplot(data=df, **scatter_options)
plt.title('Customer Segmentation Clusters')
plt.show()

In [None]:
# Save to SQLite Database
# Write the frame to the `customers` table, replacing any existing table of
# that name. `conn` is left open because the query cell below reuses it.
db_path = 'customer_segmentation.db'
conn = sqlite3.connect(db_path)
df.to_sql(name='customers', con=conn, if_exists='replace', index=False)
conn.commit()

In [None]:
# Query database
# Count how many customers were assigned to each cluster.
query = 'SELECT Cluster, COUNT(*) as Count FROM customers GROUP BY Cluster'
try:
    cluster_counts = pd.read_sql(query, conn)
finally:
    # This is the last use of the connection in the notebook, so release it
    # here instead of leaving the database handle open. Re-run the save cell
    # to reopen `conn` before running this cell again.
    conn.close()
cluster_counts

In [None]:
# Export final CSV
# Write the final frame to disk without the index column.
output_csv = 'customer_segmentation_results.csv'
df.to_csv(output_csv, index=False)