In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor

# Pipeline overview: load the sales table, cluster products on their scaled
# features, train a regressor on the products that sold, and use it to
# estimate demand (plus a suggested quantity) for products with zero sales.

# Columns that are never fed to the scaler or the model: the identifier and
# the prediction target.
non_feature_cols = ['product_id', 'sales']

# Multiplier applied to the predicted sales to get the suggested quantity.
# Example factor carried over as-is; adjust to the real ordering policy.
must_sell_factor = 1.2

# Load data
data = pd.read_csv('sales_data.csv')

# Data preprocessing
# NOTE(review): filling every gap with 0 also turns a missing `sales` value
# into "sold nothing", which routes that row into `non_selling` below.
# Confirm this is intended.
data = data.fillna(0)
scaler = StandardScaler()
# Assumes every remaining column is numeric -- TODO confirm against the CSV.
scaled_data = scaler.fit_transform(data.drop(columns=non_feature_cols))

# Clustering
# The scaled matrix is used only for clustering; the resulting label is stored
# on `data` and therefore becomes an extra (unscaled) feature for the model.
kmeans = KMeans(n_clusters=5, random_state=42)
data['cluster'] = kmeans.fit_predict(scaled_data)

# Identify non-selling products
# Take an explicit copy: new columns are assigned to this frame further down,
# and writing to a boolean-mask slice of `data` raises SettingWithCopyWarning.
non_selling = data[data['sales'] == 0].copy()

# Predictive modeling
selling = data[data['sales'] > 0]
if selling.empty:
    # Fail early with a clear message instead of an opaque estimator error.
    raise ValueError('No rows with sales > 0; cannot train the sales model.')
X = selling.drop(columns=non_feature_cols)
y = selling['sales']

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# Forecast sales for non-selling products
X_non_selling = non_selling.drop(columns=non_feature_cols)
if X_non_selling.empty:
    # The estimator rejects zero-row input, so produce an empty float column
    # and let the script finish with an empty result table.
    non_selling['predicted_sales'] = pd.Series(
        index=non_selling.index, dtype=float
    )
else:
    non_selling['predicted_sales'] = model.predict(X_non_selling)

# Suggested order quantity
non_selling['must_sell_qty'] = non_selling['predicted_sales'] * must_sell_factor

# Output results
print(non_selling[['product_id', 'predicted_sales', 'must_sell_qty']])
