<a href="https://colab.research.google.com/github/zrghassabi/DataScienceProject/blob/main/practice_E_commerce.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

LSTM-based web traffic forecasting model

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# **Step 1: Simulated Web Traffic Data**
# Seed NumPy's global RNG so the simulated columns are reproducible. The draws
# are consumed in the order the dict entries are written, so reordering the
# entries changes every random column.
np.random.seed(42)
# 36 monthly timestamps starting in January 2021.
date_range = pd.date_range(start="2021-01-01", periods=36, freq="M")
web_traffic_data = pd.DataFrame({
    'Date': date_range,
    # Each random column is drawn independently of the others, so the target
    # ('Monthly_Visitors') has no built-in relationship to the other features.
    'Monthly_Visitors': np.random.randint(500000, 1500000, len(date_range)),
    'Ad_Spend': np.random.uniform(50000, 200000, len(date_range)),
    'SEO_Ranking': np.random.randint(1, 50, len(date_range)),
    'Competitor_Issues': np.random.choice([0, 1], len(date_range), p=[0.8, 0.2]),
    # One full sine period spread across the whole date range (deterministic).
    'Seasonality': np.sin(np.linspace(0, 2 * np.pi, len(date_range)))
})

# **Step 2: Preprocessing**
# Min-max scale the five model columns; column 0 of `data_scaled` is
# 'Monthly_Visitors', which later code relies on when it indexes `[:, 0]`.
# NOTE(review): the scaler is fitted on all 36 rows, i.e. before any
# train/test split is made.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(web_traffic_data[['Monthly_Visitors', 'Ad_Spend', 'SEO_Ranking', 'Competitor_Issues', 'Seasonality']])

# **Step 3: Prepare Data for LSTM**
def create_sequences(data, seq_length=12):
    """Slice *data* into overlapping windows and their next-step targets.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        A pair ``(X, y)`` of NumPy arrays. ``X[k]`` holds rows
        ``k .. k + seq_length - 1`` and ``y[k]`` is column 0 of the row that
        immediately follows that window.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length, 0] for start in window_starts]
    return np.array(windows), np.array(targets)

seq_length = 12
X_lstm, y_lstm = create_sequences(data_scaled, seq_length)
# Split chronologically (no shuffling). Consecutive windows share all but one
# row, so a shuffled split would place near-copies of the test windows - and
# rows that lie in their future - into the training set. With shuffle=False
# the earliest 80% of windows train the model and the latest 20% are held out.
X_train, X_test, y_train, y_test = train_test_split(X_lstm, y_lstm, test_size=0.2, shuffle=False)

# Convert to PyTorch tensors
# Inputs keep their (windows, seq_length, features) layout; targets become a
# column vector so they line up with the model's (batch, 1) output.
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# **Step 4: Define LSTM Model**
class WebTrafficLSTM(nn.Module):
    """Stacked LSTM with a linear head that emits one value per input window."""

    def __init__(self, input_size=5, hidden_size=50, num_layers=2):
        """Create the recurrent stack and the single-output linear layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map a (batch, seq, features) tensor to a (batch, 1) prediction."""
        hidden_states, _final_state = self.lstm(x)
        # Only the hidden state at the final time step feeds the output layer.
        last_step = hidden_states[:, -1, :]
        return self.fc(last_step)

# Initialize model, loss function, and optimizer
# Mean-squared error between the predicted and the scaled visitor count.
lstm_model = WebTrafficLSTM()
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)

# **Step 5: Train the LSTM Model**
# Full-batch training: every epoch is exactly one optimizer step computed on
# the whole training tensor (there is no mini-batching or data loader).
epochs = 50
for epoch in range(epochs):
    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    y_pred = lstm_model(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report the training loss every 10th epoch (epochs 0, 10, 20, ...).
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# **Step 6: Forecast Next 6 Months**
forecast_horizon = 6
future_dates = pd.date_range(start=web_traffic_data['Date'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq="M")

# Forecast recursively from the last observed window instead of feeding the
# model random noise: each predicted (scaled) visitor count is appended to the
# window and the oldest row is dropped before predicting the next month.
# Future values of the other features are unknown, so they are held at their
# last observed values (a simplifying assumption).
current_window = data_scaled[-seq_length:].copy()
input_windows, scaled_forecasts = [], []
lstm_model.eval()  # inference mode: disables dropout if the model has any
with torch.no_grad():  # no gradients are needed for forecasting
    for _ in range(forecast_horizon):
        input_windows.append(current_window)
        window_torch = torch.tensor(current_window, dtype=torch.float32).unsqueeze(0)
        next_visitors = lstm_model(window_torch).item()
        scaled_forecasts.append(next_visitors)
        next_row = current_window[-1].copy()
        next_row[0] = next_visitors  # column 0 is the scaled Monthly_Visitors
        current_window = np.vstack([current_window[1:], next_row])

# Keep the names earlier versions of this cell exposed: the windows actually
# fed to the model, shaped (horizon, seq_length, features).
future_data = np.stack(input_windows)
future_data_torch = torch.tensor(future_data, dtype=torch.float32)
future_predictions = np.array(scaled_forecasts, dtype=np.float32).reshape(-1, 1)

# Rescale predictions
# The scaler was fitted on 5 columns, so pad the single prediction column with
# 4 dummy columns, invert the scaling, and keep only the visitor column.
future_predictions_rescaled = scaler.inverse_transform(np.concatenate((future_predictions, np.zeros((forecast_horizon, 4))), axis=1))[:, 0]

# **Step 7: Display Forecasted Traffic Trends**
plt.figure(figsize=(10, 6))
plt.plot(web_traffic_data['Date'], web_traffic_data['Monthly_Visitors'], label="Historical Traffic")
plt.plot(future_dates, future_predictions_rescaled, marker='o', linestyle="--", label="Predicted Traffic (LSTM)")
plt.xlabel("Date")
plt.ylabel("Monthly Visitors")
plt.title("LSTM-Based Web Traffic Forecasting")
plt.legend()
plt.show()

# **Output Forecasted Values**
future_forecast_df = pd.DataFrame({'Date': future_dates, 'Predicted_Monthly_Visitors': future_predictions_rescaled})
print(future_forecast_df)


In [None]:
# Sanity check: show the generated timestamps and confirm the date index and
# the scaled feature matrix have the same number of rows.
print(date_range, len(date_range),len(data_scaled))

In [None]:
# Preview the first rows of the simulated (unscaled) web-traffic frame.
print(web_traffic_data.head())

In [None]:
# Display the first 12 scaled rows, i.e. the rows that make up the first
# input window when seq_length is 12.
data_scaled[:12]




In [None]:
# Peek at the first 12-row window (all features) and at column 0 of that same
# window, then print both.
first_window = slice(0, 12)
x = data_scaled[first_window]
y = data_scaled[first_window, 0]

print(x, y)

In [None]:
# Print the shapes explicitly: a bare expression that is not the last
# statement of a notebook cell is evaluated but never displayed.
print("X_lstm shape:", X_lstm.shape, "y_lstm shape:", y_lstm.shape)

# The slices below take five windows/targets, so the labels say five.
print("X_lstm (first 5 rows):\n", X_lstm[:5])
print("\ny_lstm (first 5 rows):\n", y_lstm[:5])


Optimized LSTM model

In [None]:
# **Step 4: Optimized LSTM Model**
class WebTrafficLSTM(nn.Module):
    """Deeper, wider LSTM forecaster with dropout between the stacked layers."""

    def __init__(self, input_size=5, hidden_size=100, num_layers=3, dropout=0.2):
        """Create the recurrent stack and the single-output linear layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_layers: Number of stacked LSTM layers.
            dropout: Dropout probability handed to ``nn.LSTM``.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map a (batch, seq, features) tensor to a (batch, 1) prediction."""
        hidden_states, _final_state = self.lstm(x)
        # Only the hidden state at the final time step feeds the output layer.
        last_step = hidden_states[:, -1, :]
        return self.fc(last_step)

# Initialize model with optimized hyperparameters
# Compared with the first LSTM cell: lower learning rate plus L2 weight decay.
lstm_model = WebTrafficLSTM()
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.0005, weight_decay=1e-5)

# **Step 5: Train the Optimized LSTM Model**
# Full-batch training: every epoch is exactly one optimizer step computed on
# the whole training tensor.
epochs = 100
for epoch in range(epochs):
    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    y_pred = lstm_model(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report the training loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# **Step 6: Forecast Next 6 Months with Optimized Model**
forecast_horizon = 6
future_dates = pd.date_range(start=web_traffic_data['Date'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq="M")

# Forecast recursively from the last observed window instead of feeding the
# model random noise: each predicted (scaled) visitor count is appended to the
# window and the oldest row is dropped before predicting the next month.
# Future values of the other features are unknown, so they are held at their
# last observed values (a simplifying assumption).
current_window = data_scaled[-seq_length:].copy()
input_windows, scaled_forecasts = [], []
# This model uses dropout; eval() switches it off so the forecast is
# deterministic rather than a random sub-network's output.
lstm_model.eval()
with torch.no_grad():  # no gradients are needed for forecasting
    for _ in range(forecast_horizon):
        input_windows.append(current_window)
        window_torch = torch.tensor(current_window, dtype=torch.float32).unsqueeze(0)
        next_visitors = lstm_model(window_torch).item()
        scaled_forecasts.append(next_visitors)
        next_row = current_window[-1].copy()
        next_row[0] = next_visitors  # column 0 is the scaled Monthly_Visitors
        current_window = np.vstack([current_window[1:], next_row])

# Keep the names earlier versions of this cell exposed: the windows actually
# fed to the model, shaped (horizon, seq_length, features).
future_data = np.stack(input_windows)
future_data_torch = torch.tensor(future_data, dtype=torch.float32)
future_predictions = np.array(scaled_forecasts, dtype=np.float32).reshape(-1, 1)

# Rescale predictions
# The scaler was fitted on 5 columns, so pad the single prediction column with
# 4 dummy columns, invert the scaling, and keep only the visitor column.
future_predictions_rescaled = scaler.inverse_transform(np.concatenate((future_predictions, np.zeros((forecast_horizon, 4))), axis=1))[:, 0]

# **Step 7: Display Forecasted Traffic Trends**
plt.figure(figsize=(10, 6))
plt.plot(web_traffic_data['Date'], web_traffic_data['Monthly_Visitors'], label="Historical Traffic")
plt.plot(future_dates, future_predictions_rescaled, marker='o', linestyle="--", label="Predicted Traffic (Optimized LSTM)")
plt.xlabel("Date")
plt.ylabel("Monthly Visitors")
plt.title("Optimized LSTM-Based Web Traffic Forecasting")
plt.legend()
plt.show()

# **Output Forecasted Values**
future_forecast_df = pd.DataFrame({'Date': future_dates, 'Predicted_Monthly_Visitors': future_predictions_rescaled})
print(future_forecast_df)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# **Step 1: Simulated Web Traffic Data**
# Seed NumPy's global RNG so the simulated columns are reproducible. The draws
# are consumed in the order the dict entries are written, so reordering the
# entries changes every random column.
np.random.seed(42)
# 36 monthly timestamps starting in January 2021.
date_range = pd.date_range(start="2021-01-01", periods=36, freq="M")
web_traffic_data = pd.DataFrame({
    'Date': date_range,
    # Each random column is drawn independently of the others, so the target
    # ('Monthly_Visitors') has no built-in relationship to the other features.
    'Monthly_Visitors': np.random.randint(500000, 1500000, len(date_range)),
    'Ad_Spend': np.random.uniform(50000, 200000, len(date_range)),
    'SEO_Ranking': np.random.randint(1, 50, len(date_range)),
    'Competitor_Issues': np.random.choice([0, 1], len(date_range), p=[0.8, 0.2]),
    # One full sine period spread across the whole date range (deterministic).
    'Seasonality': np.sin(np.linspace(0, 2 * np.pi, len(date_range)))
})

# **Step 2: Preprocessing**
# Min-max scale the five model columns; column 0 of `data_scaled` is
# 'Monthly_Visitors', which later code relies on when it indexes `[:, 0]`.
# NOTE(review): the scaler is fitted on all 36 rows, i.e. before any
# train/test split is made.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(web_traffic_data[['Monthly_Visitors', 'Ad_Spend', 'SEO_Ranking', 'Competitor_Issues', 'Seasonality']])

# **Step 3: Prepare Data for Transformer Model**
def create_sequences(data, seq_length=12):
    """Build sliding input windows and the value that follows each one.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        seq_length: Number of consecutive rows per window.

    Returns:
        ``(X, y)`` as NumPy arrays, where ``X[k]`` is the window starting at
        row ``k`` and ``y[k]`` is column 0 of the first row after that window.
    """
    n_windows = len(data) - seq_length
    inputs = [data[offset:offset + seq_length] for offset in range(n_windows)]
    labels = [data[offset + seq_length, 0] for offset in range(n_windows)]
    return np.array(inputs), np.array(labels)

seq_length = 12
X_lstm, y_lstm = create_sequences(data_scaled, seq_length)
# Split chronologically (no shuffling). Consecutive windows share all but one
# row, so a shuffled split would place near-copies of the test windows - and
# rows that lie in their future - into the training set. With shuffle=False
# the earliest 80% of windows train the model and the latest 20% are held out.
X_train, X_test, y_train, y_test = train_test_split(X_lstm, y_lstm, test_size=0.2, shuffle=False)

# Convert to PyTorch tensors
# Inputs keep their (windows, seq_length, features) layout; targets become a
# column vector so they line up with the model's (batch, 1) output.
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# **Step 4: Transformer-Based Forecasting Model**
class TransformerModel(nn.Module):
    """Encoder-decoder transformer that regresses one value per input window.

    Inputs are laid out as (batch, seq, features). ``nn.Transformer`` defaults
    to (seq, batch, features), so it is built with ``batch_first=True``;
    without that flag attention would be computed across the samples of a
    batch instead of across the time steps of each window.

    NOTE: no positional encoding is added to the projected inputs.
    """

    def __init__(self, input_size=5, d_model=64, nhead=4, num_layers=3, dropout=0.1):
        """Create the input projection, the transformer and the output head.

        Args:
            input_size: Number of features per time step.
            d_model: Embedding width used inside the transformer.
            nhead: Number of attention heads.
            num_layers: Number of encoder layers and of decoder layers.
            dropout: Dropout probability inside the transformer.
        """
        super(TransformerModel, self).__init__()
        self.encoder = nn.Linear(input_size, d_model)
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        """Map a (batch, seq, features) tensor to a (batch, 1) prediction."""
        x = self.encoder(x)
        # The projected window serves as both source and target sequence.
        x = self.transformer(x, x)
        # Read the prediction from the last time step of every sample.
        return self.decoder(x[:, -1, :])

# Initialize Transformer Model
# Mean-squared error loss; Adam with a small L2 weight decay.
transformer_model = TransformerModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(transformer_model.parameters(), lr=0.0005, weight_decay=1e-5)

# **Step 5: Train Transformer Model**
# Full-batch training: every epoch is exactly one optimizer step computed on
# the whole training tensor.
epochs = 100
for epoch in range(epochs):
    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    y_pred = transformer_model(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report the training loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# **Step 6: Visualize Attention Weights**
# NOTE(review): `in_proj_weight` is the input-projection parameter of the first
# encoder layer's self-attention module, not the attention scores computed for
# any particular input. The heatmap below is therefore a row-wise softmax of
# that parameter matrix; its rows and columns are weight-matrix indices, so the
# "Input Sequence Position" / "Attention Head" axis labels should be confirmed
# or replaced before this figure is interpreted.
attention_weights = torch.softmax(transformer_model.transformer.encoder.layers[0].self_attn.in_proj_weight, dim=1).detach().numpy()
plt.figure(figsize=(8, 6))
plt.imshow(attention_weights, cmap='viridis', aspect='auto')
plt.colorbar(label='Attention Weight')
plt.xlabel('Input Sequence Position')
plt.ylabel('Attention Head')
plt.title('Visualization of Attention Weights in Transformer Model')
plt.show()

# **Step 7: Forecast Next 6 Months with Transformer Model**
forecast_horizon = 6
future_dates = pd.date_range(start=web_traffic_data['Date'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq="M")

# Forecast recursively from the last observed window instead of feeding the
# model random noise: each predicted (scaled) visitor count is appended to the
# window and the oldest row is dropped before predicting the next month.
# Future values of the other features are unknown, so they are held at their
# last observed values (a simplifying assumption).
current_window = data_scaled[-seq_length:].copy()
input_windows, scaled_forecasts = [], []
# The transformer uses dropout; eval() switches it off so the forecast is
# deterministic rather than a random sub-network's output.
transformer_model.eval()
with torch.no_grad():  # no gradients are needed for forecasting
    for _ in range(forecast_horizon):
        input_windows.append(current_window)
        window_torch = torch.tensor(current_window, dtype=torch.float32).unsqueeze(0)
        next_visitors = transformer_model(window_torch).item()
        scaled_forecasts.append(next_visitors)
        next_row = current_window[-1].copy()
        next_row[0] = next_visitors  # column 0 is the scaled Monthly_Visitors
        current_window = np.vstack([current_window[1:], next_row])

# Keep the names earlier versions of this cell exposed: the windows actually
# fed to the model, shaped (horizon, seq_length, features).
future_data = np.stack(input_windows)
future_data_torch = torch.tensor(future_data, dtype=torch.float32)
future_predictions = np.array(scaled_forecasts, dtype=np.float32).reshape(-1, 1)

# Rescale predictions
# The scaler was fitted on 5 columns, so pad the single prediction column with
# 4 dummy columns, invert the scaling, and keep only the visitor column.
future_predictions_rescaled = scaler.inverse_transform(np.concatenate((future_predictions, np.zeros((forecast_horizon, 4))), axis=1))[:, 0]

# **Step 8: Display Forecasted Traffic Trends**
plt.figure(figsize=(10, 6))
plt.plot(web_traffic_data['Date'], web_traffic_data['Monthly_Visitors'], label="Historical Traffic")
plt.plot(future_dates, future_predictions_rescaled, marker='o', linestyle="--", label="Predicted Traffic (Transformer Model)")
plt.xlabel("Date")
plt.ylabel("Monthly Visitors")
plt.title("Transformer-Based Web Traffic Forecasting with Attention")
plt.legend()
plt.show()

# **Output Forecasted Values**
future_forecast_df = pd.DataFrame({'Date': future_dates, 'Predicted_Monthly_Visitors': future_predictions_rescaled})
print(future_forecast_df)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# **Step 1: Simulated Web Traffic Data**
# Seed NumPy's global RNG so the simulated columns are reproducible. The draws
# are consumed in the order the dict entries are written, so reordering the
# entries changes every random column.
np.random.seed(42)
# 36 monthly timestamps starting in January 2021.
date_range = pd.date_range(start="2021-01-01", periods=36, freq="M")
web_traffic_data = pd.DataFrame({
    'Date': date_range,
    # Each random column is drawn independently of the others, so the target
    # ('Monthly_Visitors') has no built-in relationship to the other features.
    'Monthly_Visitors': np.random.randint(500000, 1500000, len(date_range)),
    'Ad_Spend': np.random.uniform(50000, 200000, len(date_range)),
    'SEO_Ranking': np.random.randint(1, 50, len(date_range)),
    'Competitor_Issues': np.random.choice([0, 1], len(date_range), p=[0.8, 0.2]),
    # One full sine period spread across the whole date range (deterministic).
    'Seasonality': np.sin(np.linspace(0, 2 * np.pi, len(date_range)))
})

# **Step 2: Preprocessing**
# Min-max scale the five model columns; column 0 of `data_scaled` is
# 'Monthly_Visitors', which later code relies on when it indexes `[:, 0]`.
# NOTE(review): the scaler is fitted on all 36 rows, i.e. before any
# train/test split is made.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(web_traffic_data[['Monthly_Visitors', 'Ad_Spend', 'SEO_Ranking', 'Competitor_Issues', 'Seasonality']])

# **Step 3: Prepare Data for Transformer Model**
def create_sequences(data, seq_length=12):
    """Pair every ``seq_length``-row window of *data* with its next target.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        seq_length: Number of consecutive rows per window.

    Returns:
        ``(X, y)`` as NumPy arrays; ``y[k]`` is column 0 of the row directly
        after window ``X[k]``.
    """
    pairs = [
        (data[first:first + seq_length], data[first + seq_length, 0])
        for first in range(len(data) - seq_length)
    ]
    windows = [window for window, _ in pairs]
    targets = [target for _, target in pairs]
    return np.array(windows), np.array(targets)

seq_length = 12
X_transformer, y_transformer = create_sequences(data_scaled, seq_length)
# Split chronologically (no shuffling). Consecutive windows share all but one
# row, so a shuffled split would place near-copies of the test windows - and
# rows that lie in their future - into the training set. With shuffle=False
# the earliest 80% of windows train the model and the latest 20% are held out.
X_train, X_test, y_train, y_test = train_test_split(X_transformer, y_transformer, test_size=0.2, shuffle=False)

# Convert to PyTorch tensors
# Inputs keep their (windows, seq_length, features) layout; targets become a
# column vector so they line up with the model's (batch, 1) output.
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# **Step 4: Informer-Based Forecasting Model**
class InformerModel(nn.Module):
    """Encoder-decoder transformer that regresses one value per input window.

    Despite the name, this class wraps the stock ``nn.Transformer``; nothing
    in it is specific to the Informer architecture.

    Inputs are laid out as (batch, seq, features). ``nn.Transformer`` defaults
    to (seq, batch, features), so it is built with ``batch_first=True``;
    without that flag attention would be computed across the samples of a
    batch instead of across the time steps of each window.

    NOTE: no positional encoding is added to the projected inputs.
    """

    def __init__(self, input_size=5, d_model=64, nhead=4, num_layers=3, dropout=0.1):
        """Create the input projection, the transformer and the output head.

        Args:
            input_size: Number of features per time step.
            d_model: Embedding width used inside the transformer.
            nhead: Number of attention heads.
            num_layers: Number of encoder layers and of decoder layers.
            dropout: Dropout probability inside the transformer.
        """
        super(InformerModel, self).__init__()
        self.encoder = nn.Linear(input_size, d_model)
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        """Map a (batch, seq, features) tensor to a (batch, 1) prediction."""
        x = self.encoder(x)
        # The projected window serves as both source and target sequence.
        x = self.transformer(x, x)
        # Read the prediction from the last time step of every sample.
        return self.decoder(x[:, -1, :])

# Initialize Informer Model
# Mean-squared error loss; Adam with a small L2 weight decay.
informer_model = InformerModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(informer_model.parameters(), lr=0.0005, weight_decay=1e-5)

# **Step 5: Train Informer Model**
# Full-batch training: every epoch is exactly one optimizer step computed on
# the whole training tensor.
epochs = 100
for epoch in range(epochs):
    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    y_pred = informer_model(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report the training loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# **Step 6: Visualize Attention Weights**
# NOTE(review): `in_proj_weight` is the input-projection parameter of the first
# encoder layer's self-attention module, not the attention scores computed for
# any particular input. The heatmap below is therefore a row-wise softmax of
# that parameter matrix; its rows and columns are weight-matrix indices, so the
# "Input Sequence Position" / "Attention Head" axis labels should be confirmed
# or replaced before this figure is interpreted.
attention_weights = torch.softmax(informer_model.transformer.encoder.layers[0].self_attn.in_proj_weight, dim=1).detach().numpy()
plt.figure(figsize=(8, 6))
plt.imshow(attention_weights, cmap='viridis', aspect='auto')
plt.colorbar(label='Attention Weight')
plt.xlabel('Input Sequence Position')
plt.ylabel('Attention Head')
plt.title('Visualization of Attention Weights in Informer Model')
plt.show()

# **Step 7: Forecast Next 6 Months with Informer Model**
forecast_horizon = 6
future_dates = pd.date_range(start=web_traffic_data['Date'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq="M")

# Forecast recursively from the last observed window instead of feeding the
# model random noise: each predicted (scaled) visitor count is appended to the
# window and the oldest row is dropped before predicting the next month.
# Future values of the other features are unknown, so they are held at their
# last observed values (a simplifying assumption).
current_window = data_scaled[-seq_length:].copy()
input_windows, scaled_forecasts = [], []
# The model uses dropout; eval() switches it off so the forecast is
# deterministic rather than a random sub-network's output.
informer_model.eval()
with torch.no_grad():  # no gradients are needed for forecasting
    for _ in range(forecast_horizon):
        input_windows.append(current_window)
        window_torch = torch.tensor(current_window, dtype=torch.float32).unsqueeze(0)
        next_visitors = informer_model(window_torch).item()
        scaled_forecasts.append(next_visitors)
        next_row = current_window[-1].copy()
        next_row[0] = next_visitors  # column 0 is the scaled Monthly_Visitors
        current_window = np.vstack([current_window[1:], next_row])

# Keep the names earlier versions of this cell exposed: the windows actually
# fed to the model, shaped (horizon, seq_length, features).
future_data = np.stack(input_windows)
future_data_torch = torch.tensor(future_data, dtype=torch.float32)
future_predictions = np.array(scaled_forecasts, dtype=np.float32).reshape(-1, 1)

# Rescale predictions
# The scaler was fitted on 5 columns, so pad the single prediction column with
# 4 dummy columns, invert the scaling, and keep only the visitor column.
future_predictions_rescaled = scaler.inverse_transform(np.concatenate((future_predictions, np.zeros((forecast_horizon, 4))), axis=1))[:, 0]

# **Step 8: Display Forecasted Traffic Trends**
plt.figure(figsize=(10, 6))
plt.plot(web_traffic_data['Date'], web_traffic_data['Monthly_Visitors'], label="Historical Traffic")
plt.plot(future_dates, future_predictions_rescaled, marker='o', linestyle="--", label="Predicted Traffic (Informer Model)")
plt.xlabel("Date")
plt.ylabel("Monthly Visitors")
plt.title("Informer-Based Web Traffic Forecasting with Attention")
plt.legend()
plt.show()

# **Output Forecasted Values**
future_forecast_df = pd.DataFrame({'Date': future_dates, 'Predicted_Monthly_Visitors': future_predictions_rescaled})
print(future_forecast_df)


Telecom customer churn prediction

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# **Telecom Customer Churn Prediction Model with LSTM**

# Generate simulated telecom customer data
# Seeded so the simulated table is reproducible; the draws are consumed in the
# order the columns are written below.
np.random.seed(42)
num_customers = 5000

telecom_data = pd.DataFrame({
    'Customer_ID': np.arange(1, num_customers + 1),
    'Tenure_Months': np.random.randint(1, 72, num_customers),
    'Monthly_Bill': np.random.uniform(20, 150, num_customers),
    'Total_Usage_GB': np.random.uniform(5, 100, num_customers),
    'Customer_Support_Calls': np.random.randint(0, 10, num_customers),
    # Categorical fields are stored as bare integer codes.
    'Contract_Type': np.random.choice([0, 1], num_customers, p=[0.6, 0.4]),
    'Payment_Method': np.random.choice([0, 1, 2, 3], num_customers),
    # The label is sampled independently of every feature (about 25% churn).
    'Churn': np.random.choice([0, 1], num_customers, p=[0.75, 0.25])
})

# **Step 1: Preprocessing & Feature Engineering**
# Drop the identifier and the label; the remaining six columns are the inputs.
X = telecom_data.drop(columns=['Customer_ID', 'Churn'])
y = telecom_data['Churn']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
# Fit the scaler on the training rows only and reuse it for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert data to PyTorch tensors
# Labels become float column vectors to match the model's (batch, 1) output.
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_torch = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_torch = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Reshape for LSTM (batch_size, seq_length, num_features)
# Each customer becomes a sequence of length 1.
X_train_torch = X_train_torch.view(X_train_torch.shape[0], 1, X_train_torch.shape[1])
X_test_torch = X_test_torch.view(X_test_torch.shape[0], 1, X_test_torch.shape[1])

# **Step 2: Define LSTM Model**
class LSTMModel(nn.Module):
    """Stacked LSTM binary classifier that outputs a probability per sample."""

    def __init__(self, input_size, hidden_size=50, num_layers=2):
        """Create the recurrent stack and the single-logit linear layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map a (batch, seq, features) tensor to (batch, 1) values in (0, 1)."""
        hidden_states, _final_state = self.lstm(x)
        # Classify from the hidden state at the final time step.
        logits = self.fc(hidden_states[:, -1, :])
        return torch.sigmoid(logits)

# Initialize and train the model
# The model already applies a sigmoid, so plain binary cross-entropy (BCELoss)
# is the matching loss.
input_size = X_train.shape[1]
lstm_model = LSTMModel(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)

# **Step 3: Train LSTM Model**
# Full-batch training: every epoch is exactly one optimizer step computed on
# the whole training tensor.
epochs = 50
for epoch in range(epochs):
    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    y_pred = lstm_model(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report the training loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# **Step 4: Evaluate Model**
# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
y_pred_test = lstm_model(X_test_torch).detach().numpy()
y_pred_test = (y_pred_test > 0.5).astype(int)

# Compute accuracy
# Flatten the (n, 1) predictions so they compare element-wise with the labels.
accuracy = np.mean(y_pred_test.flatten() == y_test.values)
print(f'LSTM Model Accuracy: {accuracy * 100:.2f}%')

# **Step 5: Predict Churn for a New Customer**
def predict_churn_lstm(customer_data_point):
    """Classify one customer as "Churn" or "Retained" with the trained LSTM.

    Args:
        customer_data_point: Raw feature values for a single customer, in the
            column order the module-level ``scaler`` was fitted on.

    Returns:
        "Churn" when the predicted probability exceeds 0.5, else "Retained".
    """
    scaled_features = scaler.transform([customer_data_point])
    # Shape the row as (batch=1, seq=1, features), matching the training layout.
    model_input = torch.tensor(scaled_features, dtype=torch.float32).view(1, 1, -1)
    churn_probability = lstm_model(model_input).item()
    if churn_probability > 0.5:
        return "Churn"
    return "Retained"

# Example customer: 24-month tenure, $75 bill, 50GB data usage, 2 support calls, annual contract, PayPal payment
# Values follow the feature column order: Tenure_Months, Monthly_Bill,
# Total_Usage_GB, Customer_Support_Calls, Contract_Type, Payment_Method.
# NOTE(review): the simulated data defines Contract_Type and Payment_Method only
# as integer codes; reading 1 as "annual" and 2 as "PayPal" is an assumption.
sample_customer = [24, 75, 50, 2, 1, 2]
churn_decision = predict_churn_lstm(sample_customer)
print(f"Churn Prediction for Sample Customer: {churn_decision}")


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# **Telecom Customer Churn Prediction Model with Transformer**

# Generate simulated telecom customer data
# Seeded so the simulated table is reproducible; the draws are consumed in the
# order the columns are written below.
np.random.seed(42)
num_customers = 5000

telecom_data = pd.DataFrame({
    'Customer_ID': np.arange(1, num_customers + 1),
    'Tenure_Months': np.random.randint(1, 72, num_customers),
    'Monthly_Bill': np.random.uniform(20, 150, num_customers),
    'Total_Usage_GB': np.random.uniform(5, 100, num_customers),
    'Customer_Support_Calls': np.random.randint(0, 10, num_customers),
    # Categorical fields are stored as bare integer codes.
    'Contract_Type': np.random.choice([0, 1], num_customers, p=[0.6, 0.4]),
    'Payment_Method': np.random.choice([0, 1, 2, 3], num_customers),
    # The label is sampled independently of every feature (about 25% churn).
    'Churn': np.random.choice([0, 1], num_customers, p=[0.75, 0.25])
})

# **Step 1: Preprocessing & Feature Engineering**
# Drop the identifier and the label; the remaining six columns are the inputs.
X = telecom_data.drop(columns=['Customer_ID', 'Churn'])
y = telecom_data['Churn']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
# Fit the scaler on the training rows only and reuse it for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert data to PyTorch tensors
# Labels become float column vectors to match the model's (batch, 1) output.
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_torch = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_torch = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Reshape for Transformer (batch_size, seq_length, num_features)
# Each customer becomes a sequence of length 1.
X_train_torch = X_train_torch.view(X_train_torch.shape[0], 1, X_train_torch.shape[1])
X_test_torch = X_test_torch.view(X_test_torch.shape[0], 1, X_test_torch.shape[1])

# **Step 2: Define Transformer Model**
class TransformerModel(nn.Module):
    """Transformer binary classifier that outputs a probability per sample.

    Inputs are laid out as (batch, seq, features). ``nn.Transformer`` defaults
    to (seq, batch, features), so it is built with ``batch_first=True``;
    without that flag the customers in a batch would be treated as one long
    sequence and attend to each other, making a prediction depend on which
    other customers happen to be in the batch.
    """

    def __init__(self, input_size, d_model=64, nhead=4, num_layers=3, dropout=0.1):
        """Create the input projection, the transformer and the output head.

        Args:
            input_size: Number of features per time step.
            d_model: Embedding width used inside the transformer.
            nhead: Number of attention heads.
            num_layers: Number of encoder layers and of decoder layers.
            dropout: Dropout probability inside the transformer.
        """
        super(TransformerModel, self).__init__()
        self.encoder = nn.Linear(input_size, d_model)
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        """Map a (batch, seq, features) tensor to (batch, 1) values in (0, 1)."""
        x = self.encoder(x)
        # The projected input serves as both source and target sequence.
        x = self.transformer(x, x)
        # Classify from the last sequence position of every sample.
        return torch.sigmoid(self.decoder(x[:, -1, :]))

# Initialize and train the model
# The model already applies a sigmoid, so plain binary cross-entropy (BCELoss)
# is the matching loss.
input_size = X_train.shape[1]
transformer_model = TransformerModel(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(transformer_model.parameters(), lr=0.001)

# **Step 3: Train Transformer Model**
# Full-batch training: every epoch is exactly one optimizer step computed on
# the whole training tensor.
epochs = 50
transformer_model.train()  # make sure dropout is active while training
for epoch in range(epochs):
    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    y_pred = transformer_model(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report the training loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# **Step 4: Evaluate Model**
# Switch to eval mode so dropout is disabled; otherwise the test predictions
# (and the reported accuracy) change from run to run. Gradients are not needed.
transformer_model.eval()
with torch.no_grad():
    y_pred_test = transformer_model(X_test_torch).numpy()
# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
y_pred_test = (y_pred_test > 0.5).astype(int)

# Compute accuracy
# Flatten the (n, 1) predictions so they compare element-wise with the labels.
accuracy = np.mean(y_pred_test.flatten() == y_test.values)
print(f'Transformer Model Accuracy: {accuracy * 100:.2f}%')

# **Step 5: Predict Churn for a New Customer**
def predict_churn_transformer(customer_data_point):
    """Classify one customer as "Churn" or "Retained" with the trained transformer.

    The model is run in eval mode under ``torch.no_grad()`` so dropout is off
    and the same input always yields the same answer; the model's previous
    train/eval mode is restored afterwards.

    Args:
        customer_data_point: Raw feature values for a single customer, in the
            column order the module-level ``scaler`` was fitted on.

    Returns:
        "Churn" when the predicted probability exceeds 0.5, else "Retained".
    """
    customer_scaled = scaler.transform([customer_data_point])
    # Shape the row as (batch=1, seq=1, features), matching the training layout.
    customer_torch = torch.tensor(customer_scaled, dtype=torch.float32).view(1, 1, -1)
    was_training = transformer_model.training
    transformer_model.eval()
    try:
        with torch.no_grad():
            prediction = transformer_model(customer_torch).item()
    finally:
        transformer_model.train(was_training)
    return "Churn" if prediction > 0.5 else "Retained"

# Example customer: 24-month tenure, $75 bill, 50GB data usage, 2 support calls, annual contract, PayPal payment
# Values follow the feature column order: Tenure_Months, Monthly_Bill,
# Total_Usage_GB, Customer_Support_Calls, Contract_Type, Payment_Method.
# NOTE(review): the simulated data defines Contract_Type and Payment_Method only
# as integer codes; reading 1 as "annual" and 2 as "PayPal" is an assumption.
sample_customer = [24, 75, 50, 2, 1, 2]
churn_decision = predict_churn_transformer(sample_customer)
print(f"Churn Prediction for Sample Customer: {churn_decision}")


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# **Telecom Customer Churn Prediction Model with Optimized Transformer & API Deployment**

# Generate simulated telecom customer data
# Seeded so the simulated table is reproducible; the draws are consumed in the
# order the columns are written below.
np.random.seed(42)
num_customers = 5000

telecom_data = pd.DataFrame({
    'Customer_ID': np.arange(1, num_customers + 1),
    'Tenure_Months': np.random.randint(1, 72, num_customers),
    'Monthly_Bill': np.random.uniform(20, 150, num_customers),
    'Total_Usage_GB': np.random.uniform(5, 100, num_customers),
    'Customer_Support_Calls': np.random.randint(0, 10, num_customers),
    # Categorical fields are stored as bare integer codes.
    'Contract_Type': np.random.choice([0, 1], num_customers, p=[0.6, 0.4]),
    'Payment_Method': np.random.choice([0, 1, 2, 3], num_customers),
    # The label is sampled independently of every feature (about 25% churn).
    'Churn': np.random.choice([0, 1], num_customers, p=[0.75, 0.25])
})

# **Step 1: Preprocessing & Feature Engineering**
# Drop the identifier and the label; the remaining six columns are the inputs.
X = telecom_data.drop(columns=['Customer_ID', 'Churn'])
y = telecom_data['Churn']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
# Fit the scaler on the training rows only and reuse it for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert data to PyTorch tensors
# Labels become float column vectors to match the model's (batch, 1) output.
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_torch = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_torch = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Reshape for Transformer (batch_size, seq_length, num_features)
# Each customer becomes a sequence of length 1.
X_train_torch = X_train_torch.view(X_train_torch.shape[0], 1, X_train_torch.shape[1])
X_test_torch = X_test_torch.view(X_test_torch.shape[0], 1, X_test_torch.shape[1])

# **Step 2: Optimized Transformer Model**
class OptimizedTransformerModel(nn.Module):
    """Larger transformer binary classifier that outputs a probability per sample.

    Inputs are laid out as (batch, seq, features). ``nn.Transformer`` defaults
    to (seq, batch, features), so it is built with ``batch_first=True``;
    without that flag the customers in a batch would be treated as one long
    sequence and attend to each other, making a prediction depend on which
    other customers happen to be in the batch.
    """

    def __init__(self, input_size, d_model=128, nhead=8, num_layers=4, dropout=0.2):
        """Create the input projection, the transformer and the output head.

        Args:
            input_size: Number of features per time step.
            d_model: Embedding width used inside the transformer.
            nhead: Number of attention heads.
            num_layers: Number of encoder layers and of decoder layers.
            dropout: Dropout probability inside the transformer.
        """
        super(OptimizedTransformerModel, self).__init__()
        self.encoder = nn.Linear(input_size, d_model)
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        """Map a (batch, seq, features) tensor to (batch, 1) values in (0, 1)."""
        x = self.encoder(x)
        # The projected input serves as both source and target sequence.
        x = self.transformer(x, x)
        # Classify from the last sequence position of every sample.
        return torch.sigmoid(self.decoder(x[:, -1, :]))

# Initialize Optimized Transformer Model
# The model already applies a sigmoid, so plain binary cross-entropy (BCELoss)
# is the matching loss. AdamW applies decoupled weight decay.
input_size = X_train.shape[1]
transformer_model = OptimizedTransformerModel(input_size)
criterion = nn.BCELoss()
optimizer = optim.AdamW(transformer_model.parameters(), lr=0.0005, weight_decay=1e-5)

# **Step 3: Train Optimized Transformer Model**
# Full-batch training: every epoch is exactly one optimizer step computed on
# the whole training tensor.
epochs = 100
transformer_model.train()  # make sure dropout is active while training
for epoch in range(epochs):
    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    y_pred = transformer_model(X_train_torch)
    loss = criterion(y_pred, y_train_torch)
    loss.backward()
    optimizer.step()
    # Report the training loss every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# **Step 4: Evaluate Model**
# Switch to eval mode so dropout is disabled; otherwise the test predictions
# (and the reported accuracy) change from run to run. Gradients are not needed.
transformer_model.eval()
with torch.no_grad():
    y_pred_test = transformer_model(X_test_torch).numpy()
# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
y_pred_test = (y_pred_test > 0.5).astype(int)

# Compute accuracy
# Flatten the (n, 1) predictions so they compare element-wise with the labels.
accuracy = np.mean(y_pred_test.flatten() == y_test.values)
print(f'Optimized Transformer Model Accuracy: {accuracy * 100:.2f}%')

# **Step 5: API Deployment (Flask)**
from flask import Flask, request, jsonify

app = Flask(__name__)


def _bad_request(message):
    """Build a JSON error body with HTTP status 400 carrying *message*."""
    return jsonify({'error': message}), 400


@app.route('/predict', methods=['POST'])
def predict():
    """Predict churn for the customer described in the JSON request body.

    Expects ``{"customer_data": [...]}`` holding one numeric value per model
    feature, in the column order the scaler was fitted on.

    Returns:
        ``{"prediction": "Churn" | "Retained"}`` on success, or
        ``{"error": ...}`` with status 400 when the body is missing, is not a
        JSON object, or carries the wrong number or type of values.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'customer_data' not in payload:
        return _bad_request("Request body must be a JSON object with a 'customer_data' list")

    data = payload['customer_data']
    if not isinstance(data, (list, tuple)) or len(data) != input_size:
        return _bad_request(f"'customer_data' must be a list of {input_size} numeric values")
    try:
        features = [float(value) for value in data]
    except (TypeError, ValueError):
        return _bad_request("'customer_data' must contain only numeric values")

    customer_scaled = scaler.transform([features])
    # Shape the row as (batch=1, seq=1, features), matching the training layout.
    customer_torch = torch.tensor(customer_scaled, dtype=torch.float32).view(1, 1, -1)
    # Eval mode disables dropout so identical requests get identical answers.
    transformer_model.eval()
    with torch.no_grad():
        prediction = transformer_model(customer_torch).item()
    result = "Churn" if prediction > 0.5 else "Retained"
    return jsonify({'prediction': result})

if __name__ == '__main__':
    # Debug mode exposes an interactive debugger that can execute arbitrary
    # code, so it stays off for anything reachable by other clients.
    app.run(debug=False)


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# **Step 1: Generate Simulated Supply Chain Data**
# Seeded so the simulated table is reproducible; the draws are consumed in the
# order the columns are written below.
np.random.seed(42)
num_samples = 5000

data = pd.DataFrame({
    # Identifier-like fields are stored as bare integer codes.
    'Warehouse_ID': np.random.randint(1, 50, num_samples),
    'Product_Category': np.random.randint(1, 20, num_samples),
    'Stock_Level': np.random.randint(10, 1000, num_samples),
    'Lead_Time_Days': np.random.randint(1, 30, num_samples),
    'Supplier_Reliability': np.random.uniform(0.7, 1.0, num_samples),
    'Demand_Volatility': np.random.uniform(0.1, 0.9, num_samples),
    'Shipping_Cost': np.random.uniform(5, 50, num_samples),
    # The target is sampled independently of every feature above.
    'Restocking_Quantity': np.random.randint(10, 500, num_samples)
})

# **Step 2: Preprocessing & Feature Engineering**
# Everything except the target column is used as a model input.
X = data.drop(columns=['Restocking_Quantity'])
y = data['Restocking_Quantity']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
# Fit the scaler on the training rows only and reuse it for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# **Step 3: Train Machine Learning Model (Random Forest Regressor)**
# 100 trees; random_state fixes the forest's own randomness for repeatability.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Predictions
y_pred_rf = rf_model.predict(X_test_scaled)

# **Step 4: Evaluate Model Performance**
# Mean absolute error computed by hand: average of |actual - predicted|.
rf_mae = np.mean(np.abs(y_test - y_pred_rf))
print(f'Mean Absolute Error (MAE): {rf_mae:.2f}')

# **Step 5: Feature Importance Analysis**
# Importances come from the fitted forest; names come from the unscaled frame,
# whose column order matches the scaled training matrix.
feature_importance_rf = rf_model.feature_importances_
feature_names = X.columns

# Plot Feature Importance
plt.figure(figsize=(8, 6))
plt.barh(feature_names, feature_importance_rf, color='skyblue')
plt.xlabel("Feature Importance Score")
plt.ylabel("Features")
plt.title("Feature Importance in Supply Chain Optimization (Random Forest)")
# Flip the y-axis so the first feature is drawn at the top of the chart.
plt.gca().invert_yaxis()
plt.show()

# **Step 6: Predict Restocking Needs for a New Warehouse Order**
def predict_restocking(warehouse_data):
    """Predict the restocking quantity for a single warehouse order.

    Args:
        warehouse_data: Raw feature values for one order, in the column order
            the module-level ``scaler`` was fitted on.

    Returns:
        The random-forest prediction converted with ``int()``, which drops
        any fractional part.
    """
    scaled_row = scaler.transform([warehouse_data])
    predicted_quantity = rf_model.predict(scaled_row)[0]
    return int(predicted_quantity)

# Example warehouse input: Warehouse 10, Category 5, 200 stock level, 15 days lead time, reliability 0.9, volatility 0.3, shipping cost 25
# Values follow the feature column order: Warehouse_ID, Product_Category,
# Stock_Level, Lead_Time_Days, Supplier_Reliability, Demand_Volatility,
# Shipping_Cost.
sample_warehouse = [10, 5, 200, 15, 0.9, 0.3, 25]
restocking_quantity = predict_restocking(sample_warehouse)
print(f"Predicted Restocking Quantity: {restocking_quantity}")


Wayfair pricing case study

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Data Simulation (since we don't have real Wayfair data)
# Seeded so the simulated table is reproducible; the draws are consumed in the
# order the entries are written below.
np.random.seed(42)
data = {
    'product_id': np.arange(1000),
    'price': np.random.uniform(10, 500, 1000),
    'competitor_price': np.random.uniform(8, 520, 1000),
    'demand_index': np.random.uniform(0.5, 1.5, 1000),
    'inventory_level': np.random.randint(50, 500, 1000),
    'seasonality_index': np.random.uniform(0.8, 1.2, 1000)
}

# Generating sales based on features with some noise
# Sales fall inversely with price and scale with the demand and seasonality
# indices; Gaussian noise (standard deviation 10) is added on top.
sales = (2000 / data['price']) * data['demand_index'] * data['seasonality_index'] + np.random.normal(0, 10, 1000)
# For high-priced products the expected sales are only a few units, so the
# noise frequently pushes the value below zero. Units sold cannot be negative,
# so clip at zero before truncating to whole units.
data['units_sold'] = np.clip(sales, 0, None).astype(int)

df = pd.DataFrame(data)

# Step 2: Exploratory Data Analysis
# Scatter of price against units sold for every simulated product.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='price', y='units_sold', data=df)
plt.title('Price vs Units Sold')
plt.show()

# Step 3: Model Development
# product_id is left out of the feature set; units_sold is the target.
X = df[['price', 'competitor_price', 'demand_index', 'inventory_level', 'seasonality_index']]
y = df['units_sold']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline Model: Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Advanced Model: XGBoost
xgb_model = XGBRegressor(objective='reg:squarederror', n_estimators=100)
xgb_model.fit(X_train, y_train)

# Step 4: Evaluation
# Both models are scored on the same held-out 20% of products.
lr_preds = lr_model.predict(X_test)
xgb_preds = xgb_model.predict(X_test)

print("Linear Regression R^2:", r2_score(y_test, lr_preds))
print("XGBoost R^2:", r2_score(y_test, xgb_preds))

# Step 5: Visualization of Predictions
# The x-axis is the position of each product within the test set, not time.
plt.figure(figsize=(12, 6))
plt.plot(y_test.values, label='Actual Units Sold')
plt.plot(lr_preds, label='Linear Regression Predictions')
plt.plot(xgb_preds, label='XGBoost Predictions')
plt.legend()
plt.title('Actual vs Predicted Units Sold')
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb

# 1. Data Simulation
# Every column is drawn independently from the seeded global RNG. The draws are
# issued in the same order as the columns appear so the values are reproducible.
np.random.seed(42)
n_products = 1000

data = {'product_id': np.arange(n_products)}
data['base_price'] = np.random.uniform(10, 500, size=n_products)
data['demand'] = np.random.randint(50, 200, size=n_products)
data['competitor_price'] = np.random.uniform(8, 520, size=n_products)
data['inventory_level'] = np.random.randint(10, 300, size=n_products)
data['seasonality'] = np.random.choice([0, 1], size=n_products)  # binary in/out-of-season flag
data['sales'] = np.random.randint(30, 250, size=n_products)

df = pd.DataFrame(data)

# 2. Exploratory Data Analysis (EDA)
# Histogram (with KDE overlay) of the target column.
plt.figure(figsize=(10, 6))
sns.histplot(df['sales'], kde=True)
plt.title('Sales Distribution')
plt.show()

# Pairwise correlations between all columns, drawn as an annotated heatmap.
corr = df.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# 3. Data Preparation
# product_id is an identifier, so it is left out of the feature matrix.
feature_names = ['base_price', 'demand', 'competitor_price', 'inventory_level', 'seasonality']
X, y = df[feature_names], df['sales']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Linear Regression Model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

# Evaluation
mse_lr, r2_lr = mean_squared_error(y_test, y_pred_lr), r2_score(y_test, y_pred_lr)
print(f'Linear Regression - MSE: {mse_lr}, R^2: {r2_lr}')

# 5. XGBoost Model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

# Evaluation
mse_xgb, r2_xgb = mean_squared_error(y_test, y_pred_xgb), r2_score(y_test, y_pred_xgb)
print(f'XGBoost - MSE: {mse_xgb}, R^2: {r2_xgb}')

# 6. Visualization of Predictions
# Each entry: (values, keyword arguments forwarded to plt.plot).
plt.figure(figsize=(10, 6))
plotted_series = [
    (y_test.values, {'label': 'Actual Sales', 'marker': 'o'}),
    (y_pred_lr, {'label': 'Predicted Sales (Linear Regression)', 'linestyle': '--'}),
    (y_pred_xgb, {'label': 'Predicted Sales (XGBoost)', 'linestyle': '-.'}),
]
for series_values, plot_kwargs in plotted_series:
    plt.plot(series_values, **plot_kwargs)
plt.legend()
plt.title('Actual vs Predicted Sales')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import skew

# 1. Data Preparation & Exploration
df = pd.read_csv('wayfair_pricing_data.csv')  # Placeholder file name

# Display first few rows
print(df.head())

# Summary statistics
print(df.describe())

# Check for missing values
# DataFrame.info() writes its report itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
df.info()

# Visualize data distribution
# df.hist() creates its own figure, so no separate plt.figure() call is made
# (that would leave an empty extra figure on screen).
df.hist(bins=30, figsize=(15, 10))
plt.tight_layout()
plt.show()

# 2. Handling Skewed Data
# Skewness is only defined for numeric columns; restricting to them keeps this
# working when the CSV also contains text columns.
numeric_cols = df.select_dtypes(include=[np.number]).columns

# Check skewness
skewness = df[numeric_cols].skew()
print("Skewness of features:\n", skewness)

# Apply log transformation to skewed data
# NOTE: this loop also transforms the target column when it is skewed, so the
# model and the error analysis below then work on the log scale.
for col in numeric_cols:
    observed = df[col].dropna()
    # log1p is undefined at or below -1 and distorts mixed-sign data, so only
    # non-negative columns are transformed. Missing values are ignored when
    # measuring skewness (a NaN would otherwise make the statistic NaN).
    if observed.empty or observed.min() < 0:
        continue
    if abs(skew(observed)) > 0.75:
        df[col] = np.log1p(df[col])

# Visualize after transformation
df.hist(bins=30, figsize=(15, 10))
plt.tight_layout()
plt.show()

# 3. Feature Selection & Splitting
features = ['competitor_price', 'demand', 'inventory_level', 'seasonality']
target = 'price'

X = df[features]
y = df[target]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Train Shape:", X_train.shape)
print("Test Shape:", X_test.shape)

# 4. Model Training
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predictions
y_train_pred = rf.predict(X_train)
y_pred = rf.predict(X_test)

# Model Performance
# A large gap between the two scores indicates over-fitting.
print("Train R^2:", r2_score(y_train, y_train_pred))
print("Test R^2:", r2_score(y_test, y_pred))

# 5. Feature Importance Visualization
importances = rf.feature_importances_
plt.figure(figsize=(8, 5))
sns.barplot(x=importances, y=features)
plt.title('Feature Importance')
plt.show()

# 6. Error Analysis
# Residuals on the held-out rows (actual minus predicted).
errors = y_test - y_pred
plt.figure(figsize=(8, 5))
sns.histplot(errors, kde=True)
plt.title('Prediction Errors Distribution')
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PowerTransformer

# Step 1: Generate Synthetic Data
np.random.seed(42)
n_samples = 1000

# The four draws below share the seeded global RNG, so their order determines
# the generated values.
prices = np.random.exponential(100, n_samples)       # right-skewed price data
product_age = np.random.randint(1, 365, n_samples)   # product age in days
ratings = np.random.uniform(1, 5, n_samples)         # customer ratings
sales_volume = np.random.poisson(20, n_samples)      # sales volume

# Combine into a DataFrame (column order follows the list below)
column_names = ['Price', 'Product_Age', 'Ratings', 'Sales_Volume']
column_values = [prices, product_age, ratings, sales_volume]
data = pd.DataFrame(dict(zip(column_names, column_values)))

# Step 2: Exploratory Data Analysis
# Histogram of the raw prices before any transformation.
plt.figure(figsize=(12, 6))
sns.histplot(data['Price'], kde=True)
plt.title('Original Price Distribution')
plt.show()

# Check skewness
price_skewness = skew(data['Price'])
print(f"Skewness of Price: {price_skewness}")

# Step 3: Handling Skewed Data
# Yeo-Johnson power transform fitted on the price column.
pt = PowerTransformer(method='yeo-johnson')
data['Price_Transformed'] = pt.fit_transform(data[['Price']])

plt.figure(figsize=(12, 6))
sns.histplot(data['Price_Transformed'], kde=True)
plt.title('Transformed Price Distribution')
plt.show()

# Step 4: Preparing Data for Modeling
# The transformed price is the regression target; the raw price is not a feature.
predictor_columns = ['Product_Age', 'Ratings', 'Sales_Volume']
X, y = data[predictor_columns], data['Price_Transformed']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 5: Building the Model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 6: Predictions and Evaluation
# The error is measured on the transformed scale, not in original price units.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Visualization of Predictions
plt.figure(figsize=(12, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.title('Actual vs Predicted Prices')
plt.xlabel('Actual Prices (Transformed)')
plt.ylabel('Predicted Prices (Transformed)')
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from scipy.optimize import minimize

# Step 1: Generate Synthetic Data
np.random.seed(42)
n_rows = 1000

# Columns are drawn from the seeded global RNG in the order they are listed.
data = pd.DataFrame({
    'base_price': np.random.uniform(low=20, high=100, size=n_rows),
    'demand': np.random.uniform(low=100, high=1000, size=n_rows),
    'competitor_price': np.random.uniform(low=15, high=105, size=n_rows),
    'inventory_levels': np.random.randint(low=50, high=500, size=n_rows),
    'sales': np.random.randint(low=50, high=1000, size=n_rows),
})

# Step 2: Target Variable (Revenue)
data['revenue'] = data['base_price'].mul(data['sales'])

# Step 3: Train Demand Prediction Model
# The column order here fixes the order of values expected at prediction time.
features = ['base_price', 'competitor_price', 'inventory_levels', 'sales']
X, y = data[features], data['demand']

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# Step 4: Price Optimization Function
def optimize_price(base_price, competitor_price, inventory, sales,
                   demand_model=None, bounds=(10, 200)):
    """Search for the price that maximises predicted revenue for one product.

    Revenue is ``price * predicted_demand``, where the demand comes from
    ``demand_model.predict`` on a single row
    ``[price, competitor_price, inventory, sales]``.

    Args:
        base_price: Current price; used as the optimiser's starting point and
            returned unchanged when the optimiser reports failure.
        competitor_price: Competitor price passed through to the model.
        inventory: Inventory level passed through to the model.
        sales: Sales figure passed through to the model.
        demand_model: Object with a ``predict`` method. Defaults to the
            module-level ``model``.
        bounds: ``(lowest, highest)`` price the optimiser may propose.

    Returns:
        The optimised price as a float, or ``base_price`` if the optimiser did
        not converge.
    """
    if demand_model is None:
        demand_model = model  # module-level demand model

    lowest, highest = bounds
    # Estimators fitted on a DataFrame expose the training column names; when
    # present, rows are passed with the same names so the inputs line up.
    column_names = getattr(demand_model, 'feature_names_in_', None)

    def revenue_function(price):
        # minimize() hands over a 1-element array. Embedding that array in a
        # list next to plain scalars builds a ragged row that NumPy rejects,
        # so unwrap it to a scalar first.
        unit_price = float(np.asarray(price).ravel()[0])
        row = np.array([[unit_price, competitor_price, inventory, sales]], dtype=float)
        if column_names is not None:
            row = pd.DataFrame(row, columns=column_names)
        predicted_demand = float(demand_model.predict(row)[0])
        return -unit_price * predicted_demand  # Negative for minimization

    # Start inside the feasible interval even if the current price is not.
    start_price = min(max(float(base_price), lowest), highest)
    result = minimize(revenue_function, x0=[start_price], bounds=[(lowest, highest)])
    return float(result.x[0]) if result.success else base_price

# Step 5: Apply Optimization
# One optimisation per product row, in row order.
optimized_prices = [
    optimize_price(row.base_price, row.competitor_price, row.inventory_levels, row.sales)
    for row in data.itertuples(index=False)
]

# Step 6: Compare Base vs Optimized Prices
data['optimized_price'] = optimized_prices

# Only the first 50 products are drawn to keep the chart readable.
plt.figure(figsize=(10, 6))
for column, legend_label, marker_style in (
    ('base_price', 'Base Price', 'o'),
    ('optimized_price', 'Optimized Price', 'x'),
):
    plt.plot(data[column].head(50), label=legend_label, marker=marker_style)
plt.title('Base Price vs Optimized Price')
plt.xlabel('Sample Index')
plt.ylabel('Price')
plt.legend()
plt.show()


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
import matplotlib.pyplot as plt

# 1. Synthetic Data Generation
np.random.seed(42)

# Generate customers, products, and purchase history
customers = [f'Customer_{i}' for i in range(1, 11)]
products = [f'Product_{i}' for i in range(1, 11)]
categories = ['Furniture', 'Decor', 'Appliances', 'Outdoor', 'Lighting']

# Purchase history: 50 random (customer, product, rating, category) rows
purchase_history = pd.DataFrame({
    'CustomerID': np.random.choice(customers, 50),
    'ProductID': np.random.choice(products, 50),
    'Rating': np.random.randint(1, 6, 50),
    'Category': np.random.choice(categories, 50)
})

# Display sample data
print(purchase_history.head())

# 2. Collaborative Filtering (using Surprise library)
# Surprise expects exactly three columns in (user, item, rating) order.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(purchase_history[['CustomerID', 'ProductID', 'Rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

algo = SVD()
algo.fit(trainset)

predictions = algo.test(testset)

# 3. Content-Based Filtering
# Create a TF-IDF matrix for product categories (one row per purchase record)
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(purchase_history['Category'])

# Compute cosine similarity between every pair of purchase records
cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Recommend products based on similar categories
idx = 0  # Example for the first product
# Rank every row by similarity to the example row, drop the example row itself
# (it is always maximally similar to itself) and keep exactly three neighbours.
ranked_indices = cos_sim[idx].argsort()[::-1]
similar_indices = ranked_indices[ranked_indices != idx][:3]  # Top 3 similar
print("\nContent-Based Recommendations:")
print(purchase_history.iloc[similar_indices][['ProductID', 'Category']])

# 4. Hybrid Recommendation (Combining both)
def hybrid_recommendation(customer_id, top_n=3):
    """Combine collaborative and content-based suggestions for one customer.

    Reads the module-level ``purchase_history``, ``cos_sim`` and ``algo``.

    Args:
        customer_id: Value to look up in the ``CustomerID`` column.
        top_n: Maximum number of products taken from each of the two
            recommenders before merging.

    Returns:
        A list of product ids without duplicates: collaborative picks first
        (highest estimated rating first), then content-based picks. If the
        customer has no purchases, the string
        ``"No purchase history available."`` is returned instead.
    """
    user_ratings = purchase_history[purchase_history['CustomerID'] == customer_id]
    if user_ratings.empty:
        return "No purchase history available."

    last_product = user_ratings.iloc[-1]['ProductID']

    # Seed the content-based search with the customer's own most recent
    # purchase row (not the first row in which anyone bought that product),
    # converted to a position because cos_sim is indexed positionally.
    seed_position = purchase_history.index.get_loc(user_ratings.index[-1])
    ranked_positions = cos_sim[seed_position].argsort()[::-1]

    # Walk the rows from most to least similar, skipping the product that was
    # just bought and any product already collected.
    content_based = []
    for position in ranked_positions:
        candidate = purchase_history.iloc[position]['ProductID']
        if candidate != last_product and candidate not in content_based:
            content_based.append(candidate)
        if len(content_based) == top_n:
            break

    # Estimated rating of every known product for this customer, best first.
    collaborative_preds = sorted(
        (
            (algo.predict(customer_id, prod_id).est, prod_id)
            for prod_id in purchase_history['ProductID'].unique()
        ),
        reverse=True,
    )
    collaborative = [prod for _, prod in collaborative_preds[:top_n]]

    # dict.fromkeys removes duplicates while keeping the ranking order, which a
    # plain set would discard.
    return list(dict.fromkeys(collaborative + content_based))

# Example for Customer_1
print("\nHybrid Recommendations for Customer_1:")
print(hybrid_recommendation('Customer_1'))


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import csr_matrix

# Synthetic Data
# Seed the RNG so the random ratings (and everything derived from them) are
# the same on every run.
np.random.seed(42)
users = ['User1', 'User2', 'User3', 'User4', 'User5']
products = ['Chair', 'Table', 'Lamp', 'Sofa', 'Bed']

# User-Item Ratings: 20 random (user, product, rating) rows
ratings_data = {
    'User': np.random.choice(users, 20),
    'Product': np.random.choice(products, 20),
    'Rating': np.random.randint(1, 6, 20)
}
ratings_df = pd.DataFrame(ratings_data)
print("Ratings Data:")
print(ratings_df)

# Content-Based Filtering (CBF) Data
product_descriptions = {
    'Chair': 'Comfortable wooden chair with cushion',
    'Table': 'Large dining table with modern design',
    'Lamp': 'LED lamp with adjustable brightness',
    'Sofa': 'Cozy sofa with soft fabric',
    'Bed': 'King size bed with memory foam mattress'
}
products_df = pd.DataFrame(list(product_descriptions.items()), columns=['Product', 'Description'])

# Collaborative Filtering (CF)
# pivot_table averages repeated ratings and only contains users/products that
# actually occur in the sample. Reindexing guarantees one row per user and one
# column per product (0 = not rated), so later lookups cannot fail.
user_item_matrix = (
    ratings_df.pivot_table(index='User', columns='Product', values='Rating')
    .reindex(index=users, columns=products)
    .fillna(0)
)
user_similarity = cosine_similarity(user_item_matrix)
# Label the similarity matrix with the matrix's own index rather than a
# separately maintained list, so labels always match the rows they describe.
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)
print("\nUser Similarity Matrix:")
print(user_similarity_df)

# Content-Based Filtering (CBF)
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(products_df['Description'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Rows/columns of cosine_sim follow products_df, so label them from it.
catalog = products_df['Product'].tolist()
content_similarity_df = pd.DataFrame(cosine_sim, index=catalog, columns=catalog)

# For every product, the two most similar other products by description.
cbf_recommendations = {}
for product in catalog:
    neighbours = (
        content_similarity_df.loc[product]
        .drop(product)  # a product is not a recommendation for itself
        .sort_values(ascending=False, kind='stable')
    )
    cbf_recommendations[product] = neighbours.head(2).index.tolist()

print("\nContent-Based Recommendations:")
for product, recs in cbf_recommendations.items():
    print(f"{product}: {recs}")

# Hybrid Recommendation System
# Row-normalise the description similarities so the content score of a product
# is a similarity-weighted average of the user's ratings (same 0-5 scale as the
# ratings). Each row sum is at least 1 because a product's similarity to itself
# is 1, so the division is safe.
content_weights = content_similarity_df.div(content_similarity_df.sum(axis=1), axis=0)

hybrid_recommendations = {}
for user in users:
    # The user's own ratings (0 for products not rated).
    cf_scores = user_item_matrix.loc[user]
    # Content score derived from the description similarities instead of
    # random numbers: products similar to ones the user rated highly score high.
    cbf_scores = content_weights.dot(cf_scores)
    hybrid_scores = cf_scores.add(cbf_scores, fill_value=0).sort_values(ascending=False)
    hybrid_recommendations[user] = hybrid_scores.head(2).index.tolist()

print("\nHybrid Recommendations:")
for user, recs in hybrid_recommendations.items():
    print(f"{user}: {recs}")


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve

# 1. Generate Synthetic Data
np.random.seed(42)
n_customers = 1000
data = {
    'CustomerID': np.arange(1, n_customers + 1),
    'Age': np.random.randint(18, 70, size=n_customers),
    'Tenure': np.random.randint(1, 60, size=n_customers),
    'TotalSpend': np.random.uniform(100, 10000, size=n_customers),
    'PurchaseFrequency': np.random.poisson(2, size=n_customers),
    'LastPurchaseDays': np.random.randint(0, 365, size=n_customers),
    # Roughly 20% churners, drawn independently of every other column.
    'IsChurn': np.random.choice([0, 1], size=n_customers, p=[0.8, 0.2])
}
df = pd.DataFrame(data)

# 2. Exploratory Data Analysis
plt.figure(figsize=(8, 4))
sns.countplot(x='IsChurn', data=df)
plt.title('Churn Distribution')
plt.show()

# Correlation Matrix
plt.figure(figsize=(10, 6))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation')
plt.show()

# 3. Data Preprocessing
X = df[['Age', 'Tenure', 'TotalSpend', 'PurchaseFrequency', 'LastPurchaseDays']]
y = df['IsChurn']

# Split first, then fit the scaler on the training rows only, so no statistics
# of the held-out rows leak into the features the models are trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# 4. Model Building
# Logistic Regression
log_model = LogisticRegression()
log_model.fit(X_train, y_train)
y_pred_log = log_model.predict(X_test)

# Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# 5. Evaluation Metrics
# zero_division=0: a model may never predict the minority class on this data,
# which would otherwise produce undefined-metric warnings.
print("Logistic Regression Results:")
print(classification_report(y_test, y_pred_log, zero_division=0))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_log))

print("\nRandom Forest Results:")
print(classification_report(y_test, y_pred_rf, zero_division=0))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))

# ROC Curve
# Both the curve and the AUC use the predicted churn probability. Computing the
# AUC from hard 0/1 predictions would describe a single threshold only and
# would not match the plotted curve.
rf_churn_proba = rf_model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, rf_churn_proba)
rf_auc = roc_auc_score(y_test, rf_churn_proba)
plt.figure()
plt.plot(fpr, tpr, label='Random Forest (AUC = {:.2f})'.format(rf_auc))
plt.plot([0, 1], [0, 1], linestyle='--')  # chance-level reference line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Create synthetic data
# Five hand-written (query, document, clicked) examples. This is a toy set:
# it contains a single negative example, so the scores below are illustrative.
data = {
    'query': [
        'best laptop for programming',
        'cheap smartphones with good camera',
        'top running shoes for marathon',
        'best coffee machines for home',
        'affordable noise cancelling headphones'
    ],
    'document': [
        'laptop with i7 processor and 16GB RAM, ideal for programming',
        'smartphone under $300 with 48MP camera',
        'running shoes designed for marathon with breathable material',
        'coffee machine with grinder and milk frother for home use',
        'budget headphones with active noise cancellation'
    ],
    'clicked': [1, 1, 1, 1, 0]  # 1 = Clicked (relevant), 0 = Not clicked
}

# Step 2: Data Preprocessing
# Query and document text are concatenated into one string per example.
df = pd.DataFrame(data)
df['combined'] = df['query'] + " " + df['document']
y = df['clicked']

# Step 3: Train-Test Split
# The raw text is split before any vectorisation so the held-out example does
# not influence the vocabulary or the IDF weights.
text_train, text_test, y_train, y_test = train_test_split(
    df['combined'], y, test_size=0.2, random_state=42
)

# Step 4: Feature Extraction using TF-IDF
# Fit on the training text only; the test text is merely transformed.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Step 5: Model Training
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 6: Predictions and Evaluation
y_pred = model.predict(X_test)

# Results
# zero_division=0 avoids undefined-metric warnings for a class that is absent
# from the tiny test split.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))


In [None]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.feature_extraction.text import TfidfVectorizer
import networkx as nx

# Simulated web pages
# Each page has a URL, its text content and the URLs it links to.
data = {
    'url': ['page1.com', 'page2.com', 'page3.com'],
    'content': [
        'Machine learning is the future of technology.',
        'Deep learning drives many AI applications.',
        'Data science includes machine learning and AI.'
    ],
    'links': [['page2.com'], ['page1.com', 'page3.com'], ['page1.com']]
}

# Creating a DataFrame
web_pages = pd.DataFrame(data)

# Indexing using TF-IDF (one row per page, in web_pages order)
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(web_pages['content'])

# PageRank Calculation
graph = nx.DiGraph()
# Register every indexed page up front: a page with no inbound or outbound
# links would otherwise never enter the graph and would have no PageRank entry.
graph.add_nodes_from(web_pages['url'])
for source_url, outgoing_links in zip(web_pages['url'], web_pages['links']):
    graph.add_edges_from((source_url, target_url) for target_url in outgoing_links)

pagerank_scores = nx.pagerank(graph)

# Query Processing
def search(query):
    """Rank the indexed pages for a free-text query.

    Reads the module-level ``vectorizer``, ``tfidf_matrix``, ``web_pages`` and
    ``pagerank_scores``.

    Args:
        query: Query text.

    Returns:
        DataFrame with columns ``url``, ``relevance_score``, ``pagerank`` and
        ``final_score``, sorted by ``final_score`` in descending order.
    """
    query_vec = vectorizer.transform([query])
    # Dot product of every page's TF-IDF row with the query vector.
    relevance = tfidf_matrix.dot(query_vec.T).toarray().ravel()

    page_urls = web_pages['url']
    ranking = pd.DataFrame({
        'url': page_urls,
        'relevance_score': relevance,
        'pagerank': [pagerank_scores[page_url] for page_url in page_urls],
    })
    # Combining relevance score and PageRank: 70% relevance, 30% PageRank
    ranking['final_score'] = 0.7 * ranking['relevance_score'] + 0.3 * ranking['pagerank']
    return ranking.sort_values('final_score', ascending=False)

# Example Query
results = search('machine learning')
print(results)


In [None]:
import numpy as np
import faiss

# Step 1: Generate Synthetic Data (Simulating Product Features)
d = 64     # Dimension of the feature vectors (like embedding size)
n = 10000  # Number of data points (products)

# Random data representing product embeddings, one row per product
data = np.random.random(size=(n, d)).astype(np.float32)

# Step 2: Build the FAISS Index
# Flat index: exact search using L2 (Euclidean) distance
index = faiss.IndexFlatL2(d)
index.add(data)

# Step 3: Query Similar Items
# A random vector stands in for the product being searched for
query_vector = np.random.random(size=(1, d)).astype(np.float32)

# The search returns two arrays: distances and the row numbers of the closest
# items, with one row per query vector
top_k = 5
distances, indices = index.search(query_vector, top_k)

# Step 4: Display the Results
for caption, value in (
    ("Query Vector:", query_vector),
    ("Indices of Similar Items:", indices),
    ("Distances to Similar Items:", distances),
):
    print(caption, value)

In [None]:
import numpy as np
import faiss
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Step 1: Simulate Synthetic Data
np.random.seed(42)
X, _ = make_blobs(n_samples=100, n_features=128, centers=5)  # Simulating image features
# Normalize features. StandardScaler returns float64, while FAISS indexes work
# on C-contiguous float32 arrays, so convert explicitly before indexing.
X = np.ascontiguousarray(StandardScaler().fit_transform(X), dtype='float32')

# Step 2: Build FAISS Index
d = X.shape[1]  # Feature dimension
index = faiss.IndexFlatL2(d)  # L2 distance index
index.add(X)  # Add data to index

# Step 3: Query Example
query_id = 0  # Using the first vector as a query
query_vector = X[query_id].reshape(1, -1)
k = 5  # Retrieve top 5 similar items

# The query vector is itself stored in the index and comes back as its own
# nearest neighbour at distance 0. Ask for one extra hit, drop the query row,
# and keep at most k neighbours.
distances, indices = index.search(query_vector, k + 1)
is_other_item = indices[0] != query_id
distances = distances[:, is_other_item][:, :k]
indices = indices[:, is_other_item][:, :k]

# Step 4: Display Results
print("Query Vector:", query_vector)
print("Top 5 Similar Items (Indices):", indices)
print("Distances:", distances)

# Visualization
# Only the first two of the 128 feature dimensions are drawn.
plt.scatter(X[:, 0], X[:, 1], color='gray')
plt.scatter(query_vector[:, 0], query_vector[:, 1], color='red', label='Query')
plt.scatter(X[indices[0], 0], X[indices[0], 1], color='blue', label='Similar Items')
plt.legend()
plt.title('Visual Search Using FAISS')
plt.show()


In [None]:
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
import faiss
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# 1. Data Preparation (Synthetic Example)
product_ids = [f'Product_{i}' for i in range(10)]
# Random RGB images. randint's upper bound is exclusive, so 256 is needed to
# cover the full 0-255 range of an 8-bit channel.
product_images = [np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8) for _ in range(10)]

# 2. Preprocessing
# Array -> PIL image -> 224x224 -> tensor -> per-channel normalisation.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 3. Feature Extraction Using Pretrained ResNet
# Newer torchvision releases select pretrained weights through the `weights`
# argument and deprecate `pretrained=True`. Use the new API when it exists and
# fall back to the old flag on older installations.
if hasattr(models, 'ResNet50_Weights'):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*list(model.children())[:-1])  # Remove final classification layer
model.eval()  # inference mode

# Extract features
def extract_features(images):
    """Embed each image with the module-level ``model``.

    Every image is preprocessed with the module-level ``transform`` and run
    through the network on its own, with gradient tracking disabled.

    Args:
        images: Iterable of images accepted by ``transform``.

    Returns:
        NumPy array holding one squeezed model output per input image.
    """
    def _embed(image):
        # Add a leading batch dimension of 1 for the forward pass.
        single_batch = transform(image).unsqueeze(0)
        with torch.no_grad():
            return model(single_batch).squeeze().numpy()

    return np.array([_embed(image) for image in images])

# Embed the whole (synthetic) catalogue once.
features = extract_features(product_images)

# 4. Similarity Search with FAISS
# Flat L2 index sized to the embedding width.
feature_dim = features.shape[1]
index = faiss.IndexFlatL2(feature_dim)
index.add(features)

# 5. Query Image
query_image = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
query_feature = extract_features([query_image])

# 6. Retrieve Similar Products
distances, indices = index.search(query_feature, k=3)

# 7. Display Results
# Panel 1 shows the query, panels 2-4 the retrieved products in ranked order.
panels = [(query_image, 'Query Image')]
panels.extend(
    (product_images[idx], f'Similar {rank}')
    for rank, idx in enumerate(indices[0], start=1)
)

plt.figure(figsize=(10, 4))
for panel_number, (panel_image, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 4, panel_number)
    plt.imshow(panel_image)
    plt.title(panel_title)
    plt.axis('off')

plt.show()
