In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Location of the churn CSV under the /kaggle/input directory.
DATA_PATH = '/kaggle/input/credit-card-customer-churn-prediction/Churn_Modelling.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Size of the loaded frame as (rows, columns).
df.shape

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Remove the identifier-like columns so they are not used as model features.
# errors="ignore" makes the cell safe to re-run: without it, a second execution
# raises KeyError because the columns are already gone from `df`.
df = df.drop(columns=["CustomerId", "Surname", "RowNumber"], errors="ignore")

In [None]:
# Show the frame as it stands after the identifier columns were dropped.
df

In [None]:
# Tally rows flagged as repeats of an earlier row (True) versus not (False).
df.duplicated().value_counts()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Frequency of each distinct Geography value.
df['Geography'].value_counts()

In [None]:
# Frequency of each distinct Age value.
df['Age'].value_counts()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

In [None]:
# Class balance of the prediction target: number of rows per Exited value.
df['Exited'].value_counts()

In [None]:
# Subset of columns inspected in the correlation table.
numeric_columns = [
    'CreditScore',
    'Age',
    'Tenure',
    'Balance',
    'EstimatedSalary',
    'NumOfProducts',
]
df1 = df[numeric_columns]

In [None]:
# Pairwise correlation matrix of the selected columns (pandas' default method).
df1.corr()

In [None]:

# One histogram with a KDE overlay per column, laid out on a 2x3 grid.
cols = ['CreditScore', 'Age', 'Tenure', 'Balance', 'EstimatedSalary', 'NumOfProducts']
for position, column in enumerate(cols, start=1):
    # Subplot slots are 1-based, hence start=1.
    plt.subplot(2, 3, position)
    sns.histplot(data=df, x=column, kde=True)
    plt.title(f"Distributions of {column}", fontsize=8)
plt.tight_layout()

In [None]:
# One-hot encode the two categorical columns; drop_first=True omits one level
# per column so the remaining indicators are not redundant.
# NOTE(review): this reassigns `df`, so re-running this cell on its own will
# presumably fail because 'Geography' and 'Gender' no longer exist -- rerun from
# the load cell instead.
df = pd.get_dummies(df,columns=['Geography','Gender'],drop_first=True)

In [None]:
# Separate the predictors from the binary target column.
X = df.drop(columns=['Exited'])
y = df['Exited']

# Hold out 20% of the rows for testing. stratify=y keeps the proportion of each
# Exited class the same in both splits, so the test accuracy is not skewed by
# an unlucky draw on the binary target. random_state fixes the split.
# (train_test_split is already imported in the first cell.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so nothing from the test set influences it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print(X_train_scaled)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader

In [None]:
# Features: scaled arrays converted to float32 tensors.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Targets: float32 column vectors (view(-1, 1)) so they line up with the
# network's single output per sample.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [None]:
class data(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        feature: Sized, indexable container of inputs (e.g. a tensor whose
            first dimension is the sample axis).
        label: Sized, indexable container of targets aligned with ``feature``.

    Raises:
        ValueError: If ``feature`` and ``label`` do not have the same length.
    """

    def __init__(self, feature, label):
        # Fail fast on misaligned inputs; otherwise the mismatch only shows up
        # later as an IndexError mid-epoch or as silently ignored samples.
        if len(feature) != len(label):
            raise ValueError(
                f"feature and label must have the same length, "
                f"got {len(feature)} and {len(label)}"
            )
        self.feature = feature
        self.label = label

    def __len__(self):
        """Return the number of samples."""
        return len(self.feature)

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair stored at ``index``."""
        return self.feature[index], self.label[index]

In [None]:
# Wrap each split in the dataset class so it can be batched by a DataLoader.
train_dataset, test_dataset = (
    data(X_train_tensor, y_train_tensor),
    data(X_test_tensor, y_test_tensor),
)

In [None]:
# Options common to both loaders.
loader_options = dict(batch_size=64, pin_memory=True)

# Training batches are reshuffled; test batches keep their original order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [None]:
class Mynn(nn.Module):
    """Fully connected binary classifier: num_feature -> 64 -> 16 -> 8 -> 1.

    Every hidden layer is followed by ReLU; dropout (p=0.5, then p=0.3) is
    applied after the first two. The network ends in a sigmoid, so its output
    is a value in (0, 1) per sample rather than a raw logit.

    Args:
        num_feature: Number of input features per sample.
    """

    def __init__(self, num_feature):
        super().__init__()
        # Layers are created in forward order so parameter names and
        # initialisation order stay model.0, model.3, model.6, model.8.
        layers = [
            nn.Linear(num_feature, 64), nn.ReLU(), nn.Dropout(p=0.5),
            nn.Linear(64, 16), nn.ReLU(), nn.Dropout(p=0.3),
            nn.Linear(16, 8), nn.ReLU(),
            nn.Linear(8, 1), nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, index):
        """Run the batch ``index`` (the input features) through the network."""
        probabilities = self.model(index)
        return probabilities

In [None]:
# Training configuration.
SEED = 1      # matches the random_state used for the train/test split
l_r = 0.2     # SGD learning rate
epochs = 100  # number of passes over the training set

# Seed torch's global RNG before the model is built so that weight
# initialisation, dropout masks and DataLoader shuffling repeat from run to run.
torch.manual_seed(SEED)

# `optim` is already imported alongside torch above; no re-import needed here.
model = Mynn(X_train_tensor.shape[1])

# BCELoss expects probabilities, which the model's final sigmoid provides.
loss_fn = nn.BCELoss()

optimizer = optim.SGD(model.parameters(), lr=l_r)


In [None]:
# Training the data
# Put the model in training mode explicitly: if the evaluation cell (which calls
# model.eval()) was run before this cell is re-run, dropout would otherwise
# stay disabled for the whole training pass.
model.train()

# Use the configured `epochs` value instead of a second hard-coded 100.
for epoch in range(epochs):
    total_epoch_loss = 0
    for batch_features, batch_labels in train_loader:

        # Forward pass
        output = model(batch_features)

        # Loss calculation; labels are coerced to a float column to match the
        # (batch, 1) shape of the model output.
        loss = loss_fn(output, batch_labels.view(-1, 1).float())

        # Backpropagation: clear stale gradients, then compute fresh ones
        optimizer.zero_grad()
        loss.backward()

        # Parameter update
        optimizer.step()
        total_epoch_loss = total_epoch_loss + loss.item()

    # Mean of the per-batch losses for this epoch
    avg_epoch_loss = total_epoch_loss/len(train_loader)
    print(f"Epoch : {epoch+1}   Loss : {avg_epoch_loss}")

In [None]:

# Score the trained network on the held-out split.
model.eval()  # switch dropout off for inference
n_correct = 0
n_samples = 0
with torch.no_grad():  # no gradients needed while scoring
    for batch_x, batch_y in test_loader:
        # Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
        hard_preds = (model(batch_x) > 0.5).float()
        matches = hard_preds == batch_y
        n_correct += matches.sum().item()
        n_samples += batch_y.size(0)

accuracy = 100.0 * n_correct / n_samples
print(f'Accuracy on the test set: {accuracy:.2f} %')