In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Location of the credit-card transactions CSV read by this notebook.
CSV_PATH = '/content/drive/MyDrive/creditcard.csv'

# Load the full file into a DataFrame and preview the first rows.
df = pd.read_csv(CSV_PATH)
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0




In [None]:
# Normalise the header: lower-case every column name, then call the target 'label'.
df = df.rename(columns=str.lower).rename(columns={'class': 'label'})

# Add a log-scaled copy of the transaction amount; the small offset keeps
# log10 finite when the amount is exactly zero.
df = df.assign(log10_amount=np.log10(df['amount'] + 0.00001))

# Reorder so that 'log10_amount' and 'label' end up as the last two columns.
tail_cols = ['log10_amount', 'label']
df = df[[name for name in df.columns if name not in tail_cols] + tail_cols]


In [None]:
# Target vector: the 'label' column as a NumPy array.
y = df['label'].to_numpy()
# Feature matrix: every other column, as a NumPy array.
X = df.drop(columns='label').to_numpy()

In [None]:
# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class ratio the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Standardise the features with statistics learned from the training split only,
# then apply the same transformation to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Convert every array to a float32 tensor for PyTorch.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_test, y_test)
)

In [None]:
class CreditCardDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X: Indexable, sized collection of feature rows (e.g. a 2-D tensor).
        y: Indexable, sized collection of labels, one per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of items.
    """

    def __init__(self, X, y):
        # Fail fast on misaligned inputs: otherwise the mismatch only shows up
        # later as an IndexError in a DataLoader, or is silently ignored when
        # ``y`` is longer than ``X``.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the train/test tensors in datasets and batch them with data loaders.
BATCH_SIZE = 256

# Training batches are reshuffled on every pass over the loader.
train_dataset = CreditCardDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# The test loader keeps the original row order (no shuffling).
test_dataset = CreditCardDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a 2-dimensional bottleneck.

    The encoder narrows ``input_dim -> 16 -> 8 -> 4 -> 2`` and the decoder
    mirrors it back to ``input_dim``. ReLU is applied between hidden layers
    only; the final decoder layer is linear.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 2),
        )
        self.decoder = nn.Sequential(
            nn.Linear(2, 4),
            nn.ReLU(),
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 16),
            nn.ReLU(),
            # No activation after the output layer: a trailing ReLU would clamp
            # reconstructions to >= 0, making it impossible to reproduce the
            # negative values that standardised inputs contain.
            nn.Linear(16, input_dim),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and return its reconstruction.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [None]:
# Seed torch's global RNG so weight initialisation and batch shuffling are
# repeatable from one run to the next.
torch.manual_seed(42)

input_dim = X_train.shape[1]  # number of feature columns (X_train is standardized)
autoencoder = Autoencoder(input_dim)

# Reconstruction objective: mean squared error between output and input.
criterion = nn.MSELoss()

# Define the optimizer
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.001)

# Train the autoencoder model
num_epochs = 100
autoencoder.train()  # make sure the model is in training mode
for epoch in range(num_epochs):
    running_loss = 0.0
    samples_seen = 0
    for inputs, _ in train_loader:  # labels are not used for reconstruction
        optimizer.zero_grad()
        outputs = autoencoder(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so a smaller final batch does not
        # skew the epoch average.
        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the mean loss over the whole epoch. Logging only the last
    # mini-batch's loss gives a noisy, unrepresentative curve.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


Epoch [1/100], Loss: 0.6756
Epoch [2/100], Loss: 0.4576
Epoch [3/100], Loss: 0.5987
Epoch [4/100], Loss: 0.7786
Epoch [5/100], Loss: 0.4563
Epoch [6/100], Loss: 0.5329
Epoch [7/100], Loss: 0.6046
Epoch [8/100], Loss: 0.6829
Epoch [9/100], Loss: 0.8431
Epoch [10/100], Loss: 0.7533
Epoch [11/100], Loss: 0.6331
Epoch [12/100], Loss: 0.7970
Epoch [13/100], Loss: 0.4884
Epoch [14/100], Loss: 1.3260
Epoch [15/100], Loss: 0.6585
Epoch [16/100], Loss: 0.5149
Epoch [17/100], Loss: 0.6366
Epoch [18/100], Loss: 0.8099
Epoch [19/100], Loss: 0.7982
Epoch [20/100], Loss: 0.6305
Epoch [21/100], Loss: 1.9731
Epoch [22/100], Loss: 1.5518
Epoch [23/100], Loss: 0.6177
Epoch [24/100], Loss: 0.5881
Epoch [25/100], Loss: 0.7005
Epoch [26/100], Loss: 0.7069
Epoch [27/100], Loss: 0.4323
Epoch [28/100], Loss: 0.5912
Epoch [29/100], Loss: 0.5672
Epoch [30/100], Loss: 0.8782
Epoch [31/100], Loss: 0.5226
Epoch [32/100], Loss: 3.0055
Epoch [33/100], Loss: 0.9027
Epoch [34/100], Loss: 0.7614
Epoch [35/100], Loss: 0

In [None]:
# Reconstruct the test set with the trained autoencoder.
autoencoder.eval()  # switch the model to evaluation mode

# Gradients are not needed for inference. Each batch's output is flattened
# into plain Python rows and collected in loader order.
with torch.no_grad():
    reconstructed_rows = [
        row
        for batch_inputs, _ in test_loader
        for row in autoencoder(batch_inputs).tolist()
    ]

# Stack the collected rows into a single NumPy array.
predictions = np.array(reconstructed_rows)



In [None]:
# Show the reconstructed test rows; NumPy abbreviates large arrays with '...'.
print(predictions)

[[1.46418631 0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [1.12801826 0.         0.         ... 0.         0.         0.        ]
 [1.29194343 0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
