<a href="https://colab.research.google.com/github/uldkh/colab_projects/blob/main/5_AE_opredelenie_moshennicheskih_operaciy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, Conv2DTranspose, concatenate, Activation, MaxPooling2D, Conv2D, BatchNormalization
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import utils
from tensorflow.keras.preprocessing import image

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import os
import time
from PIL import Image

In [None]:
def set_plt_params(fontsize=11, linewidth=2.3):
  """Install the notebook-wide matplotlib style.

  Writes a fixed six-colour line cycle, a dark (#181c27) axes/figure
  background, dashed grid lines, hidden tick marks and light (#b2b5be)
  tick/text colours into matplotlib's global rcParams.

  Args:
    fontsize: label size applied to both x and y tick labels.
    linewidth: default width of plotted lines.
  """
  background = "#181c27"
  frame = "#2a2e39"
  foreground = "#b2b5be"
  palette = ['#0072B2', '#009E73', '#D55E00', '#CC79A7', '#F0E442', '#56B4E9']

  # One validated update instead of a series of plt.rc(...) calls.
  plt.rcParams.update({
    'axes.prop_cycle': plt.cycler(color=palette),
    'lines.linewidth': linewidth,
    'axes.facecolor': background,
    'axes.edgecolor': frame,
    'axes.grid': True,
    'figure.figsize': (17.87, 10.45),
    'figure.facecolor': background,
    'figure.edgecolor': frame,
    'grid.color': frame,
    'grid.linestyle': "dashed",
    'xtick.labelsize': fontsize,
    'xtick.color': foreground,
    'xtick.bottom': False,
    'ytick.labelsize': fontsize,
    'ytick.color': foreground,
    'ytick.left': False,
    'legend.handleheight': 1,
    'legend.handlelength': 2,
    'text.color': foreground,
  })

# Apply the plot styling once, with the default font size and line width.
set_plt_params()

In [None]:
# Load the transactions table from CSV into a DataFrame.
# NOTE(review): absolute path under /content — presumably the file has to be
# uploaded to the Colab runtime first; confirm before running elsewhere.
df = pd.read_csv("/content/creditcard.csv")

In [None]:
# Preview the first rows to check the column layout (Time, V1..V28, Amount, Class).
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [None]:
# Remove the "Time" column and standardize "Amount".
# Reassigning (instead of inplace=True) together with errors="ignore" keeps the
# cell re-runnable: a second execution no longer raises KeyError for "Time".
df = df.drop(columns=["Time"], errors="ignore")
# StandardScaler expects a 2-D input and returns an (n, 1) array; flatten it
# back to 1-D so it is assigned as an ordinary column.
df["Amount"] = StandardScaler().fit_transform(df["Amount"].to_numpy().reshape(-1, 1)).ravel()

In [None]:
# Split the table by label: rows with Class == 1 go to `frauds`,
# rows with Class == 0 go to `normal`.
frauds = df[df.Class == 1]
normal = df[df.Class == 0]
# Show both shapes to see the class sizes.
frauds.shape, normal.shape

((492, 30), (284315, 30))

In [None]:
# Only the normal (Class == 0) rows are split: 80% for training, 20% held out.
# random_state is fixed so the split itself is repeatable.
X_train, X_test = train_test_split(normal, test_size=0.2, random_state=77)
X_train.shape, X_test.shape

((227452, 30), (56863, 30))

In [None]:
# The training set holds only normal rows, so its label column is simply dropped.
# Reassignment replaces inplace=True: X_train is derived from a slice of `normal`,
# and mutating such a frame in place can trigger pandas' SettingWithCopyWarning.
X_train = X_train.drop(columns=["Class"])
# Test set = held-out normal rows followed by every fraud row.
X_test = pd.concat([X_test, frauds])
# Keep the labels aside before removing them from the features.
y_test = X_test["Class"]
X_test = X_test.drop(columns=["Class"])
# NOTE(review): this cell overwrites X_test with a version of itself, so it is
# meant to be executed once per kernel run (re-running would append frauds again).

In [None]:
# Convert both feature frames to plain NumPy arrays for the model.
# After this cell X_train / X_test are no longer DataFrames.
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

In [None]:
# Sanity check: both arrays should have the same number of feature columns.
X_train.shape, X_test.shape

((227452, 29), (57355, 29))

In [None]:
def credit_card_autoencoder(data_size=None):
  """Build and compile a dense autoencoder with one 10-unit hidden layer.

  The network maps an input vector to a 10-unit ReLU layer and back to a
  linear output layer of the same width as the input.

  Args:
    data_size: number of input features. When None (the default), it is
      read from the second dimension of the module-level `X_train`.

  Returns:
    A Keras `Model` compiled with the Adam optimizer and MSE loss.
  """
  if data_size is None:
    data_size = X_train.shape[1]

  # `shape` must be a tuple: `(data_size)` without the trailing comma is just
  # an int, which recent Keras versions reject.
  data_input = Input(shape=(data_size,))

  bottleneck = Dense(10, activation="relu")(data_input)
  reconstruction = Dense(data_size, activation="linear")(bottleneck)

  autoencoder = Model(inputs=data_input, outputs=reconstruction)
  autoencoder.compile(optimizer="Adam", loss="mse")

  return autoencoder

In [None]:
def get_batch_size(shape, limit=1000):
  """List the batch sizes that divide a sample count without remainder.

  Args:
    shape: number of samples (an integer).
    limit: exclusive upper bound for candidate sizes; candidates are
      2, 3, ..., limit - 1. Defaults to 1000.

  Returns:
    An ascending list of divisors of `shape` within [2, limit), or None
    (after printing a message) when there are none.
  """
  batches = [size for size in range(2, limit) if shape % size == 0]

  if batches:
    return batches

  print("Didn't find integer divisors")
  return None

In [None]:
# List batch sizes that split the training rows into equal-sized batches.
get_batch_size(X_train.shape[0])

[2, 4, 101, 202, 404, 563]

In [None]:
# Build the autoencoder and train it to reproduce its own input
# (X_train is passed as both the input and the target).
# batch_size=101 is one of the divisors reported by get_batch_size above.
# NOTE(review): no random seed is set in the visible cells, so the trained
# weights — and every number derived from them — may differ between runs.
model = credit_card_autoencoder()
history = model.fit(X_train, X_train, epochs=5, batch_size=101)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Reconstruct every test row (held-out normal rows plus all fraud rows).
predictions = model.predict(X_test)

In [None]:
# Per-row reconstruction error: mean squared difference over the feature axis.
mse = np.mean(np.power(X_test - predictions, 2), axis=1)

In [None]:
# Display the error array (NumPy abbreviates long arrays in the output).
mse

array([0.15625789, 0.20670171, 0.38737713, ..., 2.85662503, 5.70907313,
       0.31624663])

In [None]:
# Split the reconstruction errors by true label; y_test rows are in the same
# order as X_test, so a boolean mask on its values lines up with `mse`.
mse_normal = mse[y_test.values == 0]
mse_frauds = mse[y_test.values == 1]

In [None]:
# Min / max / mean reconstruction error over the normal test rows,
# rounded to three decimals.
print("Минимальная ошибка нормальных транзакций:", round(min(mse_normal), 3))
print("Максимальная ошибка нормальных транзакций:", round(max(mse_normal), 3))
print("Средняя ошибка нормальных транзакций:", round(sum(mse_normal) / len(mse_normal), 3))

Минимальная ошибка нормальных транзакций: 0.042
Максимальная ошибка нормальных транзакций: 74.061
Средняя ошибка нормальных транзакций: 0.35


In [None]:
# Min / max / mean reconstruction error over the fraud test rows,
# rounded to three decimals.
print("Минимальная ошибка мошеннических транзакций:", round(min(mse_frauds), 3))
print("Максимальная ошибка мошеннических транзакций:", round(max(mse_frauds), 3))
print("Средняя ошибка мошеннических транзакций:", round(sum(mse_frauds) / len(mse_frauds), 3))

Минимальная ошибка мошеннических транзакций: 0.161
Максимальная ошибка мошеннических транзакций: 95.752
Средняя ошибка мошеннических транзакций: 19.023


In [None]:
def get_acc_by_bias(bias, normal_errors=None, fraud_errors=None):
  """Print per-class detection rates for a reconstruction-error threshold.

  A transaction is treated as fraud when its error is strictly greater than
  `bias` and as normal otherwise, so the two rules are complementary (an
  error exactly equal to `bias` counts as normal).

  Args:
    bias: threshold applied to the reconstruction error.
    normal_errors: errors of the truly normal transactions. When None,
      the module-level `mse_normal` is used.
    fraud_errors: errors of the truly fraudulent transactions. When None,
      the module-level `mse_frauds` is used.

  Returns:
    None. Three lines are printed: the share of normal transactions
    recognized, the share of frauds recognized, and the mean of the two,
    each as a whole percentage.

  Raises:
    ValueError: if either error array is empty.
  """
  if normal_errors is None:
    normal_errors = mse_normal
  if fraud_errors is None:
    fraud_errors = mse_frauds

  normal_errors = np.asarray(normal_errors)
  fraud_errors = np.asarray(fraud_errors)
  if normal_errors.size == 0 or fraud_errors.size == 0:
    raise ValueError("both error arrays must be non-empty")

  # The mean of a boolean mask is the fraction of True values; float() keeps
  # round() returning a plain int regardless of the NumPy version.
  acc_normal = float(np.mean(normal_errors <= bias))
  acc_frauds = float(np.mean(fraud_errors > bias))

  print("Распознано нормальных транзакций: ", round(100 * acc_normal), "%", sep="")
  print("Распознано мошеннических транзакций: ", round(100 * acc_frauds), "%", sep="")
  # 50 * (a + b) is the average of the two rates expressed in percent.
  print("Средняя точность распознавания: ", round(50 * (acc_normal + acc_frauds)), "%", sep="")

In [None]:
# Report detection rates at an error threshold of 0.62.
# NOTE(review): 0.62 is presumably hand-tuned against this particular training
# run — confirm it still fits after retraining.
get_acc_by_bias(0.62)

Распознано нормальных транзакций: 92%
Распознано мошеннических транзакций: 90%
Средняя точность распознавания: 91%
