In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from keras.models import Sequential
from keras.layers import GRU, Dense
from keras.optimizers import Adam
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


# Load the training sheet; header=1 takes the second spreadsheet row as the
# column names.
df = pd.read_excel('SampleData.xlsx', header=1)

# Parse the timestamps so the .dt accessor can be used below.
df['Date_Time'] = pd.to_datetime(df['Date_Time'])

# Hour-of-day of each transaction.
# NOTE(review): 'hour' appears in neither column list below, so the
# ColumnTransformer (default remainder='drop') never passes it to the model.
# Add it to numerical_cols if it is meant to be a feature.
df['hour'] = df['Date_Time'].dt.hour

# Keep the first run of digits found in 'indicator-2' and store it as a float.
# The cast to str and expand=False mirror the parsing used by the prediction
# cell, so training and inference treat this column identically, and the
# statement no longer raises if Excel delivers the column as numeric.
# Values without any digit (including missing cells) become NaN.
df['indicator-2'] = (
    df['indicator-2']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)
    .astype(float)
)

# Column groups consumed by the preprocessing ColumnTransformer.
categorical_cols = ['Decision', 'Merchant_State', 'Type']
numerical_cols = ['a-score', 'b-score', 'Category', 'indicator-1', 'C-score', 'indicator-2', 'vol-1', 'vol-2']


# Standardise the numeric columns and one-hot encode the categorical ones.
# handle_unknown='ignore' makes categories unseen during fit encode as all
# zeros instead of raising at transform time.
# sparse_threshold=0 forces a dense ndarray result. With the default (0.3) the
# transformer returns a scipy sparse matrix as soon as the one-hot block makes
# the stacked output sparse enough, and a sparse matrix cannot take the 3-D
# reshape that is applied to the transformed data before it is fed to the GRU.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ],
    sparse_threshold=0,
)


# Feature frame: everything except the label, the raw timestamp and the card
# number. Columns not named in the preprocessor's column lists are still
# present here and are filtered out later by the ColumnTransformer.
X = df.drop(columns=['Fraud', 'Date_Time', 'CreditCard#'])

# Binary target taken from the 'Fraud' column, cast to integers.
y = df['Fraud'].astype(int)

# Hold out 20% of the rows for testing; random_state=0 keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


# Fit the scaler/encoder on the training split only, then add a length-1
# time axis so the array has the (samples, timesteps, features) layout that
# the GRU layers are given as input.
X_train_preprocessed = preprocessor.fit_transform(X_train)
train_rows, train_width = X_train_preprocessed.shape
X_train_preprocessed = X_train_preprocessed.reshape((train_rows, 1, train_width))

# The test split reuses the statistics and categories learned above.
X_test_preprocessed = preprocessor.transform(X_test)
test_rows, test_width = X_test_preprocessed.shape
X_test_preprocessed = X_test_preprocessed.reshape((test_rows, 1, test_width))


# Per-sample input shape (timesteps, features), read from the reshaped
# training array.
sequence_shape = (X_train_preprocessed.shape[1], X_train_preprocessed.shape[2])

# Two stacked GRU layers followed by a single sigmoid unit.
# return_sequences=True on the first GRU hands the whole output sequence to
# the second GRU rather than only the last step.
model = Sequential([
    GRU(32, input_shape=sequence_shape, return_sequences=True),
    GRU(16),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; accuracy is the
# metric reported during training and evaluation.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


# Train for 10 epochs in mini-batches of 32, holding back 10% of the training
# data for validation.
model.fit(
    x=X_train_preprocessed,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.1,
)

# Score the held-out test split; the result unpacks into the loss and the
# accuracy metric configured at compile time.
loss, acc = model.evaluate(
    x=X_test_preprocessed,
    y=y_test,
    verbose=0,
)
print('Test Accuracy:', acc)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.800000011920929


In [28]:
from joblib import dump

# Persist the trained network to an HDF5 file.
model.save(filepath='/mnt/data/my_model.h5')

# Persist the fitted preprocessor next to it so inference can apply exactly
# the same scaling and encoding. Kept as the last expression of the cell so
# the list of written file names is displayed.
dump(value=preprocessor, filename='/mnt/data/my_preprocessor.joblib')


  saving_api.save_model(


['/mnt/data/my_preprocessor.joblib']

In [29]:
import pandas as pd
from keras.models import load_model
from joblib import load


# Load the rows to score.
# NOTE(review): the training sheet is read with header=1 while this one uses
# the default header row -- confirm the two workbooks really are laid out
# differently.
predict_df = pd.read_excel('SampleData-Predict.xlsx')

# Parse the timestamps and derive the hour-of-day column.
# NOTE(review): 'hour' is not among the columns selected for the model below.
predict_df['Date_Time'] = pd.to_datetime(predict_df['Date_Time'])
predict_df['hour'] = predict_df['Date_Time'].dt.hour

# Keep the first run of digits found in 'indicator-2' and store it as a float.
# Values without any digit (including missing cells, which str() renders as
# text with no digits) become NaN. The previous dropna() did not change this:
# assigning the shortened Series back re-aligned it on the index and restored
# the NaN rows. The NaNs are therefore kept explicitly here and still reach
# the preprocessor -- impute or filter them upstream if that is not intended.
indicator_2_series = (
    predict_df['indicator-2']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)
    .astype(float)
)
predict_df['indicator-2'] = indicator_2_series


# Column groups the persisted preprocessor was fitted on; repeated here so
# this cell does not depend on variables left over from the training cell.
numerical_cols = ['a-score', 'b-score', 'Category', 'indicator-1', 'C-score', 'indicator-2', 'vol-1', 'vol-2']
categorical_cols = ['Decision', 'Merchant_State', 'Type']

# Load the artifacts from the exact locations the save cell wrote them to.
# The previous cwd-relative names only resolved when the working directory
# happened to be /mnt/data, and could otherwise pick up stale files from an
# earlier run or fail outright.
# joblib files are pickle-based: only load files produced by this notebook or
# another trusted source.
preprocessor = load('/mnt/data/my_preprocessor.joblib')

model = load_model('/mnt/data/my_model.h5')


# Select exactly the columns the preprocessor was fitted on.
X_predict = predict_df[numerical_cols + categorical_cols]

# Apply the fitted scaling / one-hot encoding.
X_predict_preprocessed = preprocessor.transform(X_predict)

# A ColumnTransformer containing a OneHotEncoder may return a scipy sparse
# matrix, which cannot be reshaped to 3-D; convert it to a dense array first.
if hasattr(X_predict_preprocessed, 'toarray'):
    X_predict_preprocessed = X_predict_preprocessed.toarray()

# Add a length-1 time axis: (samples, features) -> (samples, 1, features),
# the layout used when the model was trained.
X_predict_preprocessed = X_predict_preprocessed.reshape(
    (X_predict_preprocessed.shape[0], 1, X_predict_preprocessed.shape[1])
)


# Model outputs for every row; anything strictly above 0.5 is labelled as
# fraud (1), everything else as 0.
predictions = model.predict(X_predict_preprocessed)
above_threshold = predictions > 0.5
predicted_classes = above_threshold.astype(int)

# Attach the labels to the scored rows.
predict_df['Predicted_Fraud'] = predicted_classes

# Preview the first five labelled rows.
print(predict_df.loc[:, ['IDs', 'Predicted_Fraud']].head(5))

# Export the full scored table, without the index column.
predict_df.to_excel(
    excel_writer='/mnt/data/Predicted_SampleData.xlsx',
    index=False,
)


      IDs  Predicted_Fraud
0  R3ts55                1
1  12ts55                0
2  R3ts55                0
3  12ts55                0
4  R3ts55                0
