In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from tensorflow import keras
from scikeras.wrappers import KerasClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle as pkl

# Load data
def load_data(file_path):
    df = pd.read_csv("../data/transaction_detail.csv")
    return df

# Preprocess input data
def preprocess_input(input_data):
    # Create a DataFrame from the input data
    processed_input = pd.DataFrame(input_data)

    # Convert 'Transaction_Date' to datetime
    processed_input['Transaction_Date'] = pd.to_datetime(processed_input['Transaction_Date'], format='%Y-%m-%d')

    # Extract additional features from datetime columns if needed
    processed_input['Transaction_Year'] = processed_input['Transaction_Date'].dt.year
    processed_input['Transaction_Month'] = processed_input['Transaction_Date'].dt.month
    processed_input['Transaction_Day'] = processed_input['Transaction_Date'].dt.day
    processed_input['Transaction_Hour'] = processed_input['Transaction_Date'].dt.hour
    processed_input['Transaction_Minute'] = processed_input['Transaction_Date'].dt.minute

    # Drop original datetime columns
    processed_input = processed_input.drop(['Transaction_Date'], axis=1)

    # Perform one-hot encoding for categorical columns
    processed_input = pd.get_dummies(processed_input)

    return processed_input

# Train models
def train_models(X_train, y_train):
    # XGBoost model
    xgb_model = xgb.XGBClassifier()
    xgb_model.fit(X_train, y_train)
    
    # Random Forest model
    rf_model = RandomForestClassifier()
    rf_model.fit(X_train, y_train)
    
    # Neural Network model
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    model_nn = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(16, activation='relu'),
        keras.layers.Dense(8, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    model_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model_nn.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=0)
    
    return xgb_model, rf_model, model_nn

# Train ensemble model
def train_ensemble(xgb_model, rf_model, model_nn, X_train, y_train):
    ensemble_model = VotingClassifier(estimators=[
        ('xgb', xgb_model),
        ('rf', rf_model),
        ('nn', KerasClassifier(build_fn=model_nn, epochs=1, batch_size=32, verbose=0))
    ], voting='soft')
    ensemble_model.fit(X_train, y_train)
    return ensemble_model

# Save model to file
def save_model(model, file_name):
    with open(file_name, 'wb') as model_file:
        pkl.dump(model, model_file)

# Function to predict fraud
def predict_fraud(model, input_data):
    # Load the ensemble model from the pickle file
    with open(model, 'rb') as model_file:
        loaded_model = pkl.load(model_file)
    
    # Preprocess the input data
    processed_input = preprocess_input(input_data)
    
    # Make predictions using the loaded model
    fraud_prediction = loaded_model.predict(processed_input)
    
    return fraud_prediction

# Load and preprocess data
file_path = '../data/transaction_detail.csv'
df = load_data(file_path)
processed_data = preprocess_input(df)

# Split the data into features (X) and target variable (y)
X = processed_data.drop('Fraud_Label', axis=1)
y = df['Fraud_Label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Train individual models
xgb_model, rf_model, model_nn = train_models(X_train, y_train)

# Train ensemble model
ensemble_model = train_ensemble(xgb_model, rf_model, model_nn, X_train, y_train)

# Save the ensemble model to a pickle file
save_model(ensemble_model, 'ensemble_model.pkl')

# Call predict_fraud function to predict fraud




INFO:tensorflow:Assets written to: C:\Users\Yash\AppData\Local\Temp\tmp4jydicwm\assets


INFO:tensorflow:Assets written to: C:\Users\Yash\AppData\Local\Temp\tmp4jydicwm\assets
  X, y = self._initialize(X, y)


INFO:tensorflow:Assets written to: C:\Users\Yash\AppData\Local\Temp\tmprps2vlw3\assets


INFO:tensorflow:Assets written to: C:\Users\Yash\AppData\Local\Temp\tmprps2vlw3\assets


INFO:tensorflow:Assets written to: C:\Users\Yash\AppData\Local\Temp\tmpj6bd8xqq\assets


INFO:tensorflow:Assets written to: C:\Users\Yash\AppData\Local\Temp\tmpj6bd8xqq\assets


In [2]:
new_input_data = {
    'Transaction_Amount': [1500],
    'User_Account_ID': [104],
    'Account_Creation_Date': ['2022-11-15'],
    'Payment_Method': ['Credit Card'],
    'Billing_Location': ['Bangalore'],
    'Shipping_Location': ['Hyderabad'],
    'Device_IP_Address': ['192.168.1.40'],
    'Session_Duration': ['500 seconds'],
    'Frequency_of_Transactions': [7],
    'Time_Between_Transactions': ['80 seconds'],
    'Unusual_Time_of_Transaction': [0],
    'Unusual_Transaction_Amounts': [0],
    'IP_Address_History': ['192.168.1.40']
}


new_input = pd.DataFrame(new_input_data)


In [3]:
new_input['Account_Creation_Date'] = pd.to_datetime(new_input['Account_Creation_Date'])
new_input['Session_Duration'] = new_input['Session_Duration'].str.extract('(\d+)').astype(int)
new_input['Time_Between_Transactions'] = new_input['Time_Between_Transactions'].str.extract('(\d+)').astype(int)

In [4]:
new_input = pd.get_dummies(new_input)


In [5]:
missing_cols = set(X_train.columns) - set(new_input.columns)
for col in missing_cols:
    new_input[col] = 0

  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[col] = 0
  new_input[c

In [6]:
missing_cols = set(new_input.columns) - set(X_train.columns)
new_input = new_input[X_train.columns]

In [8]:
fraud_prediction = predict_fraud('ensemble_model.pkl', X_test)
print(f"Fraud Prediction: {fraud_prediction}")

KeyError: 'Transaction_Date'