In [3]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

def load_model(file_path):
    # Load the pre-trained XGBoost model
    with open(file_path, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    return loaded_model

def preprocess_input(new_input_data):
    # Convert new input data to DataFrame
    new_input = pd.DataFrame([new_input_data])

    # Preprocess the new input data to align with the training data
    new_input['Account_Creation_Date'] = pd.to_datetime(new_input['Account_Creation_Date'], errors='coerce')

    # Handle errors in datetime conversion
    if pd.isnull(new_input['Account_Creation_Date']).any():
        print("Invalid 'Account_Creation_Date' format. Please ensure it's in YYYY-MM-DD format.")
        return None

    new_input['Session_Duration'] = new_input['Session_Duration'].str.extract('(\d+)').astype(float)
    new_input['Time_Between_Transactions'] = new_input['Time_Between_Transactions'].str.extract('(\d+)').astype(float)
    
    # Perform one-hot encoding for categorical variables
    new_input = pd.get_dummies(new_input)

    # Ensure new_input columns match X_train columns
    missing_cols = set(X_train.columns) - set(new_input.columns)
    for col in missing_cols:
        new_input[col] = 0

    new_input = new_input[X_train.columns]  # Align columns with X_train
    
    return new_input


# Load the model
model_path = 'xgboost_model.pkl'
loaded_model = load_model(model_path)

# Sample input data
new_input_data = {
    'Transaction_Amount': [1500],
    'User_Account_ID': [104],
    'Account_Creation_Date': ['2022-11-15'],
    'Payment_Method': ['Credit Card'],
    'Billing_Location': ['Bangalore'],
    'Shipping_Location': ['Hyderabad'],
    'Device_IP_Address': ['192.168.1.40'],
    'Session_Duration': ['500 seconds'],
    'Frequency_of_Transactions': [7],
    'Time_Between_Transactions': ['80 seconds'],
    'Unusual_Time_of_Transaction': [0],
    'Unusual_Transaction_Amounts': [0],
    'IP_Address_History': ['192.168.1.40']
}

# Load the dataset for column reference
file_path = 'transaction_detail.csv'
df = pd.read_csv(file_path)

# Preprocess the training data
df['Transaction_DateTime'] = pd.to_datetime(df['Transaction_Date'] + ' ' + df['Transaction_Time'])
df = df.drop(['Transaction_ID', 'Transaction_Date', 'Transaction_Time'], axis=1)
df = pd.get_dummies(df)
X_train = df.drop('Fraud_Label', axis=1)  # Features
# ... (rest of the training process)

# Preprocess new input data
new_input = preprocess_input(new_input_data)

# Make predictions using the loaded model
fraud_prediction = loaded_model.predict(new_input)

print(f"Fraud Prediction: {fraud_prediction}")


Invalid 'Account_Creation_Date' format. Please ensure it's in YYYY-MM-DD format.


XGBoostError: [17:15:05] C:\buildkite-agent\builds\buildkite-windows-cpu-autoscaling-group-i-0fdc6d574b9c0d168-1\xgboost\xgboost-ci-windows\src\c_api\c_api.cc:675: DMatrix/Booster has not been initialized or has already been disposed.

In [6]:
import pandas as pd
import xgboost as xgb
import pickle

# Function to load the XGBoost model


def load_model(file_path):
    try:
        with open(file_path, 'rb') as model_file:
            loaded_model = pickle.load(model_file)
        return loaded_model
    except Exception as e:
        print(f"Error loading the model: {e}")
        return None

# Function to preprocess input data


def preprocess_input(new_input_data):
    try:
        new_input_data['Account_Creation_Date'] = pd.to_datetime(
            new_input_data['Account_Creation_Date'], errors='coerce')
        if new_input_data['Account_Creation_Date'].isnull().any():
            raise ValueError(
                "Invalid 'Account_Creation_Date' format. Please ensure it's in YYYY-MM-DD format.")

            # Handle errors in datetime conversion
        if pd.isnull(new_input['Account_Creation_Date']).any():
            print(
                "Invalid 'Account_Creation_Date' format. Please ensure it's in YYYY-MM-DD format.")
            return None

        new_input['Session_Duration'] = new_input['Session_Duration'].str.extract(
            '(\d+)').astype(float)
        new_input['Time_Between_Transactions'] = new_input['Time_Between_Transactions'].str.extract(
            '(\d+)').astype(float)

        # Perform one-hot encoding for categorical variables
        new_input = pd.get_dummies(new_input)

        # Ensure new_input columns match X_train columns
        missing_cols = set(X_train.columns) - set(new_input.columns)
        for col in missing_cols:
            new_input[col] = 0

        new_input = new_input[X_train.columns]  # Align columns with X_train

        return new_input
    except Exception as e:
        print(f"Error preprocessing input data: {e}")
        return None

# Function to predict fraud based on new input


def predict_fraud(loaded_model, new_input):
    try:
        preprocessed_input = preprocess_input(new_input)
        if preprocessed_input is not None:
            fraud_prediction = loaded_model.predict(preprocessed_input)
            return fraud_prediction
        else:
            return None
    except Exception as e:
        print(f"Error predicting fraud: {e}")
        return None


# Example usage:
# Provide the path to your model file
file_path = './xgboost_model.pkl'
loaded_model = load_model(file_path)

if loaded_model:
    new_input_data = {
        'Transaction_Amount': [1500],
        'User_Account_ID': [104],
        'Account_Creation_Date': ['2022-11-15'],
        'Payment_Method': ['Credit Card'],
        'Billing_Location': ['Bangalore'],
        'Shipping_Location': ['Hyderabad'],
        'Device_IP_Address': ['192.168.1.40'],
        'Session_Duration': ['500 seconds'],
        'Frequency_of_Transactions': [7],
        'Time_Between_Transactions': ['80 seconds'],
        'Unusual_Time_of_Transaction': [0],
        'Unusual_Transaction_Amounts': [0],
        'IP_Address_History': ['192.168.1.40']
    }

    fraud_prediction = predict_fraud(loaded_model, new_input_data)
    if fraud_prediction is not None:
        print(f"Fraud Prediction: {fraud_prediction}")
    else:
        print("Error occurred during prediction.")
else:
    print("Model loading failed.")

Error preprocessing input data: cannot access local variable 'new_input' where it is not associated with a value
Error occurred during prediction.
