In [2]:
import os
import pickle
import time
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
import warnings

# Install a blanket "ignore" filter: no category or module is given, so every
# Python warning raised after this point is suppressed.
# NOTE(review): this presumably also hides warnings emitted by scikit-learn
# while fitting (e.g. non-convergence) — consider narrowing the filter.
warnings.filterwarnings('ignore')

def prepOneHotEncoder(df, col, pathPackages):
    """One-hot encode ``col``, persist the fitted encoder and return the new frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``col``. It is not modified; a new frame is returned.
    col : str
        Name of the categorical column to encode.
    pathPackages : str
        Directory in which the fitted encoder is written as ``prep<col>.pkl``.

    Returns
    -------
    pandas.DataFrame
        ``df`` without ``col``, with indicator columns named ``<col>_1``,
        ``<col>_2``, ... appended on the right.
    """
    oneHotEncoder = OneHotEncoder(handle_unknown='ignore')
    encoded = oneHotEncoder.fit_transform(df[[col]]).toarray()

    # Column names are derived from the encoder's real output width, and the
    # caller's index is reused: pd.concat(axis=1) aligns on index labels, so a
    # fresh 0..n-1 index would yield NaN-padded rows for any frame that has
    # been filtered, sampled or otherwise re-indexed.
    dfOneHotEncoder = pd.DataFrame(
        encoded,
        columns=[col + "_" + str(i + 1) for i in range(encoded.shape[1])],
        index=df.index,
    )

    filename = 'prep' + col + '.pkl'
    # Context manager guarantees the pickle file is flushed and closed.
    with open(os.path.join(pathPackages, filename), 'wb') as file:
        pickle.dump(oneHotEncoder, file)
    print(f"Preprocessing data {col} has been saved...")

    return pd.concat([df.drop(col, axis=1), dfOneHotEncoder], axis=1)

def prepStandardScaler(df, col, pathPackages):
    """Standard-scale ``col``, persist the fitted scaler and return the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the numeric column ``col``. The column is overwritten
        in place, so the caller's frame is modified as well as returned.
    col : str
        Name of the column to scale.
    pathPackages : str
        Directory in which the fitted scaler is written as ``prep<col>.pkl``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``col`` replaced by its scaled values.
    """
    scaler = StandardScaler()
    df[col] = scaler.fit_transform(df[[col]])

    filename = 'prep' + col + '.pkl'
    # Context manager guarantees the pickle file is flushed and closed.
    with open(os.path.join(pathPackages, filename), 'wb') as file:
        pickle.dump(scaler, file)
    print(f"Preprocessing data {col} has been saved...")

    return df

def _loadPickle(filePath):
    """Unpickle and return the object stored at ``filePath``, closing the file."""
    # SECURITY: pickle executes arbitrary code on load — only point this at
    # artefacts produced by the training script, never at untrusted files.
    with open(filePath, 'rb') as handle:
        return pickle.load(handle)


def runModel(data, path):
    """Classify a single transaction with the persisted preprocessing and model.

    Parameters
    ----------
    data : dict
        One transaction as a mapping of column name to scalar value. It must
        contain every column listed in ``columnModelling.pkl``.
    path : str
        Project root; artefacts are read from ``<path>/modelling/packages``.

    Returns
    -------
    str
        ``"White List"`` when the model predicts ``0``, otherwise ``"Fraud"``.

    Notes
    -----
    The training code fits the model on standard-scaled numeric columns and
    saves each scaler as ``prep<col>.pkl``. Those scalers are applied here so
    inference sees features on the same scale as training; a column without a
    saved scaler is passed through unchanged.
    """
    path = os.path.join(path, "modelling", "packages")
    col = _loadPickle(os.path.join(path, 'columnModelling.pkl'))
    df = pd.DataFrame(data, index=[0])
    df = df[col]

    # One-hot encode 'type' with the encoder fitted at training time; the
    # transform is computed once and its width drives the column names.
    prepType = _loadPickle(os.path.join(path, 'preptype.pkl'))
    encodedType = prepType.transform(df[['type']]).toarray()
    dfType = pd.DataFrame(
        encodedType,
        columns=["type_" + str(i + 1) for i in range(len(encodedType[0]))],
        index=df.index,
    )
    df = pd.concat([df.drop('type', axis=1), dfType], axis=1)

    # Apply every per-column scaler that training persisted. Assigning back to
    # the same column keeps the column order identical to the training matrix.
    for name in col:
        if name == 'type':
            continue
        scalerPath = os.path.join(path, 'prep' + name + '.pkl')
        if os.path.isfile(scalerPath):
            scaler = _loadPickle(scalerPath)
            df[name] = scaler.transform(df[[name]])

    X = df.values.tolist()
    model = _loadPickle(os.path.join(path, 'modelFraud.pkl'))
    y = model.predict(X)[0]
    if y == 0:
        return "White List"
    else:
        return "Fraud"

if __name__ == "__main__":
    # Training entry point: fit the preprocessing steps and a logistic
    # regression on Fraud_Detection.csv, persisting every artefact next to it.
    pathPackages = os.path.join(os.getcwd(), "packages")
    target = 'isFraud'

    data = pd.read_csv(os.path.join(pathPackages, 'Fraud_Detection.csv'))
    # Account identifiers are dropped before modelling.
    data = data.drop(['nameOrig', 'nameDest'], axis=1)

    df = data.drop(target, axis=1)
    # Persist the raw feature column list so inference can select and order
    # incoming fields the same way; the context manager closes the file.
    with open(os.path.join(pathPackages, 'columnModelling.pkl'), 'wb') as file:
        pickle.dump(df.columns.tolist(), file)

    colOneHotEncoder = ['type']
    for col in colOneHotEncoder:
        df = prepOneHotEncoder(df, col, pathPackages)

    colprepStandardScaler = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
    for col in colprepStandardScaler:
        df = prepStandardScaler(df, col, pathPackages)

    # Hand the estimator the underlying arrays directly; building nested
    # Python lists first only costs time and memory for the same fit.
    X = df.values
    y = data[target].values

    start = time.time()
    model = LogisticRegression()
    model.fit(X, y)
    stop = time.time()

    with open(os.path.join(pathPackages, 'modelFraud.pkl'), 'wb') as file:
        pickle.dump(model, file)
    print(f"Training model done in {stop-start} seconds...")


Preprocessing data type has been saved...
Preprocessing data amount has been saved...
Preprocessing data oldbalanceOrg has been saved...
Preprocessing data newbalanceOrig has been saved...
Preprocessing data oldbalanceDest has been saved...
Preprocessing data newbalanceDest has been saved...
Training model done in 0.03306770324707031 seconds...


In [6]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.
