# AI Transaction Guardian – GitHub-Compatible Notebook (Python 3.11 Kernelspec)

In [None]:
import os, numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix, precision_recall_curve
import joblib

# Printed only after every import in this cell has succeeded.
print('Libraries loaded.')

In [None]:
# Relative path of the transactions CSV; it is handed to load_or_generate in this cell.
DATA_PATH = 'data/transactions.csv'

def load_or_generate(path, n_rows=3000, seed=42):
    """Load transactions from ``path``, or synthesize a sample when it is absent.

    Args:
        path: CSV file to read. If it exists it is returned exactly as
            ``pd.read_csv`` parses it; no schema validation is performed.
        n_rows: Number of synthetic rows to generate when ``path`` does not
            exist. Ignored when the file is present.
        seed: Seed for the NumPy random generator used for the synthetic
            data, so repeated calls with the same arguments return equal
            frames.

    Returns:
        A DataFrame. In the synthetic case it has the columns
        ``transaction_id``, ``customer_id``, ``amount``,
        ``merchant_category``, ``channel``, ``country`` and ``is_fraud``.
    """
    if os.path.exists(path):
        return pd.read_csv(path)

    rng = np.random.default_rng(seed)
    # The draws below are made in a fixed order; changing the order would
    # change the generated data for a given seed.
    df = pd.DataFrame({
        'transaction_id': np.arange(n_rows),
        'customer_id': rng.integers(1, 500, n_rows),  # upper bound is exclusive
        'amount': rng.gamma(2, 50, n_rows),
        'merchant_category': rng.choice(
            ['grocery', 'electronics', 'travel', 'gaming'], n_rows
        ),
        'channel': rng.choice(['ecom', 'pos', 'ivr', 'mobile'], n_rows),
        'country': rng.choice(['US', 'CA', 'GB', 'DE'], n_rows),
    })

    # Base fraud rate of 3%, raised for large amounts and for e-commerce.
    prob = 0.03 + (df['amount'] > 250) * 0.05 + (df['channel'] == 'ecom') * 0.03
    # One Bernoulli draw per row; the clip keeps probabilities within [0, 0.95].
    df['is_fraud'] = rng.binomial(1, prob.clip(0, 0.95))
    return df

# Read the CSV at DATA_PATH if it exists; otherwise fall back to generated data.
df = load_or_generate(DATA_PATH)
# Trailing expression: the notebook renders the first five rows as the cell output.
df.head()

In [None]:
# Feature columns, grouped by the preprocessing each group receives.
numeric = ['amount']
categorical = ['merchant_category','channel','country']
X = df[numeric + categorical]
y = df['is_fraud']

# Hold out 20% of the rows for evaluation. The split is seeded and stratified
# on the label so both parts keep the same fraud rate.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale the numeric column and one-hot encode the categoricals.
# handle_unknown='ignore' lets categories unseen during fit pass through
# at scoring time instead of raising.
pre = ColumnTransformer([
    ('num', StandardScaler(), numeric),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical)
])

# Seed the forest as well: without random_state the fitted model, and
# therefore proba, auc and any threshold derived from them, differ on every
# run even though the train/test split itself is fixed.
rf = Pipeline([
    ('prep', pre),
    ('model', RandomForestClassifier(
        n_estimators=200, class_weight='balanced', random_state=42
    ))
])
rf.fit(X_train, y_train)

# Column 1 of predict_proba is the second entry of the model's classes_,
# i.e. label 1 when is_fraud is 0/1 (true for the generated data; verify
# for a loaded CSV).
proba = rf.predict_proba(X_test)[:,1]
auc = roc_auc_score(y_test, proba)
# Trailing expression: the notebook displays the ROC AUC on the held-out split.
auc

In [None]:
# Pick the operating threshold from the precision-recall curve of the
# held-out predictions.
# NOTE(review): y_test/proba are the same split used for the AUC above, so
# the threshold is tuned on evaluation data; confirm this is intended.
precision, recall, thresholds = precision_recall_curve(y_test, proba)
target_recall = 0.80
min_precision = 0.2

# precision and recall have one more entry than thresholds; pad with 1 so
# the three arrays line up element by element.
candidate_t = np.append(thresholds, 1)
meets_target = (recall >= target_recall) & (precision > min_precision)

# Among the qualifying entries the last one wins; when nothing qualifies the
# default of 0.5 is kept.
best_t = candidate_t[meets_target][-1] if meets_target.any() else 0.5
best_t

In [None]:
def score_transactions(df_new, model, threshold, feature_cols=None):
    """Attach a risk score and a flag to each transaction.

    Args:
        df_new: DataFrame of transactions; it is not modified.
        model: Fitted estimator exposing ``predict_proba``; column 1 of its
            output is used as the risk score.
        threshold: Rows whose score is greater than or equal to this value
            are flagged.
        feature_cols: Columns passed to the model. Defaults to the
            module-level ``numeric + categorical`` lists.

    Returns:
        A copy of ``df_new`` with two extra columns: ``risk_score`` (float)
        and ``is_flagged`` (1 when ``risk_score >= threshold``, else 0).

    Raises:
        KeyError: If any feature column is missing from ``df_new``.
    """
    cols = list(numeric + categorical if feature_cols is None else feature_cols)
    absent = [c for c in cols if c not in df_new.columns]
    if absent:
        raise KeyError(f"df_new is missing feature columns: {absent}")

    scored = df_new.copy()
    if len(scored) == 0:
        # Nothing to score: skip the model call (estimators commonly reject
        # zero-row input) and still return the expected output columns.
        scored['risk_score'] = np.empty(0, dtype=float)
        scored['is_flagged'] = np.empty(0, dtype=int)
        return scored

    s = model.predict_proba(df_new[cols])[:, 1]
    scored['risk_score'] = s
    scored['is_flagged'] = (s >= threshold).astype(int)
    return scored

# Example run: score the first five rows with the fitted pipeline and the chosen threshold.
score_transactions(df.head(5), rf, best_t)