In [1]:
# i want to finetune a model to do credit card fraud detection. A sample input is given below.

# the huggingface dataset for legitimate transactions is called "LouisXO/fraud-detection-legitimate"

# the huggingface dataset for fraudulent transactions is called "LouisXO/fraud-detection-all-fraud"

# all datasets have columns "conversation" and "response" 
# the response is either "LEGITIMATE" or "FRAUD"

# here is a sample data: 

# conversation: Transaction Details: - Date/Time: 2019-05-26 05:20:36 - Merchant: fraud_Romaguera, Cruickshank and Greenholt - Amount: $104.9 - Category: shopping_net - Gender: M - State: OR

# response: LEGITIMATE



In [2]:
# Import libraries
import datasets
from datasets import load_dataset, concatenate_datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
import wandb  # Add this line

wandb.init(project="fraud_detection")

  from .autonotebook import tqdm as notebook_tqdm





[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33maidenyang66[0m ([33myyfsss[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [23]:
# Load legitimate transactions dataset
legitimate_dataset = load_dataset("LouisXO/fraud-detection-legitimate")

# Load fraudulent transactions dataset
fraudulent_dataset = load_dataset("LouisXO/fraud-detection-all-fraud")

In [24]:
# Assign label 0 to legitimate transactions
legitimate_dataset = legitimate_dataset.map(lambda x: {'label': 0})

# Assign label 1 to fraudulent transactions
fraudulent_dataset = fraudulent_dataset.map(lambda x: {'label': 1})

# Combine the datasets
full_dataset = concatenate_datasets([legitimate_dataset['train'], fraudulent_dataset['train']])

In [25]:
# Shuffle the combined dataset
full_dataset = full_dataset.shuffle(seed=42)

# Split into training and validation sets (e.g., 90% train, 10% validation)
split_dataset = full_dataset.train_test_split(test_size=0.1)
train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

In [26]:
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Tokenization function
def tokenize_function(example):
    return tokenizer(example['conversation'], padding='max_length', truncation=True)

# Apply the tokenizer to the datasets
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_eval = eval_dataset.map(tokenize_function, batched=True)



In [27]:
# Load a pre-trained model for sequence classification
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    acc = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

In [29]:
training_args = TrainingArguments(
    output_dir='./results',            # Output directory
    evaluation_strategy='steps',       # Evaluate every N steps
    save_strategy='steps',             # Save the model every N steps
    eval_steps=500,                    # Evaluation interval
    save_steps=500,                    # Save interval
    num_train_epochs=1,                # Number of training epochs
    per_device_train_batch_size=8,    # Batch size for training
    per_device_eval_batch_size=8,     # Batch size for evaluation
    logging_dir='./logs',              # Directory for logs
    logging_steps=10,
    load_best_model_at_end=True,       # Load the best model when finished training
    report_to="wandb",                 
    
)



In [30]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,  
)

# Train the model
trainer.train()

  lambda data: self._console_raw_callback("stderr", data),
  1%|          | 98/12095 [07:15<14:48:15,  4.44s/it]
  0%|          | 10/12095 [00:01<36:23,  5.53it/s]
  lambda data: self._console_raw_callback("stderr", data),
[A                                                

                                                  
  0%|          | 11/12095 [00:01<36:44,  5.48it/s]

{'loss': 0.4332, 'grad_norm': 2.2821335792541504, 'learning_rate': 4.9958660603555194e-05, 'epoch': 0.0}


  0%|          | 20/12095 [00:03<36:20,  5.54it/s]
[A                                                

                                                  
  0%|          | 21/12095 [00:03<36:45,  5.48it/s]

{'loss': 0.312, 'grad_norm': 1.4064308404922485, 'learning_rate': 4.991732120711038e-05, 'epoch': 0.0}


  0%|          | 30/12095 [00:05<36:49,  5.46it/s]
[A                                                

                                                  
  0%|          | 31/12095 [00:05<37:15,  5.40it/s]

{'loss': 0.2424, 'grad_norm': 2.5422654151916504, 'learning_rate': 4.987598181066557e-05, 'epoch': 0.0}


  0%|          | 40/12095 [00:07<37:04,  5.42it/s]
[A                                                

                                                  
  0%|          | 41/12095 [00:07<37:25,  5.37it/s]

{'loss': 0.2214, 'grad_norm': 0.8496065735816956, 'learning_rate': 4.9834642414220755e-05, 'epoch': 0.0}


  0%|          | 50/12095 [00:09<37:52,  5.30it/s]
[A                                                

                                                  
  0%|          | 51/12095 [00:09<38:01,  5.28it/s]

{'loss': 0.1165, 'grad_norm': 0.43381261825561523, 'learning_rate': 4.9793303017775947e-05, 'epoch': 0.0}


  0%|          | 60/12095 [00:11<36:53,  5.44it/s]
[A                                                

                                                  
  1%|          | 61/12095 [00:11<37:03,  5.41it/s]

{'loss': 0.4326, 'grad_norm': 4.714425086975098, 'learning_rate': 4.975196362133113e-05, 'epoch': 0.0}


  1%|          | 70/12095 [00:12<36:46,  5.45it/s]
[A                                                

                                                  
  1%|          | 71/12095 [00:13<36:53,  5.43it/s]

{'loss': 0.4841, 'grad_norm': 7.424402236938477, 'learning_rate': 4.971062422488632e-05, 'epoch': 0.01}


  1%|          | 80/12095 [00:14<36:14,  5.53it/s]
[A                                                

                                                  
  1%|          | 81/12095 [00:14<36:29,  5.49it/s]

{'loss': 0.2156, 'grad_norm': 13.056736946105957, 'learning_rate': 4.966928482844151e-05, 'epoch': 0.01}


  1%|          | 90/12095 [00:16<36:14,  5.52it/s]
[A                                                

                                                  
  1%|          | 91/12095 [00:16<36:29,  5.48it/s]

{'loss': 0.4952, 'grad_norm': 3.069056987762451, 'learning_rate': 4.96279454319967e-05, 'epoch': 0.01}


  1%|          | 100/12095 [00:18<36:07,  5.53it/s]
[A                                                

                                                   
  1%|          | 101/12095 [00:18<36:39,  5.45it/s]

{'loss': 0.3345, 'grad_norm': 3.2471821308135986, 'learning_rate': 4.9586606035551884e-05, 'epoch': 0.01}


  1%|          | 110/12095 [00:20<36:11,  5.52it/s]
[A                                                

                                                   
  1%|          | 111/12095 [00:20<36:32,  5.47it/s]

{'loss': 0.3026, 'grad_norm': 5.892436981201172, 'learning_rate': 4.9545266639107076e-05, 'epoch': 0.01}


  1%|          | 120/12095 [00:21<35:58,  5.55it/s]
[A                                                

                                                   
  1%|          | 121/12095 [00:22<36:23,  5.48it/s]

{'loss': 0.2382, 'grad_norm': 2.005268096923828, 'learning_rate': 4.9503927242662254e-05, 'epoch': 0.01}


  1%|          | 130/12095 [00:23<36:15,  5.50it/s]
[A                                                

                                                   
  1%|          | 131/12095 [00:23<36:37,  5.44it/s]

{'loss': 0.2311, 'grad_norm': 1.553870439529419, 'learning_rate': 4.9462587846217445e-05, 'epoch': 0.01}


  1%|          | 140/12095 [00:25<36:05,  5.52it/s]
[A                                                

                                                   
  1%|          | 141/12095 [00:25<36:33,  5.45it/s]

{'loss': 0.3794, 'grad_norm': 1.7577718496322632, 'learning_rate': 4.942124844977264e-05, 'epoch': 0.01}


  1%|          | 150/12095 [00:27<36:11,  5.50it/s]
[A                                                

                                                   
  1%|          | 151/12095 [00:27<36:33,  5.44it/s]

{'loss': 0.2863, 'grad_norm': 7.821184158325195, 'learning_rate': 4.937990905332782e-05, 'epoch': 0.01}


  1%|▏         | 160/12095 [00:29<37:09,  5.35it/s]
[A                                                

                                                   
  1%|▏         | 161/12095 [00:29<37:37,  5.29it/s]

{'loss': 0.193, 'grad_norm': 0.8207614421844482, 'learning_rate': 4.933856965688301e-05, 'epoch': 0.01}


  1%|▏         | 170/12095 [00:31<36:35,  5.43it/s]
[A                                                

                                                   
  1%|▏         | 171/12095 [00:31<37:44,  5.27it/s]

{'loss': 0.1288, 'grad_norm': 4.262768745422363, 'learning_rate': 4.92972302604382e-05, 'epoch': 0.01}


  1%|▏         | 180/12095 [00:32<36:52,  5.38it/s]
[A                                                

                                                   
  1%|▏         | 181/12095 [00:33<37:31,  5.29it/s]

{'loss': 0.1682, 'grad_norm': 0.7002806067466736, 'learning_rate': 4.925589086399339e-05, 'epoch': 0.01}


  2%|▏         | 190/12095 [00:34<37:10,  5.34it/s]
[A                                                

                                                   
  2%|▏         | 191/12095 [00:35<37:32,  5.29it/s]

{'loss': 0.2924, 'grad_norm': 4.20320463180542, 'learning_rate': 4.9214551467548574e-05, 'epoch': 0.02}


  2%|▏         | 200/12095 [00:36<36:54,  5.37it/s]
[A                                                

                                                   
  2%|▏         | 201/12095 [00:36<36:55,  5.37it/s]

{'loss': 0.0651, 'grad_norm': 0.36486226320266724, 'learning_rate': 4.9173212071103766e-05, 'epoch': 0.02}


  2%|▏         | 210/12095 [00:38<36:03,  5.49it/s]
[A                                                

                                                   
  2%|▏         | 211/12095 [00:38<36:24,  5.44it/s]

{'loss': 0.1361, 'grad_norm': 0.24481438100337982, 'learning_rate': 4.913187267465895e-05, 'epoch': 0.02}


  2%|▏         | 220/12095 [00:40<35:55,  5.51it/s]
[A                                                

                                                   
  2%|▏         | 221/12095 [00:40<36:22,  5.44it/s]

{'loss': 0.1971, 'grad_norm': 0.24916037917137146, 'learning_rate': 4.909053327821414e-05, 'epoch': 0.02}


  2%|▏         | 230/12095 [00:42<36:57,  5.35it/s]
[A                                                

                                                   
  2%|▏         | 231/12095 [00:42<37:36,  5.26it/s]

{'loss': 0.2749, 'grad_norm': 4.0610761642456055, 'learning_rate': 4.904919388176933e-05, 'epoch': 0.02}


  2%|▏         | 240/12095 [00:44<38:20,  5.15it/s]
[A                                                

                                                   
  2%|▏         | 241/12095 [00:44<38:32,  5.13it/s]

{'loss': 0.011, 'grad_norm': 0.21870948374271393, 'learning_rate': 4.900785448532452e-05, 'epoch': 0.02}


  2%|▏         | 250/12095 [00:46<37:21,  5.28it/s]
[A                                                

                                                   
  2%|▏         | 251/12095 [00:46<37:17,  5.29it/s]

{'loss': 0.1338, 'grad_norm': 0.17639416456222534, 'learning_rate': 4.89665150888797e-05, 'epoch': 0.02}


  2%|▏         | 260/12095 [00:47<37:07,  5.31it/s]
[A                                                

                                                   
  2%|▏         | 261/12095 [00:48<36:56,  5.34it/s]

{'loss': 0.1417, 'grad_norm': 0.2700081169605255, 'learning_rate': 4.8925175692434895e-05, 'epoch': 0.02}


  2%|▏         | 270/12095 [00:49<36:36,  5.38it/s]
[A                                                

                                                   
  2%|▏         | 271/12095 [00:49<36:37,  5.38it/s]

{'loss': 0.1998, 'grad_norm': 0.1884690523147583, 'learning_rate': 4.888383629599008e-05, 'epoch': 0.02}


  2%|▏         | 280/12095 [00:51<35:51,  5.49it/s]
[A                                                

                                                   
  2%|▏         | 281/12095 [00:51<36:08,  5.45it/s]

{'loss': 0.2524, 'grad_norm': 4.057923316955566, 'learning_rate': 4.884249689954527e-05, 'epoch': 0.02}


  2%|▏         | 290/12095 [00:53<35:41,  5.51it/s]
[A                                                

                                                   
  2%|▏         | 291/12095 [00:53<36:05,  5.45it/s]

{'loss': 0.131, 'grad_norm': 0.34633177518844604, 'learning_rate': 4.8801157503100456e-05, 'epoch': 0.02}


  2%|▏         | 300/12095 [00:55<35:32,  5.53it/s]
[A                                                

                                                   
  2%|▏         | 301/12095 [00:55<35:57,  5.47it/s]

{'loss': 0.124, 'grad_norm': 0.2868196368217468, 'learning_rate': 4.875981810665565e-05, 'epoch': 0.02}


  3%|▎         | 310/12095 [00:57<35:33,  5.52it/s]
[A                                                

                                                   
  3%|▎         | 311/12095 [00:57<35:51,  5.48it/s]

{'loss': 0.0072, 'grad_norm': 0.17783790826797485, 'learning_rate': 4.871847871021083e-05, 'epoch': 0.03}


  3%|▎         | 320/12095 [00:58<36:28,  5.38it/s]
[A                                                

                                                   
  3%|▎         | 321/12095 [00:59<36:46,  5.34it/s]

{'loss': 0.2113, 'grad_norm': 0.13851796090602875, 'learning_rate': 4.8677139313766024e-05, 'epoch': 0.03}


  3%|▎         | 330/12095 [01:00<36:32,  5.37it/s]
[A                                                

                                                   
  3%|▎         | 331/12095 [01:00<36:34,  5.36it/s]

{'loss': 0.0732, 'grad_norm': 0.11791936308145523, 'learning_rate': 4.8635799917321215e-05, 'epoch': 0.03}


  3%|▎         | 340/12095 [01:02<36:36,  5.35it/s]
[A                                                

                                                   
  3%|▎         | 341/12095 [01:02<36:42,  5.34it/s]

{'loss': 0.08, 'grad_norm': 0.11513553559780121, 'learning_rate': 4.859446052087639e-05, 'epoch': 0.03}


  3%|▎         | 350/12095 [01:04<35:37,  5.50it/s]
[A                                                

                                                   
  3%|▎         | 351/12095 [01:04<35:51,  5.46it/s]

{'loss': 0.0779, 'grad_norm': 0.11702175438404083, 'learning_rate': 4.8553121124431585e-05, 'epoch': 0.03}


  3%|▎         | 360/12095 [01:06<35:34,  5.50it/s]
[A                                                

                                                   
  3%|▎         | 361/12095 [01:06<35:52,  5.45it/s]

{'loss': 0.2228, 'grad_norm': 4.147719860076904, 'learning_rate': 4.851178172798677e-05, 'epoch': 0.03}


  3%|▎         | 370/12095 [01:08<36:19,  5.38it/s]
[A                                                

                                                   
  3%|▎         | 371/12095 [01:08<36:35,  5.34it/s]

{'loss': 0.1357, 'grad_norm': 0.2306356132030487, 'learning_rate': 4.847044233154196e-05, 'epoch': 0.03}


  3%|▎         | 380/12095 [01:10<35:46,  5.46it/s]
[A                                                

                                                   
  3%|▎         | 381/12095 [01:10<36:03,  5.41it/s]

{'loss': 0.1337, 'grad_norm': 0.28859391808509827, 'learning_rate': 4.8429102935097146e-05, 'epoch': 0.03}


  3%|▎         | 390/12095 [01:11<35:20,  5.52it/s]
[A                                                

                                                   
  3%|▎         | 391/12095 [01:12<36:13,  5.38it/s]

{'loss': 0.0682, 'grad_norm': 0.23794610798358917, 'learning_rate': 4.838776353865234e-05, 'epoch': 0.03}


  3%|▎         | 400/12095 [01:13<35:23,  5.51it/s]
[A                                                

                                                   
  3%|▎         | 401/12095 [01:13<35:40,  5.46it/s]

{'loss': 0.2043, 'grad_norm': 0.25237521529197693, 'learning_rate': 4.834642414220752e-05, 'epoch': 0.03}


  3%|▎         | 410/12095 [01:15<35:21,  5.51it/s]
[A                                                

                                                   
  3%|▎         | 411/12095 [01:15<35:51,  5.43it/s]

{'loss': 0.1965, 'grad_norm': 4.104362964630127, 'learning_rate': 4.8305084745762714e-05, 'epoch': 0.03}


  3%|▎         | 420/12095 [01:17<35:27,  5.49it/s]
[A                                                

                                                   
  3%|▎         | 421/12095 [01:17<35:44,  5.44it/s]

{'loss': 0.0642, 'grad_norm': 0.2631126642227173, 'learning_rate': 4.82637453493179e-05, 'epoch': 0.03}


  4%|▎         | 430/12095 [01:19<35:13,  5.52it/s]
[A                                                

                                                   
  4%|▎         | 431/12095 [01:19<35:42,  5.44it/s]

{'loss': 0.1957, 'grad_norm': 0.22675563395023346, 'learning_rate': 4.822240595287309e-05, 'epoch': 0.04}


  4%|▎         | 440/12095 [01:20<35:22,  5.49it/s]
[A                                                

                                                   
  4%|▎         | 441/12095 [01:21<35:43,  5.44it/s]

{'loss': 0.1843, 'grad_norm': 0.3339103162288666, 'learning_rate': 4.818106655642828e-05, 'epoch': 0.04}


  4%|▎         | 450/12095 [01:22<35:18,  5.50it/s]
[A                                                

                                                   
  4%|▎         | 451/12095 [01:23<36:35,  5.30it/s]

{'loss': 0.0714, 'grad_norm': 0.2423594444990158, 'learning_rate': 4.8139727159983466e-05, 'epoch': 0.04}


  4%|▍         | 460/12095 [01:24<36:48,  5.27it/s]
[A                                                

                                                   
  4%|▍         | 461/12095 [01:24<37:22,  5.19it/s]

{'loss': 0.199, 'grad_norm': 0.22195391356945038, 'learning_rate': 4.809838776353866e-05, 'epoch': 0.04}


  4%|▍         | 470/12095 [01:26<35:20,  5.48it/s]
[A                                                

                                                   
  4%|▍         | 471/12095 [01:26<35:36,  5.44it/s]

{'loss': 0.1275, 'grad_norm': 0.27878180146217346, 'learning_rate': 4.805704836709384e-05, 'epoch': 0.04}


  4%|▍         | 480/12095 [01:28<35:21,  5.47it/s]
[A                                                

                                                   
  4%|▍         | 481/12095 [01:28<36:26,  5.31it/s]

{'loss': 0.2465, 'grad_norm': 0.358732670545578, 'learning_rate': 4.8015708970649034e-05, 'epoch': 0.04}


  4%|▍         | 490/12095 [01:30<35:29,  5.45it/s]
[A                                                

                                                   
  4%|▍         | 491/12095 [01:30<35:52,  5.39it/s]

{'loss': 0.0107, 'grad_norm': 0.2641292214393616, 'learning_rate': 4.797436957420422e-05, 'epoch': 0.04}


  4%|▍         | 500/12095 [01:32<35:19,  5.47it/s]
[A                                                

                                                   
  4%|▍         | 500/12095 [01:32<35:19,  5.47it/s]

{'loss': 0.1939, 'grad_norm': 0.19023583829402924, 'learning_rate': 4.793303017775941e-05, 'epoch': 0.04}




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

KeyboardInterrupt: 

In [None]:
# Evaluate the model
evaluation_results = trainer.evaluate()

# Print evaluation results
print(evaluation_results)