# Pre-processing

In [1]:
import os
import re
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Params for bert model and tokenization
Nsamp = 2000 # number of samples to generate in each class - 'spam', 'not spam'
maxtokens = 200 # the maximum number of tokens per document
maxtokenlen = 100 # the maximum length of each token

In [3]:
def tokenize(row):
    if row is None or row is '':
        tokens = ""
    else:
        try:
            tokens = row.split(" ")[:maxtokens]
        except:
            tokens=""
    return tokens

  if row is None or row is '':


In [4]:
def reg_expressions(row):
    tokens = []
    try:
        for token in row:
            token = token.lower()
            token = re.sub(r'[\W\d]', "", token)
            token = token[:maxtokenlen] # truncate token
            tokens.append(token)
    except:
        token = ""
        tokens.append(token)
    return tokens

In [5]:
import nltk

nltk.download('stopwords')
from nltk.corpus import stopwords
stopwords = stopwords.words('english')
print(stopwords) # see default stopwords

def stop_word_removal(row):
    token = [token for token in row if token not in stopwords]
    token = filter(None, token)
    return token

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
bodies_df = pd.read_csv('./bodies.csv')

In [7]:
fraud_bodies_df = pd.read_csv('./fraud_bodies_df.csv')

In [8]:
import random

# Convert everything to lower-case, truncate to maxtokens and truncate each token to maxtokenlen
EnronEmails = bodies_df.iloc[:,0].apply(tokenize)
EnronEmails = EnronEmails.apply(stop_word_removal)
EnronEmails = EnronEmails.apply(reg_expressions)
EnronEmails = EnronEmails.sample(Nsamp)

SpamEmails = fraud_bodies_df.iloc[:,0].apply(tokenize)
SpamEmails = SpamEmails.apply(stop_word_removal)
SpamEmails = SpamEmails.apply(reg_expressions)
SpamEmails = SpamEmails.sample(Nsamp)

raw_data = pd.concat([SpamEmails,EnronEmails], axis=0).values

In [9]:
print("Shape of combined data represented as numpy array is:")
print(raw_data.shape)
print("Data represented as numpy array is:")
print(raw_data)

# corresponding labels
Categories = ['spam','notspam']
header = ([1]*Nsamp)
header.extend(([0]*Nsamp))

Shape of combined data represented as numpy array is:
(4000,)
Data represented as numpy array is:
[list(['from', 'john', 'carlostel', '', '', 'cape', 'town', 'southafrica', 'dear', 'sirmadam', 'assistant', 'and', 'investmentyou', 'may', 'surprise', 'receive', 'letter', 'sinceyou', 'know', 'personally', 'i', 'son', 'dr', 'davidcarlos', 'recently', 'murdered', 'land', 'dispute', 'inzimbabwe', 'i', 'got', 'contact', 'network', 'line', 'mysearch', 'reliable', 'reputable', 'person', 'handle', 'veryconfidential', 'transaction', 'involves', 'transfer', 'fundto', 'foreign', 'account', 'i', 'decided', 'write', 'you', 'my', 'latefather', 'among', 'black', 'zimbabwean', 'rich', 'farmersmurdered', 'cold', 'blood', 'agents', 'rulinggovernment', 'president', 'robert', 'mugabe', 'allegedsupport', 'sympathy', 'zimbabwean', 'opposition', 'partycontrolled', 'white', 'minority', 'before', 'death', 'taken', 'johannesburg', '', 'southafrica', 'deposit', 'sum', 'us', '', 'million', 'eighteenmillion', 'five'

In [10]:
# function for shuffling data in unison with labels/header
def unison_shuffle(a, b):
    p = np.random.permutation(len(b))
    data = a[p]
    header = np.asarray(b)[p]
    return data, header

# function for converting data into the right format, due to the difference in required format from sklearn models
# we expect a single string per email here, versus a list of tokens for the sklearn models previously explored
def convert_data(raw_data,header):
    converted_data, labels = [], []
    for i in range(raw_data.shape[0]):
        out = ' '.join(raw_data[i])
        converted_data.append(out)
        labels.append(header[i])
        #print(i)
    converted_data = np.array(converted_data, dtype=object)[:, np.newaxis]

    return converted_data, np.array(labels)

raw_data, header = unison_shuffle(raw_data, header)

# split into independent 70% training and 30% testing sets
#idx = int(0.6*raw_data.shape[0])
# 70% of data for training
#train_x, train_y = convert_data(raw_data[:idx],header[:idx])
# remaining 30% for testing
#valid_x, valid_y = convert_data(raw_data[idx:],header[idx:])

total_size = raw_data.shape[0]

# Calculate indices
idx_train = int(0.6 * total_size)  # end of training set
idx_val = int(0.8 * total_size)  # end of validation set

# Split the data
train_x, train_y = convert_data(raw_data[:idx_train], header[:idx_train])
val_x, val_y = convert_data(raw_data[idx_train:idx_val], header[idx_train:idx_val])
test_x, test_y = convert_data(raw_data[idx_val:], header[idx_val:])

print("train_x/train_y list details, to make sure it is of the right form:")
print(len(train_x))
print(train_x)
print(train_y[:5])
print(train_y.shape)

train_x/train_y list details, to make sure it is of the right form:
2400
[['dear sirfmadamci mr martins oluseguncwe group business men deal rawmaterials export europefamericaewe searching representatives help us astablish mediumof getting costumers americafeurope well makingpayments useif interested transacting businesswithus gladeplease contact us via emaila olusegunfmartinsmailecom moreinformationesubject satisfaction given opportunity tonegotiate mode pay services ourrepresentative europefamericaeplease interested forward us phoneffax number andyour full contact addressesethankscceomremartins olusegune']
 ['from mrjim nelson  easton streetlondon wcx dw ukoccupations external auditorattndo accept sincere apologies mail meet personalethics although i wish use medium get touch firstbecause fastest means i staff account section knownbank united kingdomin one periodic auditing i disproved dormant account holdingbalance a fifteen million britishpounds operated past three years from myinve

In [11]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.32.0-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m90.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m62.4 MB/s[0m eta [36m0:00:0

# Obtain the dataset

In [12]:
import tensorflow as tf
from transformers import TFRobertaForSequenceClassification, RobertaTokenizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [13]:
# Load the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [14]:
train_x_list = train_x.flatten().tolist()
train_encodings = tokenizer(train_x_list, truncation=True, padding=True, max_length=256)


In [15]:
val_x_list = val_x.flatten().tolist()
valid_encodings = tokenizer(val_x_list, truncation=True, padding=True, max_length=256)


In [16]:
test_x_list = test_x.flatten().tolist()
test_encodings = tokenizer(test_x_list, truncation=True, padding=True, max_length=256)


In [17]:
print(len(train_encodings['input_ids']))  # The length of encoded texts
print(len(train_y))  # The length of labels


2400
2400


In [18]:
print(len(valid_encodings['input_ids']))  # The length of encoded texts
print(len(val_y))  # The length of labels


800
800


In [19]:
print(len(test_encodings['input_ids']))  # The length of encoded texts
print(len(test_y))  # The length of labels


800
800


In [20]:
# Convert the data to TensorFlow tensors
train_dataset_tf = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_y
))
valid_dataset_tf = tf.data.Dataset.from_tensor_slices((
    dict(valid_encodings),
    val_y
))
test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    test_y
))

In [21]:
sum(test_y)

408

# load the model

In [22]:
model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')

Downloading model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predicti

In [23]:

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [24]:

# Train the model
model.fit(train_dataset_tf.shuffle(1000).batch(16), epochs=1, batch_size=16, validation_data=valid_dataset_tf.batch(16) )



<keras.callbacks.History at 0x7f9b63541de0>

In [25]:
test_dataset_batched = test_dataset.batch(16)

In [26]:
predict = model.predict(test_dataset_batched)



In [27]:
from sklearn.metrics import classification_report

# Evaluate the performance

In [28]:
# Convert raw predictions to class predictions
y_pred = np.argmax(predict.logits, axis=-1)

# Get the actual class labels
y_true = []
for features, label in test_dataset:
    y_true.append(label.numpy())
y_true = np.array(y_true)


# Generate a classification report
report = classification_report(y_true, y_pred, target_names=["legitimate", "phishing"],digits=6)  # replace target_names with your actual class names
print(report)


              precision    recall  f1-score   support

  legitimate   0.989796  0.989796  0.989796       392
    phishing   0.990196  0.990196  0.990196       408

    accuracy                       0.990000       800
   macro avg   0.989996  0.989996  0.989996       800
weighted avg   0.990000  0.990000  0.990000       800



In [41]:
test_new = test_x.reshape(-1)

In [42]:
test_new_df = pd.DataFrame(test_new)

In [43]:
test_new_df = pd.DataFrame(test_new, columns=['text'])

In [44]:
test_new_df['target'] = test_y

In [33]:
test_dataset_raw_0 = test_new_df[test_new_df['target']==0]

In [75]:
random_100_rows = test_dataset_raw_0.sample(n=100)

# Build the new DataFrame using the randomly selected rows
test_dataset_raw_0_100 = pd.DataFrame(random_100_rows)

In [110]:
test_dataset_raw_0_100.head()

Unnamed: 0,text,target
135,mikehere draft version ca enron km power co pl...,0
179,note this article gives strategy economics pur...,0
798,metingreat hear you believe not brother lives ...,0
275,maybe meet i sure dave told intent is least ne...,0
65,yeah cant make home original messagefrom huble...,0


In [76]:
test_dataset_raw_1 = test_new_df[test_new_df['target']==1]

In [77]:
random_1_rows = test_dataset_raw_1.sample(n=25)

# Build the new DataFrame using the randomly selected rows
test_dataset_raw_1_25 = pd.DataFrame(random_1_rows)

In [111]:
test_dataset_raw_1_25.head()

Unnamed: 0,text,target
532,emailmessagemessage object xdb emailmessagemes...,1
120,hello dearmy name aishatu ahmedmy father recen...,1
85,dear sirfmadamci got contact email directory d...,1
258,emailmessagemessage object xdc emailmessagemes...,1
528,emailmessagemessage object xdfe emailmessageme...,1


In [78]:
merged_df = pd.concat([test_dataset_raw_0_100, test_dataset_raw_1_25], ignore_index=True)

# Shuffle the index
test_125_df = merged_df.sample(frac=1).reset_index(drop=True)

In [79]:
test_encodings_125 = tokenizer(test_125_df['text'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_125 = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_125),
    test_125_df['target'].values
))


In [80]:
test_dataset_125_batched = test_dataset_125.batch(16)
y_pred_raw_125 = model.predict(test_dataset_125_batched)

# Convert raw predictions to class predictions
y_pred_125 = np.argmax(y_pred_raw_125.logits, axis=1)

# Get the actual class labels
y_true_125 = []
for features, label in test_dataset_125:
    y_true_125.append(label.numpy())
y_true_125 = np.array(y_true_125)


# Generate a classification report
report_125 = classification_report(y_true_125, y_pred_125, target_names=["legitimate", "phishing"],digits=4)  # replace target_names with your actual class names
print(report_125)

              precision    recall  f1-score   support

  legitimate     1.0000    0.9600    0.9796       100
    phishing     0.8621    1.0000    0.9259        25

    accuracy                         0.9680       125
   macro avg     0.9310    0.9800    0.9528       125
weighted avg     0.9724    0.9680    0.9689       125



# chatgpt 3.5 Rephrase

In [112]:
test_dataset_raw_1_25.index

Int64Index([532, 120,  85, 258, 528, 603,  39, 374, 169, 614, 440, 751, 547,
            168, 159, 719, 548,  82, 402, 665,   4,  38, 526,  35, 734],
           dtype='int64')

In [135]:
test_dataset_raw_gpt4 = test_dataset_raw_1_25

In [191]:
test_dataset_raw_gpt4['text'][734]


'from auditor dr charakhead of departmentbank of scotlandunited kingdomcontact me privately through this email boxcharakroultwell_bankofscotlandlatinmailcom dear partner in order to transfer out  fifteenmillion pounds sterlings from our bank here in london i have the courage to look for a reliable and honest person who will be capable for this important business transactionbelieving that you will never let me down either now or in future the owner of this account is mr john hughes foreigner and the manager of petrol chemical service here in londona chemical engineer by proffession and he died since the account has no other beneficiary and my investigation proved to me as well that his company does not know anything about this account and the amount involved is pounds sterlings i want to transfer this money into a safe foreign account abroad but i dont know any foreigneri know that this message will come to you as a surprise as we dont know our self beforebut be sure that it is real and

In [139]:
test_dataset_raw_gpt4['text'][532] ="A message object named 'emailmessagemessage' within the 'xdb' context."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][532] ="A message object named 'emailmessagemessage' within the 'xdb' context."


In [141]:
test_dataset_raw_gpt4['text'][120] = "My name is Aishatu Ahmed. Following a family conflict driven by disputes over a substantial amount of money my father obtained from his involvement with the ministry, he was tragically murdered in Tripoli. Instead of keeping the money in an illicit home, he chose to deposit it in a Libyan bank without proper explanation. This decision had severe consequences, as some believed the money was still in our mansion, leading to a violent search and the unfortunate death of my younger brother. Fearing for our lives, my mother and I fled, eventually finding refuge in Burkina Faso, where I established contact with a consultancy firm, after my late father had diplomatically moved the funds to a European financial institution, designating me as the beneficiary."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][120] = "My name is Aishatu Ahmed. Following a family conflict driven by disputes over a substantial amount of money my father obtained from his involvement with the ministry, he was tragically murdered in Tripoli. Instead of keeping the money in an illicit home, he chose to deposit it in a Libyan bank without proper explanation. This decision had severe consequences, as some believed the money was still in our mansion, leading to a violent search and the unfortunate death of my younger brother. Fearing for our lives, my mother and I fled, eventually finding refuge in Burkina Faso, where I established contact with a consultancy firm, after my late father had diplomatically moved the funds to a European financial institution, designating me as the benefi

In [144]:
test_dataset_raw_gpt4['text'][85] = "I am reaching out to you with a matter of utmost importance. My name is Johnson Savimbi, the son of the late rebel leader from Angola who tragically passed away on the 20th of February while opposing the Angolan army. Please consider the information provided on this webpage: [webpage link]. Prior to his demise, my father had securely deposited sixteen million dollars in a European security company, supported by all the necessary legal documentation. Both my parents had entrusted me with this knowledge for safekeeping. Since my family and I had to flee to South Africa following my father's death, we are currently seeking refuge and attempting to secure visas for travel to Europe, which has proven to be a challenging process. We are seeking assistance to facilitate the claims process for the funds my father left behind in Europe, with the intention of transferring the funds to an account of our choice once the legal formalities are settled. In consideration of your support, my family has agreed to share a considerable portion, around 30%, of the total amount, which translates to three million dollars, as a token of our gratitude. Your assistance would be invaluable in helping us navigate this intricate situation and find a more stable life for our family."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][85] = "I am reaching out to you with a matter of utmost importance. My name is Johnson Savimbi, the son of the late rebel leader from Angola who tragically passed away on the 20th of February while opposing the Angolan army. Please consider the information provided on this webpage: [webpage link]. Prior to his demise, my father had securely deposited sixteen million dollars in a European security company, supported by all the necessary legal documentation. Both my parents had entrusted me with this knowledge for safekeeping. Since my family and I had to flee to South Africa following my father's death, we are currently seeking refuge and attempting to secure visas for travel to Europe, which has proven to be a challenging process. We are seeking assist

In [146]:
test_dataset_raw_gpt4['text'][258] = "In the realm of 'xdc,' there exists an email message object denoted as 'emailmessagemessage.'"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][258] = "In the realm of 'xdc,' there exists an email message object denoted as 'emailmessagemessage.'"


In [148]:
test_dataset_raw_gpt4['text'][528] = "In the context of 'xdfe,' a message entity known as 'emailmessagemessage object xdffa' exists."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][528] = "In the context of 'xdfe,' a message entity known as 'emailmessagemessage object xdffa' exists."


In [152]:
test_dataset_raw_gpt4['text'][603] ="I am writing to you from the office of Dr. Yinka Craige, a senior accountant at the Nigerian Liquified Natural Gas (NLNG) in Lagos. I have been tasked by my associates to seek a foreign partner capable of aiding us in the transfer of a significant amount totaling $27.5 million USD. This sum has resulted from a deliberate overinvoicing of a specific contract granted by our organization. My colleagues and I have collectively agreed that if you or your company can furnish an account for the funds' reception, the distribution will be as follows: 85% for our team here in Nigeria, including the involved officials, and 15% for you or your company that facilitates this endeavor. Your assistance in this matter would be greatly appreciated."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][603] ="I am writing to you from the office of Dr. Yinka Craige, a senior accountant at the Nigerian Liquified Natural Gas (NLNG) in Lagos. I have been tasked by my associates to seek a foreign partner capable of aiding us in the transfer of a significant amount totaling $27.5 million USD. This sum has resulted from a deliberate overinvoicing of a specific contract granted by our organization. My colleagues and I have collectively agreed that if you or your company can furnish an account for the funds' reception, the distribution will be as follows: 85% for our team here in Nigeria, including the involved officials, and 15% for you or your company that facilitates this endeavor. Your assistance in this matter would be greatly appreciated."


In [154]:
test_dataset_raw_gpt4['text'][39] = "emailmessagemessage object xde."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][39] = "emailmessagemessage object xde."


In [156]:
test_dataset_raw_gpt4['text'][374] = "Dear Friend, I hope this email finds you well. I am Dr. Ibrahim Usman, the Manager of the Bill and Exchange Department at the Bank of Africa (BOA) in Ouagadougou, Burkina Faso. I understand that this message might come as a surprise, but I am writing to you with a proposal that I believe could be of mutual benefit to both of our families. There is an urgent matter at hand involving the transfer of a substantial sum of money—millions—into an account. This money has been dormant in our bank for several years without any activity or claim from its owner, Joseph F. Grillo, a foreigner and the manager of a petrochemical service. Tragically, he lost his life as a victim of the September incident at the World Trade Center in the United States. Despite our efforts, we have been unable to locate his relatives to claim the funds. I am seeking your assistance in facilitating the transfer of this abandoned fund into the bank's treasury. Your involvement is essential, and I assure you that all proceedings will be handled discreetly and with utmost confidentiality. I kindly request your prompt response to this matter. For more information, you can verify the details on this website: [website link]. Thank you for considering this proposal, and I look forward to your positive response."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][374] = "Dear Friend, I hope this email finds you well. I am Dr. Ibrahim Usman, the Manager of the Bill and Exchange Department at the Bank of Africa (BOA) in Ouagadougou, Burkina Faso. I understand that this message might come as a surprise, but I am writing to you with a proposal that I believe could be of mutual benefit to both of our families. There is an urgent matter at hand involving the transfer of a substantial sum of money—millions—into an account. This money has been dormant in our bank for several years without any activity or claim from its owner, Joseph F. Grillo, a foreigner and the manager of a petrochemical service. Tragically, he lost his life as a victim of the September incident at the World Trade Center in the United States. Despite

In [158]:
test_dataset_raw_gpt4['text'][169] = "Dear Sir/Madam, I hope this message finds you well. I am reaching out to you with a proposal that holds significant importance and urgency. I am Frani Korona, and I have experienced both joyful and challenging moments in life. My late brother, Abdul Korona Junior, and his 7-year-old daughter are no longer with us. We tragically lost our beloved father, Dr. Abdul Korona, who was a highly respected business figure and the Director General of the National Gold Diamond Mining Corporation in Sierra Leone. His sudden demise occurred during a business trip to Dublin under mysterious circumstances. While the circumstances surrounding his passing have raised suspicions of foul play orchestrated by an uncle who accompanied him, only divine knowledge holds the complete truth. The loss extended further with the passing of our mother, leaving me, aged 21, and my younger brother, 17, without parental guidance. Our father was caring and attentive, ensuring our needs were met. Before his untimely passing, he entrusted us with important documents and a sum of money that he had deposited with an overseas security company. This amount is secured within two trunk boxes that hold our family's cherished valuables and treasures. My brother, Paul John, stands as my only remaining family. This proposal carries immense significance, and I eagerly await your response."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][169] = "Dear Sir/Madam, I hope this message finds you well. I am reaching out to you with a proposal that holds significant importance and urgency. I am Frani Korona, and I have experienced both joyful and challenging moments in life. My late brother, Abdul Korona Junior, and his 7-year-old daughter are no longer with us. We tragically lost our beloved father, Dr. Abdul Korona, who was a highly respected business figure and the Director General of the National Gold Diamond Mining Corporation in Sierra Leone. His sudden demise occurred during a business trip to Dublin under mysterious circumstances. While the circumstances surrounding his passing have raised suspicions of foul play orchestrated by an uncle who accompanied him, only divine knowledge hold

In [160]:
test_dataset_raw_gpt4['text'][614] = "Hello, dear recipient. I understand that this email might be unexpected, and there could be a temptation to dismiss it as unserious. However, I sincerely request that you consider this message with genuine humility. I am Mr. Zeid Ahmadi, responsible for the Telex Computing Department at the Bank of Africa (BOA) in Ouagadougou, Burkina Faso, West Africa. With utmost respect, I have reached out to you to propose a business transaction that has the potential to be mutually beneficial. During an investigation and audit in our bank's department, we stumbled upon a significant sum of money that belonged to a deceased customer. This individual tragically passed away on July 15th in a motor accident, leaving behind a dormant account with considerable funds that have remained unclaimed by any family member or relation. This discovery has led me to contemplate a course of action that could yield profit for both parties involved. While I have maintained confidentiality regarding this information, I believe that your involvement could contribute to the successful execution of this plan. The total sum in question is $18.5 million USD, and I have been impressed by your credentials."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][614] = "Hello, dear recipient. I understand that this email might be unexpected, and there could be a temptation to dismiss it as unserious. However, I sincerely request that you consider this message with genuine humility. I am Mr. Zeid Ahmadi, responsible for the Telex Computing Department at the Bank of Africa (BOA) in Ouagadougou, Burkina Faso, West Africa. With utmost respect, I have reached out to you to propose a business transaction that has the potential to be mutually beneficial. During an investigation and audit in our bank's department, we stumbled upon a significant sum of money that belonged to a deceased customer. This individual tragically passed away on July 15th in a motor accident, leaving behind a dormant account with considerable f

In [162]:
test_dataset_raw_gpt4['text'][440] = "Dear Friend, I understand that you may have encountered messages of this nature online, and they might have seemed like mere jokes or games. However, I urge you to take this seriously and provide an urgent response. I am Barrister Alexander Afadia, a solicitor and the personal attorney of Mr. Adams Kielar, a national of your country who was an expatriate contractor in the oil industry. Tragically, on April 1st, Mr. Kielar, along with his wife and three children, lost their lives in a car accident on the Sagamu-Lagos Express Road. Since the accident, I have made numerous efforts, including embassy inquiries, to locate any extended relatives of my late client, but all my attempts have been unsuccessful. In light of this, I turned to the internet to find a family member, and that is why I am reaching out to you. My goal is to seek your assistance in repatriating the money and property left behind by my late client before they are confiscated or declared unserviceable by the bank. He had substantial deposits, particularly in one specific bank, and the total value is significant. Your involvement could greatly aid in this process."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][440] = "Dear Friend, I understand that you may have encountered messages of this nature online, and they might have seemed like mere jokes or games. However, I urge you to take this seriously and provide an urgent response. I am Barrister Alexander Afadia, a solicitor and the personal attorney of Mr. Adams Kielar, a national of your country who was an expatriate contractor in the oil industry. Tragically, on April 1st, Mr. Kielar, along with his wife and three children, lost their lives in a car accident on the Sagamu-Lagos Express Road. Since the accident, I have made numerous efforts, including embassy inquiries, to locate any extended relatives of my late client, but all my attempts have been unsuccessful. In light of this, I turned to the internet 

In [164]:
test_dataset_raw_gpt4['text'][751]= "A message entity labeled as emailmessagemessage object xdaf corresponds to emailmessagemessage object xdac."


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][751]= "A message entity labeled as emailmessagemessage object xdaf corresponds to emailmessagemessage object xdac."


In [166]:
test_dataset_raw_gpt4['text'][547] = "Mr. Paul Wemba of Ellwood Crescent, Pretoria, can be contacted via email at p_webay@yahoo.co.in. I am Mr. Paul Wemba, the Chief Auditor at the African Development Bank (ADB). There is an account that has been established within our bank, but it has remained dormant without any activity. Upon reviewing old files and records, I have made a discovery that requires urgent attention. If no action is taken promptly, the funds within this account could be lost. It appears that nobody has operated on this account for quite some time. The implications are that either the bank's board of directors might gain access to these funds, which could eventually be classified as dormant and seized by the government's treasury during the forthcoming audit by national auditors. To prevent this outcome, I urgently need to remit the funds. It is my belief that the government's treasury will confiscate the funds if not claimed, and considering that individuals like you and I are involved, it is imperative that we act to secure these funds. The account belongs to the late Dr. B. D. Redo, a foreign miner associated with Kruger Gold Co and a professional geologist, who has since passed away. Nobody else has any knowledge of this account or its contents, and my investigation has confirmed that the company is unaware of its existence. The total amount involved is $10.5 million USD."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][547] = "Mr. Paul Wemba of Ellwood Crescent, Pretoria, can be contacted via email at p_webay@yahoo.co.in. I am Mr. Paul Wemba, the Chief Auditor at the African Development Bank (ADB). There is an account that has been established within our bank, but it has remained dormant without any activity. Upon reviewing old files and records, I have made a discovery that requires urgent attention. If no action is taken promptly, the funds within this account could be lost. It appears that nobody has operated on this account for quite some time. The implications are that either the bank's board of directors might gain access to these funds, which could eventually be classified as dormant and seized by the government's treasury during the forthcoming audit by natio

In [169]:
test_dataset_raw_gpt4['text'][168] = "From the office of Dr. Dan Kabo, Accountant General of the Nigerian Liquefied Natural Gas (NLNG), I am writing to you with a business proposal that I sincerely believe holds genuine potential, despite the possibility that you may have encountered similar propositions in the past. I am approaching you in my capacity as the Chief Accountant, with full endorsement from the Auditor General of the Nigerian Liquefied Natural Gas Company (NLNG). Following an internal audit conducted after the close of the third quarter, a revelation has come to light. This disclosure pertains to the sum of $32 million USD, which has been unearthed from the records of executed contracts awarded by NLNG since the inception of democracy in Nigeria. Notably, NLNG has scaled its operations to a significant working asset worth $3 billion USD in the current civilian dispensation, with a projected completion cost of $2 billion USD for the final phase. Importantly, this $32 million USD has been authorized for payment by NLNG and endorsed by the Federal Ministry of Finance, with the Accountant General of the Federation overseeing the payment through the apex bank."


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][168] = "From the office of Dr. Dan Kabo, Accountant General of the Nigerian Liquefied Natural Gas (NLNG), I am writing to you with a business proposal that I sincerely believe holds genuine potential, despite the possibility that you may have encountered similar propositions in the past. I am approaching you in my capacity as the Chief Accountant, with full endorsement from the Auditor General of the Nigerian Liquefied Natural Gas Company (NLNG). Following an internal audit conducted after the close of the third quarter, a revelation has come to light. This disclosure pertains to the sum of $32 million USD, which has been unearthed from the records of executed contracts awarded by NLNG since the inception of democracy in Nigeria. Notably, NLNG has scal

In [171]:
test_dataset_raw_gpt4['text'][159] ="The message entity denoted as emailmessagemessage object xdd can be equivalently expressed as emailmessagemessage object xdc."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][159] ="The message entity denoted as emailmessagemessage object xdd can be equivalently expressed as emailmessagemessage object xdc."


In [173]:
test_dataset_raw_gpt4['text'][719] = "Attention: The Managing Director,Allow me to introduce myself as Mrs. Amina Eabello, an Iraqi refugee. Following recent events, my husband, who was once a personal aide to the former President of Iraq before the American government's intervention, has unfortunately passed away. Amidst the ongoing crisis that has engulfed our country, I find myself in the position of seeking refuge in Thailand. I wish to share that we have inherited a substantial sum of $7 million USD from my late husband's endeavors. This fund was originally obtained during his tenure as a personal aide to the President, and it has been safeguarded through diplomatic channels. Now residing in Thailand, I am reaching out to you with the intention of seeking guidance on making a prudent investment in a business-friendly environment. Additionally, I am interested in purchasing a residential property as I plan to relocate my family in the near future. I kindly request your expert assistance in ensuring that all technical and logistical aspects are meticulously addressed to our satisfaction. Your insights and support are invaluable, and I look forward to your guidance in this endeavor."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][719] = "Attention: The Managing Director,Allow me to introduce myself as Mrs. Amina Eabello, an Iraqi refugee. Following recent events, my husband, who was once a personal aide to the former President of Iraq before the American government's intervention, has unfortunately passed away. Amidst the ongoing crisis that has engulfed our country, I find myself in the position of seeking refuge in Thailand. I wish to share that we have inherited a substantial sum of $7 million USD from my late husband's endeavors. This fund was originally obtained during his tenure as a personal aide to the President, and it has been safeguarded through diplomatic channels. Now residing in Thailand, I am reaching out to you with the intention of seeking guidance on making a 

In [175]:
test_dataset_raw_gpt4['text'][548]="Dear Sir/Madam,Greetings. I am reaching out to you with a request for your confidence in a transaction that I am proposing. I understand that the sensitive nature and potential scale of this transaction might raise concerns, but I assure you of its confidentiality and successful outcome. Urgency has prompted me to contact you, and I trust in your discretion and capability to manage a matter of utmost importance. Allow me to properly introduce myself: I am Mr. Mike Charles Kobic, the son of the late Cole Johnson Kobi, who served as the Assistant Chief of the General Staff for Sierra Leone's Revolutionary United Front (RUF). I also hold the title of Crown Prince in the Kuloma village of the South West District in Sierra Leone. My father tragically passed away on August 11th, due to circumstances related to the war in our country. My purpose in reaching out to you is because I have conducted a private search on the internet to find a reliable and reputable individual who can handle a confidential transaction that involves the transfer of a substantial sum of money."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][548]="Dear Sir/Madam,Greetings. I am reaching out to you with a request for your confidence in a transaction that I am proposing. I understand that the sensitive nature and potential scale of this transaction might raise concerns, but I assure you of its confidentiality and successful outcome. Urgency has prompted me to contact you, and I trust in your discretion and capability to manage a matter of utmost importance. Allow me to properly introduce myself: I am Mr. Mike Charles Kobic, the son of the late Cole Johnson Kobi, who served as the Assistant Chief of the General Staff for Sierra Leone's Revolutionary United Front (RUF). I also hold the title of Crown Prince in the Kuloma village of the South West District in Sierra Leone. My father tragically 

In [177]:
test_dataset_raw_gpt4['text'][82] = "Dear Sir/Madam,I extend my apologies for utilizing your valuable time, but I consider it imperative to introduce a business proposal that I believe could prove mutually beneficial. As the first son of the late Chief Ken Saro Wiwa, a prominent environmental activist, writer, publisher, and business leader in the oil-producing community of Ogoni Land within the Niger Delta region of Nigeria, I am reaching out. My father was not only the President and Founder of the Movement for the Survival of the Ogoni People (MOSOP), a nonviolent advocacy group, but also a crucial figure in the fight for the rights of the Ogoni people and their land's resources. His demise occurred through a hanging under the military regime of President General Sani Abacha, who he was actively opposing. Prior to his untimely death, my father was well aware of the precarious situation he was in and, as such, he crafted a will that specifically outlined his stocks, bonds, properties, and funds – assets that rightfully belong to me. However, his personal legal advisor, Mr. Briggs, conspired with the government, leading to the confiscation of my father's fixed assets and the freezing of his accounts."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][82] = "Dear Sir/Madam,I extend my apologies for utilizing your valuable time, but I consider it imperative to introduce a business proposal that I believe could prove mutually beneficial. As the first son of the late Chief Ken Saro Wiwa, a prominent environmental activist, writer, publisher, and business leader in the oil-producing community of Ogoni Land within the Niger Delta region of Nigeria, I am reaching out. My father was not only the President and Founder of the Movement for the Survival of the Ogoni People (MOSOP), a nonviolent advocacy group, but also a crucial figure in the fight for the rights of the Ogoni people and their land's resources. His demise occurred through a hanging under the military regime of President General Sani Abacha, who

In [179]:
test_dataset_raw_gpt4['text'][402] = "The phrase emailmessagemessage object xcffdf can be rephrased as a message entity labeled as xcffdf."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][402] = "The phrase emailmessagemessage object xcffdf can be rephrased as a message entity labeled as xcffdf."


In [181]:
test_dataset_raw_gpt4['text'][665] = "Greetings, I am Rabi Al Salih, the wife of Mohammed Mahdi Al Salih, the former Iraqi Minister of Interior Affairs. I came across your email address while searching the internet for a reliable person who could assist my family. I believe you might be able to offer the help we need. My husband was captured by US forces in April and is currently imprisoned, facing trial on charges of terrorism, corruption, embezzlement, and the mysterious charge of plunder, which carries the risk of a death sentence. During his time as the Minister of Interior, I realized a substantial amount of money from various successful deals. My intention was to invest this money for the future of our children, focusing on real estate and industrial production. While my husband was in power, I discreetly siphoned a sum of $10 million USD from Iraq and deposited it with a security firm that specializes in transporting valuable goods through diplomatic channels. I also declared this consignment as solid gold, belonging to a foreign business partner. I am reaching out to you because I am seeking assistance in safeguarding these funds and ensuring our family's security in this difficult time."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][665] = "Greetings, I am Rabi Al Salih, the wife of Mohammed Mahdi Al Salih, the former Iraqi Minister of Interior Affairs. I came across your email address while searching the internet for a reliable person who could assist my family. I believe you might be able to offer the help we need. My husband was captured by US forces in April and is currently imprisoned, facing trial on charges of terrorism, corruption, embezzlement, and the mysterious charge of plunder, which carries the risk of a death sentence. During his time as the Minister of Interior, I realized a substantial amount of money from various successful deals. My intention was to invest this money for the future of our children, focusing on real estate and industrial production. While my husb

In [183]:
test_dataset_raw_gpt4['text'][4] ="The phrase emailmessagemessage object xcffdbe can be rephrased as a message entity identified as xcffdbe."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][4] ="The phrase emailmessagemessage object xcffdbe can be rephrased as a message entity identified as xcffdbe."


In [185]:
test_dataset_raw_gpt4['text'][38] = "From Nicholas Mavis in Abidjan, Ivory Coast, using a private email address nicholasfmavis@yahoo.eco.jp, I am making an earnest appeal for your urgent assistance. With all due respect, I believe that with the divine's grace, it's possible for me to reach out for help. Kindly allow me to express my desire for establishing a business relationship with you. I am Nicholas Mavis, the son of the late Mavis Radcic. My father was a prosperous cocoa merchant in Abidjan, the economic capital of Ivory Coast. Unfortunately, my father was poisoned to death by his business associates during an outing on the first day of January while celebrating the New Year. Tragedy further struck as my mother passed away when I was still an infant. My father had taken special care of me before his demise. On his deathbed in January, within a private hospital in Abidjan, he confided in me about a sum of $12 million USD that he had discreetly left in a suspense account at a prime bank in Abidjan. He used my name as his son and next of kin to deposit these funds. He also expressed his wish for me to find a trustworthy foreign partner who could assist in moving these funds abroad for investment purposes."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][38] = "From Nicholas Mavis in Abidjan, Ivory Coast, using a private email address nicholasfmavis@yahoo.eco.jp, I am making an earnest appeal for your urgent assistance. With all due respect, I believe that with the divine's grace, it's possible for me to reach out for help. Kindly allow me to express my desire for establishing a business relationship with you. I am Nicholas Mavis, the son of the late Mavis Radcic. My father was a prosperous cocoa merchant in Abidjan, the economic capital of Ivory Coast. Unfortunately, my father was poisoned to death by his business associates during an outing on the first day of January while celebrating the New Year. Tragedy further struck as my mother passed away when I was still an infant. My father had taken specia

In [187]:
test_dataset_raw_gpt4['text'][526] = "Dear Friend,Greetings to you. I hope this message finds you well. I am reaching out with a request that holds immense potential benefit for both of us. As an executor of wills, you might understand the situations that arise in which we are tempted to make fortunes for our clients. Unfortunately, circumstances often leave us with limited options. The matter I am presenting pertains to a unique case involving a client who willed a significant fortune to a next of kin. However, tragically, this individual passed away on the same day as the October 1st incident involving an Egyptian airline carrying passengers. You can verify this from the news published on the BBC World News website: http://news.bbc.co.uk/hi/world/americas/6650315.stm. The challenge I am facing is how to handle this fortune. According to English law, if nobody comes forward as the next of kin within seven years of the benefactor's demise, the fortune is supposed to be bequeathed to the government. In light of this, I am contacting you to propose that you act as the beneficiary and lay claim to this legacy of millions that my deceased client had bequeathed to their next of kin. At this point, I am the sole individual who is aware of this situation."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][526] = "Dear Friend,Greetings to you. I hope this message finds you well. I am reaching out with a request that holds immense potential benefit for both of us. As an executor of wills, you might understand the situations that arise in which we are tempted to make fortunes for our clients. Unfortunately, circumstances often leave us with limited options. The matter I am presenting pertains to a unique case involving a client who willed a significant fortune to a next of kin. However, tragically, this individual passed away on the same day as the October 1st incident involving an Egyptian airline carrying passengers. You can verify this from the news published on the BBC World News website: http://news.bbc.co.uk/hi/world/americas/6650315.stm. The challen

In [190]:
test_dataset_raw_gpt4['text'][35] = "Dear Friend, I sincerely apologize for any past efforts you might have made to assist me. I am pleased to inform you that I have successfully managed to facilitate the transfer of funds with the cooperation of a new partner from Chile. You can contact the secretary in Cotonou, Benin, for further instructions on my behalf. His name is Davide Marks, and his email is davide_marks@myway.com. Please take note of the required information that you need to provide to my secretary: your full names, your residential address, your direct cellphone number, and your home phone, along with a fax if applicable. Currently, I am occupied with investment projects in Chile with my new partner, so please don't hesitate to get in touch with Mr. Davide Marks. He will ensure the prompt dispatch of the bank draft. Please accept my regards, and kindly note that this email was sent via http://webmail.zoom.co.uk.Warm regards,Barr Ahmed Abdulaziz"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][35] = "Dear Friend, I sincerely apologize for any past efforts you might have made to assist me. I am pleased to inform you that I have successfully managed to facilitate the transfer of funds with the cooperation of a new partner from Chile. You can contact the secretary in Cotonou, Benin, for further instructions on my behalf. His name is Davide Marks, and his email is davide_marks@myway.com. Please take note of the required information that you need to provide to my secretary: your full names, your residential address, your direct cellphone number, and your home phone, along with a fax if applicable. Currently, I am occupied with investment projects in Chile with my new partner, so please don't hesitate to get in touch with Mr. Davide Marks. He will

In [192]:
test_dataset_raw_gpt4['text'][734] = "From: Dr. Charak Head of Department Bank of Scotland United Kingdom Contact me privately through this email address: charakroultwell_bankofscotlandlatinmail.comDear Partner,I am reaching out in pursuit of a significant business transaction that entails transferring fifteen million pounds sterling from our bank in London. In seeking a trustworthy individual, I am convinced that your reliability and honesty make you suitable for this endeavor. I have confidence that you will prove your commitment, not only now but also in the future. The account holder, Mr. John Hughes, was a foreigner and the manager of a petrol chemical service in London. He was a chemical engineer by profession and has since passed away. Given that there are no other beneficiaries for this account and my investigation revealed that his company is unaware of the account, I intend to transfer the funds into a secure foreign account abroad. Although we are not acquainted, I am confident that this proposal is legitimate and sincere. Your participation in this investment is highly appreciated. To facilitate communication, kindly provide your full contact address and send me your full name.Best regards,Dr. Chara"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt4['text'][734] = "From: Dr. Charak Head of Department Bank of Scotland United Kingdom Contact me privately through this email address: charakroultwell_bankofscotlandlatinmail.comDear Partner,I am reaching out in pursuit of a significant business transaction that entails transferring fifteen million pounds sterling from our bank in London. In seeking a trustworthy individual, I am convinced that your reliability and honesty make you suitable for this endeavor. I have confidence that you will prove your commitment, not only now but also in the future. The account holder, Mr. John Hughes, was a foreigner and the manager of a petrol chemical service in London. He was a chemical engineer by profession and has since passed away. Given that there are no other beneficia

In [193]:
test_dataset_raw_gpt4.head()

Unnamed: 0,text,target
532,A message object named 'emailmessagemessage' w...,1
120,My name is Aishatu Ahmed. Following a family c...,1
85,I am reaching out to you with a matter of utmo...,1
258,"In the realm of 'xdc,' there exists an email m...",1
528,"In the context of 'xdfe,' a message entity kno...",1


In [194]:
merged_df_gpt4 = pd.concat([test_dataset_raw_0_100, test_dataset_raw_gpt4], ignore_index=True)

# Shuffle the index
test_125_df_gpt4 = merged_df_gpt4.sample(frac=1).reset_index(drop=True)

In [195]:
test_encodings_125_gpt4 = tokenizer(test_125_df_gpt4['text'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_125_gpt4 = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_125_gpt4),
    test_125_df_gpt4['target'].values
))


In [196]:
test_encodings_125_gpt4_batched = test_dataset_125_gpt4.batch(16)
y_pred_raw_125_gpt4 = model.predict(test_encodings_125_gpt4_batched)

# Convert raw predictions to class predictions
y_pred_125_gpt4 = np.argmax(y_pred_raw_125_gpt4.logits, axis=1)

# Get the actual class labels
y_true_125_gpt4 = []
for features, label in test_dataset_125_gpt4:
    y_true_125_gpt4.append(label.numpy())
y_true_125_gpt4 = np.array(y_true_125_gpt4)


# Generate a classification report
report_125_gpt4 = classification_report(y_true_125_gpt4, y_pred_125_gpt4, target_names=["legitimate", "phishing"],digits=4)
print(report_125_gpt4)

              precision    recall  f1-score   support

  legitimate     1.0000    0.9600    0.9796       100
    phishing     0.8621    1.0000    0.9259        25

    accuracy                         0.9680       125
   macro avg     0.9310    0.9800    0.9528       125
weighted avg     0.9724    0.9680    0.9689       125



# gpt 4.0 Rephrase

In [200]:
test_dataset_raw_gpt3 = test_dataset_raw_1_25

In [201]:
test_dataset_raw_gpt3.index

Int64Index([532, 120,  85, 258, 528, 603,  39, 374, 169, 614, 440, 751, 547,
            168, 159, 719, 548,  82, 402, 665,   4,  38, 526,  35, 734],
           dtype='int64')

In [208]:
test_dataset_raw_1_25['text'][38]

"From Nicholas Mavis in Abidjan, Ivory Coast, using a private email address nicholasfmavis@yahoo.eco.jp, I am making an earnest appeal for your urgent assistance. With all due respect, I believe that with the divine's grace, it's possible for me to reach out for help. Kindly allow me to express my desire for establishing a business relationship with you. I am Nicholas Mavis, the son of the late Mavis Radcic. My father was a prosperous cocoa merchant in Abidjan, the economic capital of Ivory Coast. Unfortunately, my father was poisoned to death by his business associates during an outing on the first day of January while celebrating the New Year. Tragedy further struck as my mother passed away when I was still an infant. My father had taken special care of me before his demise. On his deathbed in January, within a private hospital in Abidjan, he confided in me about a sum of $12 million USD that he had discreetly left in a suspense account at a prime bank in Abidjan. He used my name as 

In [202]:
indices = [532, 120, 85, 258, 528, 603, 39, 374, 169, 614, 440, 751, 547, 168, 159, 719, 548, 82, 402, 665, 4, 38, 526, 35, 734]


In [209]:
test_dataset_raw_gpt3['text'][532] = "Email message object from XDB."
test_dataset_raw_gpt3['text'][120]="Two email message objects from XDB."
test_dataset_raw_gpt3['text'][85]="Hello, my name is Aishatu Ahmed. Recently, my father was tragically murdered in Tripoli due to conflicts over wealth within our family and friends circle. Many affluent individuals in Libya keep their funds at home due to banking regulations. Some believed that my father's wealth was still at our residence and attacked our home in an attempt to seize it. However, he had already transferred his assets to a European finance house, naming me as the beneficiary. A week after his burial, our house was ambushed, leading to my younger brother's death. My mother and I managed to escape to Burkina Faso, where we are currently in hiding at a hotel. I received assistance from a consultancy firm here."
test_dataset_raw_gpt3['text'][258]= "Dear Sir/Madam,I am the son of Johnson Savimbi, the late rebel leader of Angola, who passed away on February 22nd. For more context, please refer to this webpage: http://news.bbc.co.uk/hi/english/world/africa/newsid_1835000/1835101.stm. Before his demise, my father deposited $16 million in a European security company. Following his death, my family and I relocated to South Africa. We aim to move to Europe, but visa acquisition has been challenging from Africa. We seek your help to claim the deposited fund, and in gratitude, we're willing to offer you 10% ($1.6 million) for your assistance."
test_dataset_raw_gpt3['text'][528]= "Two email message objects labeled XDC."
test_dataset_raw_gpt3['text'][603]= "Two email message objects: XDFE and XDFFA."
test_dataset_raw_gpt3['text'][39]= "I'm Dr. Yinka Craige, a senior accountant at the Nigerian Liquified Natural Gas (NLNG) in Lagos. My colleagues and I are seeking an international partner to help transfer USD 27.5 million, which originates from an intentionally over-invoiced contract from our corporation. In appreciation for your assistance, we propose a distribution: 70% for us, the officials in Nigeria, and 30% for you or your company that provides the account."
test_dataset_raw_gpt3['text'][374]= "Two email message objects: XD and XDE."
test_dataset_raw_gpt3['text'][169]= "Dr. Ibrahim Usman, the Manager of Bill and Exchange at the Bank of Africa in Ouagadougou, Burkina Faso, writes to you unexpectedly. He oversees a dormant account holding $15 million belonging to Joseph F. Grillo, a foreign chemical engineer who tragically died during the September 11 attacks in the U.S. Despite multiple attempts, the bank has been unable to contact any of Grillo's relatives. Without intervention, the money will be absorbed into the bank's treasury as unclaimed funds. For more information on the victim, visit [http://www.septembervictims.com/septembervictims/victiminfo.asp?id=]. Dr. Usman seeks assistance in transferring this money before it's claimed by the bank."
test_dataset_raw_gpt3['text'][614]= "Dear Sir/Madam,I'm reaching out with a pressing matter. I am Miss Frani Korona, and along with my younger brother, Abdul Korona Junior, we are the children of the late Dr. Abdul Korona, a respected businessman and the Director General of the National Gold & Diamond Mining Corporation in Sierra Leone. Tragically, our father died under suspicious circumstances in Dublin on a business trip, which we suspect was orchestrated by our uncle. With our mother having passed away when I was just 15 and my brother 3, our father became our sole guardian. Prior to his death, he entrusted us with documents regarding money and family valuables stored in two trunk boxes overseas, registered under the name of my brother as the son of Paul John."
test_dataset_raw_gpt3['text'][440]= "I'm Mr. Zeid Ahmadi, the manager of the Telex/Computing Department at the Bank of Africa in Ouagadougou, Burkina Faso. I've stumbled upon a dormant account containing $18.5 million belonging to a deceased customer who tragically passed away in a motor accident on July 4th, 2003. This account has no claimants from family or relatives. I've kept this discovery confidential and believe collaborating with you could be mutually beneficial in claiming these funds. I was impressed by your profile and decided to reach out."
test_dataset_raw_gpt3['text'][751]= "Although you might have received similar letters online, please understand that my request is genuine and urgent. I'm Barrister Alexander Afadia, representing the late Mr. Adams Kielar, an expatriate and oil industry contractor. Tragically, he and his family died in a car accident on April 1st along the Sagamu-Lagos express road. Despite extensive efforts, I've been unable to locate any living relatives, leading me to seek your assistance. Without intervention, the considerable funds left in his account at a specific bank, as well as other properties, risk being seized. I hope you can help repatriate these assets."
test_dataset_raw_gpt3['text'][547]= "Two email message objects: XDAF and XDAC."
test_dataset_raw_gpt3['text'][168]= "Attention President/CEO/Manager,I am Mr. Paul Wemba, Chief Auditor at the African Development Bank (ADB). I've discovered a dormant account opened in 1999 with $10.5 million USD. The account owner, Dr. B.D. Redo, a foreign miner with Kruger Gold Co and a geologist, died in 2003. No one has been aware or claimed this account since. Without urgent action, these funds risk being absorbed either by our bank's board or identified by national auditors as dormant, ultimately being turned over to the government. Given that the government and treasury officials are just people like us, I feel compelled to ensure this money doesn't get misallocated."
test_dataset_raw_gpt3['text'][159]= "From Dr. Dan Kabo, Accountant General at Nigerian Liquefied Natural Gas (NLNG):Dear Sir,I approach you with a business proposal borne out of genuine intent, despite the possibility of you receiving similar ones before. As the Chief Accountant, in collaboration with the Auditor-General, our recent internal audit for the NLNG revealed an unallocated $32 million from contracts since the start of Nigeria's democracy. The NLNG, operating at its peak during the current civilian rule, boasts assets worth $3 billion, anticipating a total expenditure of $2 billion upon completion of its fifth phase. The mentioned $32 million has been sanctioned for payment by both the NLNG and the Federal Ministry of Finance and awaits release from the apex bank."
test_dataset_raw_gpt3['text'][719]= "Two email message objects: XDD and XDC."
test_dataset_raw_gpt3['text'][548]= "Mrs. Aminaea Bello, an Iraqi refugee, writes to the Managing Director. Once closely tied to the former president of Iraq who was overthrown by the American government, her husband was a personal aide to the president. Following recent upheavals in Iraq, they relocated to Thailand. They possess $35 million, acquired through her late husband's endeavors, which they want to invest in a friendly nation. Mrs. Bello seeks guidance on proper investments and is interested in purchasing residential property, intending to relocate her family soon. She emphasizes a need for mutual satisfaction in any agreements made."
test_dataset_raw_gpt3['text'][82]= "Greetings,I am Mr. Mike Charles Kobi, son of the late Col. Johnson Kobi, the Assistant Chief General Staff of Sierra Leone's Revolutionary United Front (RUF) and crown prince of the Kuloma village. Understanding the significant nature of my proposal, I assure you of its authenticity and confidentiality. I discovered you during an online search for a reputable individual capable of managing a discreet transaction involving a substantial transfer of funds. My father tragically died on August 10th during the conflicts in our nation."
test_dataset_raw_gpt3['text'][402]= "Dear Sir/Madam,I'm the eldest son of the late Chief Ken Saro Wiwa, a renowned environmental activist, writer, and businessman from the oil-rich Ogoni region in Nigeria. He founded MOSOP, a non-violent advocacy group for Ogoni rights. Tragically, due to his activism, he was executed by the regime of General Sani Abacha. Anticipating danger, my father wrote a will entrusting his assets, stocks, and bonds to me. However, his legal advisor, Mr. Briggs, in collusion with the government, has since seized these assets and frozen his accounts."
test_dataset_raw_gpt3['text'][665]= "Email message with the object code XCFFDF."
test_dataset_raw_gpt3['text'][4]= "Greetings,I am Rabi Al Salih, the wife of Mahammed Mahdi Al Salih, former Iraqi Minister of Interior Affairs. He's currently incarcerated by US forces, facing serious charges. During his tenure, I managed to secure funds from deals and have $18 million USD stored in a security firm, misrepresented as solid gold for a foreign partner. I found your contact online and seek your assistance to invest this money discreetly for my children's future in sectors like real estate or industrial production."
test_dataset_raw_gpt3['text'][38]= "Dear [Recipient],I am Nicholas Mavis from Abidjan, Ivory Coast, the son of the late Mavis Radcic, a prosperous cocoa merchant. Tragically, my father was poisoned by business associates in early January, leaving me as his sole heir. Prior to his death, he confidentially informed me about $7 million USD he deposited in a prime bank in Abidjan, with me as the next of kin. I am now seeking a trustworthy foreign partner to assist with this fund."
test_dataset_raw_gpt3['text'][526]= "Dear friend,I'm reaching out regarding a unique opportunity related to my late client's will. As the executor, I've learned that both he and his next of kin tragically died on the Egyptian Airline in 1999, which you can verify on the BBC's website. According to English law, if no one claims a legacy within seven years, it goes to the government. I'm seeking someone trustworthy to act as the beneficiary for the $18 million legacy to prevent it from defaulting to the state, and currently, I'm the only one aware of this."
test_dataset_raw_gpt3['text'][35]= "Dear friend,I wanted to update you on my financial situation. Although our initial efforts didn't succeed, I've since managed to transfer the funds with the assistance of a new partner from Chile. To compensate you for your past help, I've instructed my secretary, Mr. Davide Marks (email: davide_marks@myway.com), based in Cotonou, Benin, to send you some funds. Please provide him with your full name, address, and phone numbers. Currently, I'm in Chile focused on new investment projects, but you can contact Davide directly for the bank draft.Regards,Barr. Ahmed Abdulaziz."
test_dataset_raw_gpt3['text'][734]= "Dr. Charak, the head of the department at the Bank of Scotland in the UK, seeks a trustworthy individual to assist in transferring £15 million from the bank. The funds originally belonged to Mr. John Hughes, a foreign chemical engineer with no known beneficiaries. Since his passing, Dr. Charak has determined that no one is aware of this account or the sum contained within. Desiring to transfer the money to a secure overseas account, Dr. Charak is reaching out for a reliable partner for this significant transaction, emphasizing its authenticity and importance. He requests the recipient's full contact details for smooth communication"


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt3['text'][532] = "Email message object from XDB."
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt3['text'][120]="Two email message objects from XDB."
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_gpt3['text'][85]="Hello, my name is Aishatu Ahmed. Recently, my father was tragically murdered in Tripoli due to conflicts over wealth within our family and friends circle. Many affluent individuals in Libya keep the

In [210]:
merged_df_gpt3 = pd.concat([test_dataset_raw_0_100, test_dataset_raw_gpt3], ignore_index=True)

# Shuffle the index
test_125_df_gpt3 = merged_df_gpt3.sample(frac=1).reset_index(drop=True)

In [211]:
test_encodings_125_gpt3 = tokenizer(test_125_df_gpt3['text'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_125_gpt3 = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_125_gpt3),
    test_125_df_gpt3['target'].values
))


In [212]:
test_encodings_125_gpt3_batched = test_dataset_125_gpt3.batch(16)
y_pred_raw_125_gpt3 = model.predict(test_encodings_125_gpt3_batched)

# Convert raw predictions to class predictions
y_pred_125_gpt3 = np.argmax(y_pred_raw_125_gpt3.logits, axis=1)

# Get the actual class labels
y_true_125_gpt3 = []
for features, label in test_dataset_125_gpt3:
    y_true_125_gpt3.append(label.numpy())
y_true_125_gpt3 = np.array(y_true_125_gpt3)


# Generate a classification report
report_125_gpt3 = classification_report(y_true_125_gpt3, y_pred_125_gpt3, target_names=["legitimate", "phishing"],digits=4)
print(report_125_gpt3)

              precision    recall  f1-score   support

  legitimate     0.9697    0.9600    0.9648       100
    phishing     0.8462    0.8800    0.8627        25

    accuracy                         0.9440       125
   macro avg     0.9079    0.9200    0.9138       125
weighted avg     0.9450    0.9440    0.9444       125



# GPT 3.5 random

In [213]:


data3_1 = {
    'text': [
        "Dear Valued [Bank Name] Customer,\n\nDue to recent security updates and to ensure the continued safety of your account, we require you to verify your account information as part of our routine security measures. Your cooperation in this matter is highly appreciated.\n\nTo proceed with the verification process, please click on the link below or copy and paste it into your browser:\n[Phishing Link]\n\nFailure to complete the verification within the next 48 hours may result in a temporary suspension of your account. We apologize for any inconvenience this may cause and appreciate your prompt attention to this matter.\n\nThank you for being a valued customer of [Bank Name].\n\nSincerely,\n[Fake Bank Name] Security Team",

        "Congratulations [Recipient's Name],\n\nYou've been selected as the lucky winner of a special promotion from [Fake Company]. You've won a gift card worth $500 that can be used for any of our products or services. To claim your prize, simply follow the link below and provide your contact details.\n\nClaim Your Prize Now: [Phishing Link]\n\nAct fast, as this offer is time-limited! Please note that by participating, you agree to receive occasional promotional emails from us. Your privacy is important to us, and your information will not be shared.\n\nThank you for choosing [Fake Company]!\n\nBest Regards,\nThe [Fake Company] Team",

        "Dear PayPal User,\n\nWe regret to inform you that we have detected some unusual activity on your PayPal account. In order to ensure the security of your account, we have temporarily limited its functionality.\n\nTo resolve this issue and regain access to your account, please click the following link and follow the instructions to verify your identity and update your account information:\n[Phishing Link]\n\nWe take the security of your information seriously and apologize for any inconvenience this may cause. Your prompt attention to this matter is appreciated.\n\nSincerely,\nPayPal Security Team",


    ]
}

df3_1 = pd.DataFrame(data3_1)




In [214]:


data3_2 = {
    'text': [
        "Hello [Recipient's Name],\n\nWe hope this message finds you well. Are you looking for a flexible work-from-home opportunity that allows you to earn up to $1000 per week? Look no further! We have a limited number of positions available in your area.\n\nOur program offers you the chance to achieve financial independence from the comfort of your own home. To learn more about this incredible opportunity and secure your spot, click on the link below:\n[Phishing Link]\n\nDon't miss out on this chance to change your life. Take the first step toward financial freedom today!\n\nRegards,\nThe [Fake Company] Hiring Team",

        "Dear Taxpayer,\n\nWe would like to inform you that there is an issue with the processing of your tax return. It appears that some of the information you provided is incomplete or incorrect. In order to proceed with the processing, we need you to verify your personal details.\n\nPlease click on the link below to access the secure verification form and update your information:\n[Phishing Link]\n\nYour prompt attention to this matter is crucial to ensure the timely processing of your tax return. Thank you for your cooperation.\n\nSincerely,\nThe IRS Support Team",


    ]
}

df3_2 = pd.DataFrame(data3_2)



In [215]:


data3_3 = {
    'text': [
        "Dear [Your Name],\n\nI hope this message finds you in good health. I am [Your Name], a Human Rights activist from [Your Location]. I am writing to seek your assistance for a mission I am working on, focusing on providing educational materials and healthcare support to underserved communities in our region. The lack of resources is our biggest challenge, and your generous contribution could go a long way to help. I would be extremely grateful for any financial support you can offer.",

        "Dear [Your Name],\n\nI am reaching out to you with a unique and potentially lucrative business proposal. I am looking for a trustworthy individual who can assist me in a profitable business venture. Due to my position as a government official, I am currently unable to move a considerable sum of money to a foreign account. I am requesting your assistance in this operation, promising a sizable percentage of the sum for your cooperation.",

        "Dear [Your Name],\n\nI am writing this email with a heavy heart, burdened by the financial obstacles preventing me from pursuing my dream of a college education. My name is [Your Name], and I was recently accepted into [College/University Name]. Unfortunately, due to my financial circumstances, I am struggling to pay my tuition fees. I humbly ask for your assistance in helping me continue my education.",

        "Hello [Your Name],\n\nI am [Your Name], a friend of [Patient's Name]. Unfortunately, [Patient's Name] has recently been diagnosed with [Disease Name] and the treatment cost is beyond what we can afford. The financial burden is overwhelming and I am asking for your help to raise funds for [Patient's Name]'s medical expenses.",

        "Dear [Your Name],\n\nI am writing on behalf of a Non-Governmental Organization that assists refugees. We are currently facing a severe funding crisis and are unable to extend the much-needed support to the displaced victims. I am reaching out to ask for your help. Your donation will help us provide essential supplies, medical care, and educational resources.",

        "Hello [Your Name],\n\nI am the coach of a local youth sports team, and we're preparing for the upcoming national championship. Unfortunately, we are struggling with a lack of resources to provide the necessary equipment for our young athletes. I am writing to ask for your generous contribution to help us equip these promising athletes.",

        "Dear [Your Name],\n\nI run an animal shelter that provides a safe haven for stray and abandoned animals. We are currently facing a financial crisis and are struggling to provide necessary medical care and shelter for these helpless animals. Your financial support can help us feed, treat, and find homes for these lovely animals.",

        "Dear [Your Name],\n\nAs a researcher at [Your Institute's Name], I am leading a project on [Briefly Describe the Project]. Despite the promising nature of this research, we are currently facing financial constraints that are hindering our progress. I am writing to ask for your financial support to help us continue our groundbreaking work.",

        "Hello [Your Name],\n\nOur charity organization is actively involved in disaster relief work for the victims of the recent [Natural Disaster]. However, due to the scale of the disaster, we are struggling to keep up with the needs of the affected people. Any financial assistance you could provide would greatly help us in our mission.",

        "Dear [Your Name],\n\nI am a local artist working on a community art project. The aim is to use art as a medium to promote cultural awareness and unity in our community. Unfortunately, I lack the necessary funds to bring this project to life. Any financial support from your end would help immensely in executing this project."
    ]
}

df3_3 = pd.DataFrame(data3_3)
print(df)


In [216]:


data3_4 = {
    'text': [
        "Dear [Your Name],\n\nI am writing to inform you about an incredible investment opportunity that promises substantial returns. As an esteemed businessperson, I believe you would appreciate the potential of this venture. By investing in our project, you can secure your financial future and enjoy unprecedented profits. Your involvement would be greatly valued, and I would be happy to provide you with more details.",

        "Hello [Your Name],\n\nI hope this email finds you well. I am [Your Name], a representative of [Fake Charity Name]. Our organization is dedicated to providing humanitarian aid to impoverished communities around the world. We are seeking compassionate individuals like you to contribute to our cause. Your donation will directly impact the lives of those in need by providing them with essential resources and support.",

        "Dear [Your Name],\n\nWe are excited to offer you an exclusive opportunity to join our elite membership program. As a member, you will gain access to unparalleled benefits, including luxury vacations, premium discounts, and personalized services. Act now to secure your spot and elevate your lifestyle to new heights. Click the link below to learn more and sign up for this limited-time offer.",

        "Dear [Your Name],\n\nI am [Your Name], a representative of [Fake Insurance Company Name]. We understand the importance of protecting your assets and loved ones. Our comprehensive insurance policies provide peace of mind and financial security. Contact us today to discuss your insurance needs and receive a customized plan that suits your requirements.",

        "Hello [Your Name],\n\nI am [Your Name], a renowned author, and I am excited to introduce you to my latest book, '[Fake Book Title].' This thought-provoking masterpiece has garnered critical acclaim and has the potential to change lives. By purchasing a copy, you not only support my work but also embark on a journey of self-discovery and enlightenment.",

        "Dear [Your Name],\n\nI am reaching out to you on behalf of [Fake Tech Company Name], a leader in innovative technology solutions. Our cutting-edge products have revolutionized industries and empowered businesses. I invite you to explore our latest offerings and discover how our solutions can enhance efficiency and drive growth for your organization.",

        "Hello [Your Name],\n\nI am [Your Name], a representative of [Fake Survey Company Name]. We are conducting a research study on consumer preferences and would greatly appreciate your input. By participating in our survey, you contribute to valuable market insights and have a chance to win exciting prizes. Click the link below to start the survey and enter the prize draw.",

        "Dear [Your Name],\n\nI am [Your Name], a talent scout for [Fake Talent Agency Name]. We have identified you as a potential star with incredible potential in the entertainment industry. Our agency is dedicated to nurturing emerging talents and providing them with lucrative opportunities. I would love to discuss how we can shape your future success together.",

        "Hello [Your Name],\n\nI am writing to inform you that your account has been selected for a security audit. In order to ensure the safety of your sensitive information, we kindly ask you to update your account credentials. Click the link below to securely update your password and maintain the integrity of your account. Your cooperation is essential in preventing unauthorized access.",

        "Dear [Your Name],\n\nI am [Your Name], a representative of [Fake Event Planning Company Name]. We specialize in creating unforgettable experiences and cherished memories. Whether you are planning a wedding, corporate event, or private celebration, our team of experts is here to bring your vision to life. Contact us today to discuss your event and receive a personalized proposal."
    ]
}

df3_4 = pd.DataFrame(data3_4)



In [217]:
data_gpt3_random = pd.concat([df3_1,df3_2,df3_3,df3_4], ignore_index=True)

In [218]:
data_gpt3_random['target'] = 1

In [219]:
merged_df_gpt3_random = pd.concat([test_dataset_raw_0_100, data_gpt3_random], ignore_index=True)

# Shuffle the index
test_125_df_gpt3_random = merged_df_gpt3_random.sample(frac=1).reset_index(drop=True)

In [220]:
test_encodings_125_gpt3_random = tokenizer(test_125_df_gpt3_random['text'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_125_gpt3_random = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_125_gpt3_random),
    test_125_df_gpt3_random['target'].values
))

In [221]:
test_encodings_125_gpt3_random_batched = test_dataset_125_gpt3_random.batch(16)
y_pred_raw_125_gpt3_random = model.predict(test_encodings_125_gpt3_random_batched)

# Convert raw predictions to class predictions
y_pred_125_gpt3_random = np.argmax(y_pred_raw_125_gpt3_random.logits, axis=1)

# Get the actual class labels
y_true_125_gpt3_random = []
for features, label in test_dataset_125_gpt3_random:
    y_true_125_gpt3_random.append(label.numpy())
y_true_125_gpt3_random = np.array(y_true_125_gpt3_random)


# Generate a classification report
report_125_gpt3_random = classification_report(y_true_125_gpt3_random, y_pred_125_gpt3_random, target_names=["legitimate", "phishing"],digits=4)  # replace target_names with your actual class names
print(report_125_gpt3_random)

              precision    recall  f1-score   support

  legitimate     0.8972    0.9600    0.9275       100
    phishing     0.7778    0.5600    0.6512        25

    accuracy                         0.8800       125
   macro avg     0.8375    0.7600    0.7893       125
weighted avg     0.8733    0.8800    0.8723       125



# chatgpt4 random

In [125]:

data1 = {'text': [
    "I hope this message finds you in good health. I am [Your Name], a Human Rights activist from [Your Location]. I am writing to seek your assistance for a mission I am working on, focusing on providing educational materials and healthcare support to underserved communities in our region. The lack of resources is our biggest challenge, and your generous contribution could go a long way to help. I would be extremely grateful for any financial support you can offer.",
    "I am reaching out to you with a unique and potentially lucrative business proposal. I am looking for a trustworthy individual who can assist me in a profitable business venture. Due to my position as a government official, I am currently unable to move a considerable sum of money to a foreign account. I am requesting your assistance in this operation, promising a sizable percentage of the sum for your cooperation.",
    "I am writing this email with a heavy heart, burdened by the financial obstacles preventing me from pursuing my dream of a college education. My name is [Your Name], and I was recently accepted into [College/University Name]. Unfortunately, due to my financial circumstances, I am struggling to pay my tuition fees. I humbly ask for your assistance in helping me continue my education.",
    "I am [Your Name], a friend of [Patient's Name]. Unfortunately, [Patient's Name] has recently been diagnosed with [Disease Name] and the treatment cost is beyond what we can afford. The financial burden is overwhelming and I am asking for your help to raise funds for [Patient's Name]'s medical expenses.",
    "I am writing on behalf of a Non-Governmental Organization that assists refugees. We are currently facing a severe funding crisis and are unable to extend the much-needed support to the displaced victims. I am reaching out to ask for your help. Your donation will help us provide essential supplies, medical care, and educational resources.",
    "I am the coach of a local youth sports team, and we're preparing for the upcoming national championship. Unfortunately, we are struggling with a lack of resources to provide the necessary equipment for our young athletes. I am writing to ask for your generous contribution to help us equip these promising athletes.",
    "I run an animal shelter that provides a safe haven for stray and abandoned animals. We are currently facing a financial crisis and are struggling to provide necessary medical care and shelter for these helpless animals. Your financial support can help us feed, treat, and find homes for these lovely animals.",
    "As a researcher at [Your Institute's Name], I am leading a project on [Briefly Describe the Project]. Despite the promising nature of this research, we are currently facing financial constraints that are hindering our progress. I am writing to ask for your financial support to help us continue our groundbreaking work.",
    "Our charity organization is actively involved in disaster relief work for the victims of the recent [Natural Disaster]. However, due to the scale of the disaster, we are struggling to keep up with the needs of the affected people. Any financial assistance you could provide would greatly help us in our mission.",
    "I am a local artist working on a community art project. The aim is to use art as a medium to promote cultural awareness and unity in our community. Unfortunately, I lack the necessary funds to bring this project to life. Any financial support from your end would help immensely in executing this project."
]}

df1 = pd.DataFrame(data1)

In [126]:
data2 = {'text': [
    "Greetings! I am writing to you from the heart of a small, impoverished school in a rural area. Our school is the only source of education for the children in our community, who display an undeniable eagerness to learn and broaden their horizons. Unfortunately, we lack the basic facilities needed to provide these children with the quality education they deserve. Our classrooms are deteriorating, educational resources are scarce, and our hardworking teachers are struggling with the limited tools at their disposal. We humbly request your generous contributions to help us rebuild our school and create an environment conducive to learning for these deserving children.",
    "Dear friend, this letter finds its way to you with a heavy heart. My name is [Your Name], and I served as a professor at [University Name] for over two decades. Recently, due to a series of unfortunate circumstances, I have found myself grappling with a dire financial situation. From health problems leading to skyrocketing medical bills to the financial market downturn affecting my savings, the road has been challenging. In these difficult times, I find myself turning to the kindness of individuals who may have the means to provide assistance. Any financial help, no matter how small, would go a long way in helping me navigate through these tough times.",
    "Dear Sir/Madam, I am a single parent blessed with three beautiful children. Our world turned upside down when I recently lost my job due to company-wide layoffs as a result of the economic downturn. Since then, our days have been marred by uncertainty and struggles to make ends meet. I am doing my best to provide for my family and keep a roof over our heads, but it is becoming increasingly difficult with each passing day. I am writing to you today, in hope that you might find it in your heart to assist us during this difficult period. Your generous assistance, in any form or amount, will help us greatly.",
    "Hello, I am writing on behalf of our local church, an establishment that has stood as a cornerstone of our community for over a century. Sadly, due to years of wear and tear, the church is in dire need of repairs. The roof is leaking, the paint is peeling, and our heating system is on its last legs. Without sufficient funds, we face the heartbreaking possibility of closing the church. As such, we are seeking donations to help us preserve this important community institution. We believe that with your help, we can restore the church to its former glory and continue serving our community.",
    "Dear friend, I am an independent filmmaker currently working on a project that highlights the effects of climate change on indigenous communities. This documentary aims to bring attention to the often-overlooked victims of our changing environment, showcasing their resilience and struggle in the face of adversity. However, without the necessary funds, it has been challenging to provide this project the justice it deserves. Travel expenses, equipment rental, and post-production costs are proving to be overwhelming. I write to you in hopes of garnering financial support to help complete this crucial project. Any amount, big or small, will bring us closer to our goal.",
]}

df2 = pd.DataFrame(data2)


In [127]:
data3 = {'text': [
    "Greetings! I am writing to you from the heart of a small, impoverished school in a rural area. Our school is the only source of education for the children in our community, who display an undeniable eagerness to learn and broaden their horizons. Unfortunately, we lack the basic facilities needed to provide these children with the quality education they deserve. Our classrooms are deteriorating, educational resources are scarce, and our hardworking teachers are struggling with the limited tools at their disposal. We humbly request your generous contributions to help us rebuild our school and create an environment conducive to learning for these deserving children.",
    "Dear friend, this letter finds its way to you with a heavy heart. My name is [Your Name], and I served as a professor at [University Name] for over two decades. Recently, due to a series of unfortunate circumstances, I have found myself grappling with a dire financial situation. From health problems leading to skyrocketing medical bills to the financial market downturn affecting my savings, the road has been challenging. In these difficult times, I find myself turning to the kindness of individuals who may have the means to provide assistance. Any financial help, no matter how small, would go a long way in helping me navigate through these tough times.",
    "Dear Sir/Madam, I am a single parent blessed with three beautiful children. Our world turned upside down when I recently lost my job due to company-wide layoffs as a result of the economic downturn. Since then, our days have been marred by uncertainty and struggles to make ends meet. I am doing my best to provide for my family and keep a roof over our heads, but it is becoming increasingly difficult with each passing day. I am writing to you today, in hope that you might find it in your heart to assist us during this difficult period. Your generous assistance, in any form or amount, will help us greatly.",
    "Hello, I am writing on behalf of our local church, an establishment that has stood as a cornerstone of our community for over a century. Sadly, due to years of wear and tear, the church is in dire need of repairs. The roof is leaking, the paint is peeling, and our heating system is on its last legs. Without sufficient funds, we face the heartbreaking possibility of closing the church. As such, we are seeking donations to help us preserve this important community institution. We believe that with your help, we can restore the church to its former glory and continue serving our community.",
    "Dear friend, I am an independent filmmaker currently working on a project that highlights the effects of climate change on indigenous communities. This documentary aims to bring attention to the often-overlooked victims of our changing environment, showcasing their resilience and struggle in the face of adversity. However, without the necessary funds, it has been challenging to provide this project the justice it deserves. Travel expenses, equipment rental, and post-production costs are proving to be overwhelming. I write to you in hopes of garnering financial support to help complete this crucial project. Any amount, big or small, will bring us closer to our goal.",
    "Hello, my name is [Your Name] and I am an aspiring entrepreneur. I have developed an innovative product that I believe has the potential to make a significant impact in the [industry name]. I have devoted a great deal of time and personal savings into this project, but I now find myself at a standstill due to insufficient funding. I'm writing to request financial assistance to push this product to market. Your support would greatly help in covering manufacturing, marketing, and distribution costs. I sincerely believe in the potential of this product and I hope you would consider investing in its success.",
    "Dear friend, I am a researcher at [Institution Name] and our team has been working tirelessly to find solutions for environmental challenges. However, scientific research often requires substantial funding. I'm writing to ask for your support. Your generous contribution would allow us to invest in essential equipment, expand our research team, and fund testing and experiments. Every donation brings us one step closer to realizing our goals for a cleaner, more sustainable world.",
    "Hello, my name is [Your Name], and I am a struggling artist. I've dedicated my life to creating art that communicates the human experience, evokes emotion, and sparks thought. Unfortunately, art supplies and exhibition expenses are cost-prohibitive, often creating a barrier to my creative process. I'm writing to kindly ask for your financial assistance, which would provide much-needed support for purchasing materials and showcasing my work. Your generosity would significantly impact my ability to continue creating and sharing my art.",
    "Dear Sir/Madam, I am a recent graduate carrying a substantial student loan. While I am actively seeking employment, my financial situation is precarious at best. Until I can secure a stable job, I am struggling to manage my day-to-day expenses and loan repayments. I am writing to ask for your help during this difficult time. Any assistance, financial or otherwise, would greatly alleviate the burden I am currently facing.",
    "Hello, I am writing to you on behalf of a local animal shelter. We provide a safe haven for abused, abandoned, and stray animals, but operating costs are high and our resources are dwindling. From food and medical supplies to maintenance and staffing, every aspect of running the shelter needs funding. If you are in a position to help, we would greatly appreciate your support. Your donation could give an innocent animal a second chance at life."
]}

df3 = pd.DataFrame(data3)


In [128]:
data_gpt4_random = pd.concat([df1,df2,df3], ignore_index=True)

In [129]:
data_gpt4_random['target'] = 1


In [130]:
merged_df_gpt4_random = pd.concat([test_dataset_raw_0_100, data_gpt4_random], ignore_index=True)

# Shuffle the index
test_125_df_gpt4_random = merged_df_gpt4_random.sample(frac=1).reset_index(drop=True)

In [131]:
test_125_df_gpt4_random.head()

Unnamed: 0,text,target
0,hello guysim bugging you completed questionnai...,0
1,forwarded request joe hillings forwarded lora ...,0
2,not theorybut highly confidential jeff islikel...,0
3,mikehere draft version ca enron km power co pl...,0
4,please find attached weekly report ets optimiz...,0


In [132]:
test_encodings_125_gpt4_random = tokenizer(test_125_df_gpt4_random['text'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_125_gpt4_random = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_125_gpt4_random),
    test_125_df_gpt4_random['target'].values
))


In [133]:
test_encodings_125_gpt4_random_batched = test_dataset_125_gpt4_random.batch(16)
y_pred_raw_125_gpt4_random = model.predict(test_encodings_125_gpt4_random_batched)

# Convert raw predictions to class predictions
y_pred_125_gpt4_random = np.argmax(y_pred_raw_125_gpt4_random.logits, axis=1)

# Get the actual class labels
y_true_125_gpt4_random = []
for features, label in test_dataset_125_gpt4_random:
    y_true_125_gpt4_random.append(label.numpy())
y_true_125_gpt4_random = np.array(y_true_125_gpt4_random)


# Generate a classification report
report_125_gpt4_random = classification_report(y_true_125_gpt4_random, y_pred_125_gpt4_random, target_names=["legitimate", "phishing"],digits=4)  # replace target_names with your actual class names
print(report_125_gpt4_random)

              precision    recall  f1-score   support

  legitimate     0.8276    0.9600    0.8889       100
    phishing     0.5556    0.2000    0.2941        25

    accuracy                         0.8080       125
   macro avg     0.6916    0.5800    0.5915       125
weighted avg     0.7732    0.8080    0.7699       125



In [232]:
data5_1 = {
    'text': [
        "Dear Valued [Bank Name] Customer,We're committed to protecting your account. Due to recent security updates, we've noticed some inconsistencies with your login credentials. For your safety, we require all users to undergo a quick account verification. Click the link below to start the process. If you don't verify within 24 hours, we might have to suspend your account temporarily as a precautionary measure. We understand this might be an inconvenience, but your security is our top priority.",
        "Congratulations [Recipient's Name],Great news! You've been exclusively selected as the winner of our grand monthly giveaway. This means you're now eligible to claim a whopping $1,000 prize. Don't miss out on this limited-time offer, as slots fill up quickly. To get started, simply click the link below and follow the instructions. Hurry, before the offer runs out!",
        "Dear PayPal User,Our team is continuously monitoring PayPal accounts for any unusual activities. Regrettably, we have detected some suspicious transactions on your account. As part of our protocol, we require you to verify your recent transactions. To help us secure your account, please log in via the link below and follow the necessary steps to confirm your identity. Failure to do so might result in temporary limitations to your account functionality.",
        "Dear [Recipient's Name],We pride ourselves on the security of our user accounts. Our system recently detected an unusual login attempt to your account from an unrecognized device on [Date]. If you did not authorize this, it could mean someone tried accessing your account. Click the link below to secure your account and reset your password. Remember, never share your password with anyone.",
        "Dear User,During our regular account maintenance, we noticed that your registered email address hasn't been verified yet. This is a crucial step in ensuring uninterrupted service. Confirm your email address by clicking the link below. This will also enhance the security of your account and help in faster recovery in case you ever forget your credentials.",
        "Hello [Recipient's Name],Based on your last year's financial activities, our records indicate that you are eligible for a tax refund of $500. To process this, you need to confirm some personal details. Click the link below to fill out the necessary form and claim your refund. Act now, or you might miss out on your deserved refund.",
        "Dear [Recipient's Name],Thank you for your recent purchase on Amazon. Your order, comprising of a [Product Name], has been confirmed, and the total charge is $299.99. If you believe there's been a mistake and you did not authorize this purchase, please click the link below immediately to get in touch with our support team and prevent potential fraudulent activity.",
        "Hello,A new voicemail was left on your account at 4:32 PM today. Given the importance of the message, we urge you to listen to it at the earliest. Simply click the link below to access the voicemail. Please note that for security reasons, the link will expire in 24 hours.",
        "Dear User, A trusted contact, [Contact's Name], has shared a crucial document with you via Google Docs. We believe this could be of utmost importance. Click the link below to access the document. For the safety and privacy of both parties involved, we request you not to forward this email or the document link to anyone else.",
        "Dear [Service Name] User,We've encountered an issue with your recent payment. Our system was unable to process it due to some inconsistencies with the provided payment details. To ensure uninterrupted service, kindly click the link below to update your payment information. Remember, timely action can prevent potential service lags and disruptions."
    ]
}

df5_1 = pd.DataFrame(data5_1)



In [233]:
data5_2 = {
    'text': [
        "Dear [Recipient's Name],We've noticed that your Microsoft Office 365 account hasn't been updated to our latest security protocol. In order to maintain uninterrupted access to your services and enhance your security, please click the link below and follow the instructions to update your account. Failure to do so within 72 hours might result in reduced functionalities.",
        "Hello Valued Customer,As a routine check, our team has identified that your email storage has reached its maximum limit. To avoid bouncing back of incoming messages, it is essential that you extend your storage limit. Click the link below to purchase additional storage and ensure a smooth email experience.",
        "Dear [Recipient's Name],We are excited to announce our special anniversary giveaway! As a token of our gratitude for your continued loyalty, you've been chosen to take part in our exclusive sweepstakes. Click the link below for a chance to win exciting prizes, including a brand new iPhone! Act now, the offer is for a limited period.",
        "Hello,We regret to inform you that your credit card transaction on our website faced an error. To avoid any disruption and rectify this, we need you to confirm your payment details. Click the link below to provide the necessary information. Your prompt action is highly appreciated.",
        "Dear User,We've detected an unauthorized login to your Facebook account from an unfamiliar location. For your security, we've temporarily locked your account. To regain access, click the link below and follow the steps to verify your identity. Remember, it's crucial to act swiftly to prevent any potential misuse.",
        "Hello,Your iCloud account shows that a new device has been added. If you did not authorize this change, it's crucial to secure your account immediately. Click the link below to verify your devices and, if needed, change your password. Your digital safety is our priority.",
        "Dear [Recipient's Name],We are updating our database and wish to validate all user accounts to better tailor our services. Please click the link below to confirm your account details. Users who fail to confirm their details by the given deadline might experience service interruptions.",
        "Greetings,We noticed you haven't been using your Dropbox account recently. To keep your account active and avoid deletion due to inactivity, please log in by clicking the link below and accessing some of your stored files. Remember, active usage ensures the continued safety of your files.",
        "Dear [Service Name] Subscriber,Our records indicate that your subscription is about to expire. To continue enjoying our premium benefits, please renew your subscription by clicking the link below. Early renewals might be eligible for special discounts!",
        "Hello [Recipient's Name],We noticed an attempt to change the password for your Gmail account. If you did not request this change, please click the link below immediately to secure your account. Regular checks and timely actions are crucial for maintaining the integrity of your personal information."
    ]
}

df5_2 = pd.DataFrame(data5_2)



In [234]:
data5_3 = {
    'text': [
        "Dear Amazon Shopper,During our regular account verification, we found a discrepancy in your delivery details. To ensure uninterrupted service and timely deliveries, please update your shipping address by clicking the link below. Remember, keeping your details up-to-date ensures a seamless shopping experience.",
        "Hello [Recipient's Name],Thank you for participating in our recent survey. Based on your responses, you qualify for a $100 gift voucher from our partner brands! Claim your reward by clicking the link below. Hurry, this offer is time-sensitiv",
        "Dear [Recipient's Name],We noticed multiple failed login attempts on your Netflix account. For your protection, we've temporarily locked your account. Click the link below to validate your identity and reset your password. Ensure you use strong passwords for enhanced safety.",
        "Greetings,You're receiving this because you've been selected to participate in our exclusive beta testing for the new Google features. Experience the future of online search before anyone else! Click the link below to join the program and provide us with invaluable feedback.",
        "Hello [Recipient's Name],We're updating our user agreement. To continue using our services, you must agree to the new terms and conditions. Click the link below to review and accept the changes. Users who don't accept the updated terms may face service limitations."
    ]
}

df5_3 = pd.DataFrame(data5_3)



In [235]:
data_gpt5_random = pd.concat([df5_1,df5_2,df5_3], ignore_index=True)

In [236]:
data_gpt5_random['target'] = 1


In [237]:
data_gpt5_random.shape

(25, 2)

In [242]:
merged_df_gpt5_random = pd.concat([test_dataset_raw_0_100, data_gpt5_random], ignore_index=True)

# Shuffle the index
test_125_df_gpt5_random = merged_df_gpt5_random.sample(frac=1).reset_index(drop=True)

In [243]:
test_125_df_gpt5_random

Unnamed: 0,text,target
0,"Dear User,During our regular account maintenan...",1
1,"Dear [Recipient's Name],We've noticed that you...",1
2,"Greetings,We noticed you haven't been using yo...",1
3,mike i already seen resume i know gas market a...,0
4,jeffmy name tom mashington im construction pro...,0
...,...,...
120,i proud owner doors even door handles andloc...,0
121,kay london eventregardsdelainey forwarded davi...,0
122,fyi michelle forwarded michelle cashhouect a...,0
123,business highlightsenron industrial marketsth...,0


In [244]:
test_encodings_125_gpt5_random = tokenizer(test_125_df_gpt5_random['text'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_125_gpt5_random = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_125_gpt5_random),
    test_125_df_gpt5_random['target'].values
))

In [245]:
test_encodings_125_gpt5_random_batched = test_dataset_125_gpt5_random.batch(16)
y_pred_raw_125_gpt5_random = model.predict(test_encodings_125_gpt5_random_batched)

# Convert raw predictions to class predictions
y_pred_125_gpt5_random = np.argmax(y_pred_raw_125_gpt5_random.logits, axis=1)

# Get the actual class labels
y_true_125_gpt5_random = []
for features, label in test_dataset_125_gpt5_random:
    y_true_125_gpt5_random.append(label.numpy())
y_true_125_gpt5_random = np.array(y_true_125_gpt5_random)


# Generate a classification report
report_125_gpt5_random = classification_report(y_true_125_gpt5_random, y_pred_125_gpt5_random, target_names=["legitimate", "phishing"],digits=4)  # replace target_names with your actual class names
print(report_125_gpt5_random)

              precision    recall  f1-score   support

  legitimate     0.7934    0.9600    0.8688       100
    phishing     0.0000    0.0000    0.0000        25

    accuracy                         0.7680       125
   macro avg     0.3967    0.4800    0.4344       125
weighted avg     0.6347    0.7680    0.6950       125

