In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): none of the visible cells read from the mounted drive (the data
# is fetched by URL) — confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

In [None]:
# Source data: CSV export endpoint of the Google Sheets document.
url = 'https://docs.google.com/spreadsheets/d/1nvbMXbLLjQDSz3URI1lfJxT5Scghm_ar_lOiTE_K0XE/export?format=csv'

# Read the exported CSV over HTTP into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Convert the 'timestamp' column to Unix epoch seconds stored as floats.
#
# Why not `.astype(int) / 10**9`: that is only correct when the parsed datetimes
# have nanosecond resolution and the platform `int` is 64-bit. Subtracting the
# epoch and dividing by a one-second Timedelta gives seconds regardless of the
# resolution pandas infers.
#
# The dtype guard keeps the cell idempotent: after one run the column is float
# (the division result), and feeding those seconds back through pd.to_datetime
# would reinterpret them as nanoseconds and corrupt the values.
if not pd.api.types.is_float_dtype(df['timestamp']):
    parsed_timestamps = pd.to_datetime(df['timestamp'])
    # Build the epoch with the same timezone (None when naive) so the
    # subtraction is valid for both naive and tz-aware data.
    unix_epoch = pd.Timestamp(0, tz=parsed_timestamps.dt.tz)
    df['timestamp'] = (parsed_timestamps - unix_epoch) / pd.Timedelta(seconds=1)

In [None]:
# One-hot encode the listed categorical columns; the result is a new frame,
# `df` itself is not modified.
categorical_columns = [
    'transaction_type',
    'location_region',
    'ip_prefix',
    'purchase_pattern',
    'age_group',
]
df_encoded = pd.get_dummies(df, columns=categorical_columns)

In [None]:
# Derived flag: a transaction counts as "suspicious" when its one-hot encoded
# type is either phishing or scam.
is_phishing = df_encoded['transaction_type_phishing']
is_scam = df_encoded['transaction_type_scam']
df_encoded['transaction_type_suspicious'] = is_phishing | is_scam

In [None]:
# Preview the first rows of the one-hot encoded frame, including the derived
# 'transaction_type_suspicious' column.
df_encoded.head()

Unnamed: 0,timestamp,hour_of_day,sending_address,receiving_address,amount,login_frequency,session_duration,risk_score,anomaly,transaction_type_phishing,...,ip_prefix_172.16,ip_prefix_192.0,ip_prefix_192.168,purchase_pattern_focused,purchase_pattern_high_value,purchase_pattern_random,age_group_established,age_group_new,age_group_veteran,transaction_type_suspicious
0,1649681000.0,12,0x9d32d0bf2c00f41ce7ca01b66e174cc4dcb0c1da,0x39f82e1c09bc6d7baccc1e79e5621ff812f50572,796.949206,3,48,18.75,low_risk,False,...,False,True,False,True,False,False,True,False,False,False
1,1655234000.0,19,0xd6e251c23cbf52dbd472f079147873e655d8096f,0x51e8fbe24f124e0e30a614e14401b9bbfed5384c,0.01,5,61,25.0,low_risk,False,...,False,False,False,True,False,False,True,False,False,False
2,1642523000.0,16,0x2e0925b922fed01f6a85d213ae2718f54b8ca305,0x52c7911879f783d590af45bda0c0ef2b8536706f,778.19739,3,74,31.25,low_risk,False,...,False,False,True,True,False,False,True,False,False,False
3,1655285000.0,9,0x93efefc25fcaf31d7695f28018d7a11ece55457f,0x8ac3b7bd531b3a833032f07d4e47c7af6ea7bace,300.838358,8,111,36.75,low_risk,False,...,False,False,False,False,True,False,False,False,True,False
4,1645195000.0,14,0xad3b8de45d63f5cce28aef9a82cf30c397c6ceb9,0x6fdc047c2391615b3facd79b4588c7e9106e49f2,775.569344,6,100,62.5,moderate_risk,False,...,True,False,False,False,True,False,False,False,True,False


In [None]:
# Encode the textual 'anomaly' risk label as an integer on the original
# (non-one-hot) frame. -1 and 1 coincide with the outlier / inlier labels that
# IsolationForest.predict returns; 0 (moderate risk) has no counterpart among
# the detector's outputs.
risk_mapping = {'high_risk': -1, 'moderate_risk': 0, 'low_risk': 1}
df['anomaly_encoded'] = df['anomaly'].map(risk_mapping)

# Series.map turns every label that is missing from the mapping into NaN.
# Fail here with a clear message instead of letting NaNs surface later inside
# the metric computations.
unmapped_labels = set(df.loc[df['anomaly_encoded'].isna(), 'anomaly'].unique())
assert not unmapped_labels, f"Unmapped 'anomaly' labels: {unmapped_labels}"

In [None]:
# Preview the original frame, which now carries the integer 'anomaly_encoded' column.
df.head()

Unnamed: 0,timestamp,hour_of_day,sending_address,receiving_address,amount,transaction_type,location_region,ip_prefix,login_frequency,session_duration,purchase_pattern,age_group,risk_score,anomaly,anomaly_encoded
0,1649681000.0,12,0x9d32d0bf2c00f41ce7ca01b66e174cc4dcb0c1da,0x39f82e1c09bc6d7baccc1e79e5621ff812f50572,796.949206,transfer,Europe,192.0,3,48,focused,established,18.75,low_risk,1
1,1655234000.0,19,0xd6e251c23cbf52dbd472f079147873e655d8096f,0x51e8fbe24f124e0e30a614e14401b9bbfed5384c,0.01,purchase,South America,172.0,5,61,focused,established,25.0,low_risk,1
2,1642523000.0,16,0x2e0925b922fed01f6a85d213ae2718f54b8ca305,0x52c7911879f783d590af45bda0c0ef2b8536706f,778.19739,purchase,Asia,192.168,3,74,focused,established,31.25,low_risk,1
3,1655285000.0,9,0x93efefc25fcaf31d7695f28018d7a11ece55457f,0x8ac3b7bd531b3a833032f07d4e47c7af6ea7bace,300.838358,transfer,South America,172.0,8,111,high_value,veteran,36.75,low_risk,1
4,1645195000.0,14,0xad3b8de45d63f5cce28aef9a82cf30c397c6ceb9,0x6fdc047c2391615b3facd79b4588c7e9106e49f2,775.569344,sale,Africa,172.16,6,100,high_value,veteran,62.5,moderate_risk,0


**Anomaly detection and fraud analysis in blockchain transactions**

In [None]:
# Columns of df_encoded that are fed to the anomaly detector.
# NOTE(review): 'risk_score' may be what the 'anomaly' label is derived from —
# if so, using it as a feature leaks the label into the detector; confirm.
features = [
    'timestamp',
    'hour_of_day',
    'amount',
    'login_frequency',
    'session_duration',
    'risk_score',
    'transaction_type_suspicious',
]

In [None]:
from sklearn.ensemble import IsolationForest

# Unsupervised outlier detector fitted on the selected feature columns.
# IsolationForest builds randomized trees, so random_state is pinned to make
# the predictions (and every metric computed from them) reproducible on re-run.
X_features = df_encoded[features]  # 'features' contains the selected features
model = IsolationForest(random_state=42)
model.fit(X_features)

# predict() returns hard labels, not continuous scores: -1 = outlier, 1 = inlier.
# (Continuous scores are available via model.score_samples / decision_function.)
# The variable keeps its original name because later cells reference it.
anomaly_scores = model.predict(X_features)

In [None]:
# Ground-truth labels: the integer-encoded 'anomaly' column
# (-1 = high_risk, 0 = moderate_risk, 1 = low_risk per risk_mapping).
y_true = df['anomaly_encoded']

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Support-weighted precision / recall / F1 of the detector's labels against the
# three-level risk encoding.
#
# Caveat: the detector only emits -1 (outlier) or 1 (inlier), so class 0
# (moderate risk) is never predicted. Its precision is therefore undefined and
# every moderate-risk row necessarily counts as a miss. zero_division=0 states
# explicitly that undefined terms contribute 0 — the same value the default
# uses — without emitting an UndefinedMetricWarning.
precision = precision_score(y_true, anomaly_scores, average='weighted', zero_division=0)
recall = recall_score(y_true, anomaly_scores, average='weighted', zero_division=0)
f1 = f1_score(y_true, anomaly_scores, average='weighted', zero_division=0)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.8048728264303386
Recall: 0.6142493638676845
F1-score: 0.6608591137224099


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Positive prediction (1) = the detector labelled the row an outlier (-1).
y_pred_binary = (anomaly_scores == -1).astype(int)

# Positive truth (1) = the row is encoded as high risk (-1).
y_true_high_risk = (df['anomaly_encoded'] == -1).astype(int)

# Evaluate the outlier flag as a binary detector of high-risk rows.
precision, recall, f1 = (
    scorer(y_true_high_risk, y_pred_binary)
    for scorer in (precision_score, recall_score, f1_score)
)

print("Precision (High Risk):", precision)
print("Recall (High Risk):", recall)
print("F1-score (High Risk):", f1)

Precision (High Risk): 0.18108062897290064
Recall (High Risk): 1.0
F1-score (High Risk): 0.3066355074003258


In [None]:
# Positive truth (1) = the row is encoded as low risk (1).
y_true_low_risk = (df['anomaly_encoded'] == 1).astype(int)

# For the low-risk class the matching positive prediction is "inlier" (1).
# Scoring against the outlier flag instead would report how often flagged rows
# are actually low risk (a false-alarm share), not how well low-risk rows are
# recognised.
y_pred_inlier = (anomaly_scores == 1).astype(int)

# Calculate metrics for low-risk transactions
precision_low = precision_score(y_true_low_risk, y_pred_inlier)
recall_low = recall_score(y_true_low_risk, y_pred_inlier)
f1_low = f1_score(y_true_low_risk, y_pred_inlier)

print("Precision (Low Risk):", precision_low)
print("Recall (Low Risk):", recall_low)
print("F1-score (Low Risk):", f1_low)


Precision (Low Risk): 0.6052470168395226
Recall (Low Risk): 0.34190632185718334
F1-score (Low Risk): 0.4369678549143535


In [None]:
# Positive truth (1) = the row is encoded as moderate risk (0).
y_true_moderate_risk = (df['anomaly_encoded'] == 0).astype(int)

# Score the outlier flag (y_pred_binary) as a binary detector of moderate-risk rows.
# NOTE(review): this treats "flagged as outlier" as the prediction for moderate
# risk; the detector has no dedicated moderate class — confirm this is the
# intended reading of these numbers.
precision_moderate, recall_moderate, f1_moderate = (
    scorer(y_true_moderate_risk, y_pred_binary)
    for scorer in (precision_score, recall_score, f1_score)
)

print("Precision (Moderate Risk):", precision_moderate)
print("Recall (Moderate Risk):", recall_moderate)
print("F1-score (Moderate Risk):", f1_moderate)

Precision (Moderate Risk): 0.21367235418757666
Recall (Moderate Risk): 0.8900243874114505
F1-score (Moderate Risk): 0.3446120641201466
