In [2]:
import pandas as pd

# Iteratively estimate three mutually dependent scores from a table of rows
# that each carry a "from" key, a "to" key and a "deanonymous" value:
#   * trustiness  - per "to":   mean of deanonymous * confidence over its rows
#   * reliability - per "from": mean of confidence over its rows
#   * confidence  - per row:    mean of reliability and 1 - |deanonymous - trustiness|
# All three are clipped to [0, 1] on every pass. Afterwards a per-row "risk"
# column is derived as (1 - reliability) * 10.
df = pd.read_csv("deanonymous_score_1000.csv")

# Starting values for the iteration (same value on every row).
df["confidence"] = 0.5
df["trustiness"] = 0.5
df["reliability"] = 0.7

max_iterations = 10000
epsilon = 0.01

# Defined before the loop so the non-convergence report below is well-formed
# even if max_iterations is 0 and the loop body never runs.
delta = float("inf")

for iteration in range(max_iterations):
    # Snapshot the current scores so this pass's change can be measured.
    df["trustiness_prev"] = df["trustiness"]
    df["reliability_prev"] = df["reliability"]
    df["confidence_prev"] = df["confidence"]
    df["weighted_score"] = df["deanonymous"] * df["confidence"]

    # Per-"to" mean of the confidence-weighted value, broadcast back to rows.
    trustiness_updates = df.groupby("to")["weighted_score"].mean()
    df["trustiness"] = df["to"].map(trustiness_updates)

    # Per-"from" mean of the (previous pass) confidence, broadcast back to rows.
    reliability_updates = df.groupby("from")["confidence"].mean()
    df["reliability"] = df["from"].map(reliability_updates)

    df["trustiness"] = df["trustiness"].clip(0, 1)
    df["reliability"] = df["reliability"].clip(0, 1)

    # A row is trusted more when its sender is reliable and its value agrees
    # with the receiver's current trustiness.
    df["confidence"] = (df["reliability"] + (1 - (df["deanonymous"] - df["trustiness"]).abs())) / 2

    df["confidence"] = df["confidence"].clip(0, 1)

    # NOTE(review): these are SUMS of absolute per-row changes, so their
    # magnitude grows with the number of rows, while epsilon is an absolute
    # threshold. The recorded run is still in the 10^4 range after 12 passes;
    # a per-row mean or max may be the intended criterion - confirm before
    # changing, since it alters where the iteration stops.
    delta_t = (df["trustiness"] - df["trustiness_prev"]).abs().sum()
    delta_r = (df["reliability"] - df["reliability_prev"]).abs().sum()
    delta_c = (df["confidence"] - df["confidence_prev"]).abs().sum()
    delta = max(delta_t, delta_r, delta_c)

    print(f"Iteration {iteration + 1}: ΔT = {delta_t:.4f}, ΔR = {delta_r:.4f}, ΔC = {delta_c:.4f}, Δ = {delta:.4f}")

    if delta < epsilon:
        print("Convergence achieved!")
        break
else:
    # Reached only when the loop ran out of iterations without hitting `break`.
    # Previously this case was silent and the risk column was built from
    # non-converged scores without any indication.
    print(
        f"WARNING: no convergence after {max_iterations} iterations "
        f"(last Δ = {delta:.4f}, epsilon = {epsilon}); "
        "risk is computed from non-converged scores."
    )

# reliability is clipped to [0, 1], so risk falls in [0, 10]; low reliability -> high risk.
df["risk"] = (1 - df["reliability"]) * 10

Iteration 1: ΔT = 1151342.5437, ΔR = 827292.8000, ΔC = 552698.4254, Δ = 1151342.5437
Iteration 2: ΔT = 85673.6954, ΔR = 542133.4258, ΔC = 304144.6588, Δ = 542133.4258
Iteration 3: ΔT = 73682.5238, ΔR = 303885.3980, ΔC = 181713.1775, Δ = 303885.3980
Iteration 4: ΔT = 56609.9787, ΔR = 181581.0003, ΔC = 114706.5829, Δ = 181581.0003
Iteration 5: ΔT = 43699.3641, ΔR = 114625.6661, ΔC = 76680.3169, Δ = 114625.6661
Iteration 6: ΔT = 34554.6571, ΔR = 76612.6544, ΔC = 54304.2610, Δ = 76612.6544
Iteration 7: ΔT = 28007.9492, ΔR = 54248.7343, ΔC = 40492.1121, Δ = 54248.7343
Iteration 8: ΔT = 23211.5023, ΔR = 40448.9041, ΔC = 31510.1575, Δ = 40448.9041
Iteration 9: ΔT = 19609.9650, ΔR = 31475.9495, ΔC = 25385.1840, Δ = 31475.9495
Iteration 10: ΔT = 16836.9637, ΔR = 25354.3857, ΔC = 21013.9680, Δ = 25354.3857
Iteration 11: ΔT = 14654.1446, ΔR = 20987.5867, ΔC = 17774.9074, Δ = 20987.5867
Iteration 12: ΔT = 12897.0723, ΔR = 17754.8228, ΔC = 15297.2975, Δ = 17754.8228
Iteration 13: ΔT = 11456.4753, Δ

In [None]:
# One row per "from" value, holding the mean of that sender's per-row risk.
risk_df = df.groupby("from", as_index=False)["risk"].mean()

# Keep only the "Address" column of the rows whose label equals 1.
addresses = pd.read_csv("labeled_addresses.csv")
addresses = addresses.loc[addresses["label"] == 1, "Address"]

# Left join: every selected address is kept; addresses that never occur as a
# "from" value end up with NaN risk.
merged_df = addresses.to_frame(name="address").merge(
    risk_df.rename(columns={"from": "address"}),
    how="left",
    on="address",
)

In [4]:
# Number of merged addresses with risk strictly below 6 (NaN risk is not counted).
int((merged_df["risk"] < 6).sum())

78

In [5]:
# Number of merged addresses with risk of 6 or more (NaN risk is not counted).
int((merged_df["risk"] >= 6).sum())

157

In [44]:
# Number of merged addresses with no risk value, i.e. no match in risk_df.
int(merged_df["risk"].isna().sum())

8

In [None]:
# One row per "from" value, holding the mean of that sender's per-row risk.
risk_df = df.groupby("from", as_index=False)["risk"].mean()

# Keep only the "Address" column of the rows whose label equals 0.
addresses = pd.read_csv("labeled_addresses.csv")
addresses = addresses.loc[addresses["label"] == 0, "Address"]

# Left join: every selected address is kept; addresses that never occur as a
# "from" value end up with NaN risk.
merged_df = addresses.to_frame(name="address").merge(
    risk_df.rename(columns={"from": "address"}),
    how="left",
    on="address",
)

In [7]:
# Number of merged addresses with risk strictly below 6 (NaN risk is not counted).
int((merged_df["risk"] < 6).sum())

513

In [8]:
# Number of merged addresses with risk of 6 or more (NaN risk is not counted).
int((merged_df["risk"] >= 6).sum())

23

In [40]:
# Number of merged addresses with no risk value, i.e. no match in risk_df.
int(merged_df["risk"].isna().sum())

24