In [1]:
import pandas as pd
import numpy as np

def log_scaled_score(clipped):
    """Rescale already-clipped transaction counts onto a log scale.

    Computes ``(2 * log10(x) - log10(m)) / log10(m)`` for every value ``x``,
    where ``m`` is the largest value in *clipped*. Zeros are treated as 1 so
    the logarithm is defined; a count of 0 or 1 therefore maps to -1 and the
    maximum maps to 1.

    Args:
        clipped: pandas Series of transaction counts, already clipped to the
            desired upper bound.

    Returns:
        A Series with the same index as *clipped* holding the scores.
        Missing inputs stay missing.
    """
    max_count = clipped.max()
    if not max_count > 1:
        # log10(max) would be 0 (or -inf/NaN), so the division below would
        # yield inf/NaN. With no count above 1 every row sits at the lower
        # bound, which is the value a 0/1 count gets for any larger maximum.
        return pd.Series(-1.0, index=clipped.index).where(clipped.notna())

    log_max = np.log10(max_count)
    log_counts = np.log10(clipped.replace(0, 1))
    return (2 * log_counts - log_max) / log_max


# Notebook cells and scripts both run with __name__ == "__main__"; the guard
# only keeps the file I/O from firing if this code is imported as a module.
if __name__ == "__main__":
    # Load data
    df = pd.read_csv('deanonymous_score_default.csv')

    # Define clipping values
    clip_values = [100, 1000, 10000]

    # Track what was actually written so the final message cannot drift out
    # of sync with clip_values.
    saved_files = []

    # Iterate over clipping values and calculate deanonymous score
    for clip_value in clip_values:
        # Clip the values of outTxn and inTxn
        df["outTxn_clipped"] = df["outTxn"].clip(upper=clip_value)
        df["inTxn_clipped"] = df["inTxn"].clip(upper=clip_value)

        # Score each direction, then average the two
        u = log_scaled_score(df["outTxn_clipped"])
        v = log_scaled_score(df["inTxn_clipped"])
        df["deanonymous"] = 0.5 * (u + v)

        # Save to file (only the raw counts and the score are exported)
        filename = f'deanonymous_score_{clip_value}.csv'
        df[["outTxn", "inTxn", "deanonymous"]].to_csv(filename, index=False)
        saved_files.append(filename)

    print("Files saved successfully: " + ", ".join(saved_files))

Files saved successfully: deanonymous_score_100.csv, deanonymous_score_1000.csv, deanonymous_score_10000.csv
