# Setup

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import fisher_exact

# Read the data
df = pd.read_csv("hack_history.csv")
df

Unnamed: 0,Project name,Number of times hacked,Total hacked value
0,Ronin Network,3,641000000.0
1,Poly Network,3,621400000.0
2,BNB Bridge,1,586000000.0
3,FTX,2,927000000.0
4,Wormhole,1,326000000.0
...,...,...,...
257,Lendf.me,1,1200000.0
258,Trinity Wallet,1,2350000.0
259,Gate.io,1,235000000.0
260,Parity Multisig,1,150000000.0


In [4]:
df['Number of times hacked'].sum()

295

# Fisher's Exact Test (with Haldane-Anscombe correction)

The Fisher’s Exact Test’s limitation in handling zeros in contingency tables (leading to an odds ratio of 0) doesn’t negate the overall conclusion from the relative risk and p-value, which indicate a significant relationship.

In [2]:


# Calculate the numbers for our contingency table
hacked_once_or_more = df[df['Number of times hacked'] >= 1].shape[0]
never_hacked = 4064 - hacked_once_or_more
hacked_multiple = df[df['Number of times hacked'] > 1].shape[0]
hacked_once = hacked_once_or_more - hacked_multiple

# Create the contingency table
contingency_table = np.array([[hacked_once, hacked_multiple],
                              [never_hacked, 0]]) + 0.5  # Haldane-Anscombe Correction

print("Contingency Table:")
print(pd.DataFrame(contingency_table, 
                   index=['Hacked Before', 'Never Hacked'], 
                   columns=['Not Hacked Again', 'Hacked Again']))

# Perform Fisher's Exact Test
odds_ratio, p_value = fisher_exact(contingency_table)

print(f"\np-value: {p_value}")

# Interpret the results
alpha = 0.05  # Significance level
if p_value < alpha:
    print("Reject the null hypothesis. There is a significant association between being hacked before and being hacked again.")
else:
    print("Fail to reject the null hypothesis. No significant evidence of an association between being hacked before and being hacked again.")

print(f"\nOdds ratio: {odds_ratio}")

# Calculate probabilities
prob_hacked_again = hacked_multiple / hacked_once_or_more
prob_hacked_first_time = hacked_once_or_more / 4064

print(f"\nProbability of being hacked again if previously hacked: {prob_hacked_again:.4f}")
print(f"Probability of being hacked for the first time: {prob_hacked_first_time:.4f}")

# Calculate relative risk
relative_risk = prob_hacked_again / prob_hacked_first_time

print(f"\nRelative Risk: {relative_risk:.4f}")
print("This means that projects that have been hacked before are {:.2f} times ".format(relative_risk) +
      "more likely to be hacked again compared to projects that have never been hacked.")

# Additional context
print(f"\nTotal number of projects: 4064")
print(f"Number of projects hacked at least once: {hacked_once_or_more}")
print(f"Number of projects hacked multiple times: {hacked_multiple}")
print(f"Percentage of hacked projects that were hacked multiple times: {(hacked_multiple/hacked_once_or_more)*100:.2f}%")

Contingency Table:
               Not Hacked Again  Hacked Again
Hacked Before             235.5          27.5
Never Hacked             3802.5           0.5

p-value: 1.9385706475142455e-33
Reject the null hypothesis. There is a significant association between being hacked before and being hacked again.

Odds ratio: 0.0

Probability of being hacked again if previously hacked: 0.1031
Probability of being hacked for the first time: 0.0645

Relative Risk: 1.5985
This means that projects that have been hacked before are 1.60 times more likely to be hacked again compared to projects that have never been hacked.

Total number of projects: 4064
Number of projects hacked at least once: 262
Number of projects hacked multiple times: 27
Percentage of hacked projects that were hacked multiple times: 10.31%


# Markov chain

In [3]:
import numpy as np
import pandas as pd

# Assume we have the data in a pandas DataFrame called 'df'
# with a column 'Times Hacked' indicating how many times each project was hacked

# Define our states
S0 = "Never Hacked"
S1 = "Hacked Once"
S2 = "Hacked Multiple Times"

# Calculate the number of projects in each state
n_S1 = df[df['Number of times hacked'] == 1].shape[0]
n_S2 = df[df['Number of times hacked'] > 1].shape[0]
n_S0 = 4064 - n_S1 - n_S2

total_projects = n_S0 + n_S1 + n_S2

# Calculate transition probabilities
p_S0_to_S1 = n_S1 / total_projects
p_S1_to_S2 = n_S2 / (n_S1 + n_S2)
p_S2_to_S2 = 1  # Once in S2, always in S2 in this model

# Create transition matrix
P = np.array([
    [1 - p_S0_to_S1, p_S0_to_S1, 0],
    [0, 1 - p_S1_to_S2, p_S1_to_S2],
    [0, 0, 1]
])

# Print transition matrix
print("Transition Matrix:")
print(pd.DataFrame(P, index=[S0, S1, S2], columns=[S0, S1, S2]))

# Calculate steady state
def steady_state(P, num_iterations=1000):
    state = np.array([1, 0, 0])  # Start with all projects in S0
    for _ in range(num_iterations):
        state = np.dot(state, P)
    return state

steady_state = steady_state(P)

print("\nSteady State Distribution:")
print(pd.DataFrame([steady_state], columns=[S0, S1, S2]))

# Compare probabilities
print(f"\nProbability of transitioning from Never Hacked to Hacked Once: {p_S0_to_S1:.4f}")
print(f"Probability of transitioning from Hacked Once to Hacked Multiple Times: {p_S1_to_S2:.4f}")

if p_S1_to_S2 > p_S0_to_S1:
    print("\nProjects that have been hacked once are more likely to be hacked again " +
          f"compared to projects that have never been hacked (by a factor of {p_S1_to_S2/p_S0_to_S1:.2f}).")
else:
    print("\nProjects that have been hacked once are not more likely to be hacked again " +
          "compared to projects that have never been hacked.")

# Additional context
print(f"\nTotal number of projects: {total_projects}")
print(f"Number of projects never hacked (S0): {n_S0}")
print(f"Number of projects hacked once (S1): {n_S1}")
print(f"Number of projects hacked multiple times (S2): {n_S2}")

Transition Matrix:
                       Never Hacked  Hacked Once  Hacked Multiple Times
Never Hacked               0.942175     0.057825               0.000000
Hacked Once                0.000000     0.896947               0.103053
Hacked Multiple Times      0.000000     0.000000               1.000000

Steady State Distribution:
   Never Hacked   Hacked Once  Hacked Multiple Times
0  1.354151e-26  1.731282e-26                    1.0

Probability of transitioning from Never Hacked to Hacked Once: 0.0578
Probability of transitioning from Hacked Once to Hacked Multiple Times: 0.1031

Projects that have been hacked once are more likely to be hacked again compared to projects that have never been hacked (by a factor of 1.78).

Total number of projects: 4064
Number of projects never hacked (S0): 3802
Number of projects hacked once (S1): 235
Number of projects hacked multiple times (S2): 27


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=5871b7c8-5436-440c-b4b2-2c2867d8b1c9' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>