In [None]:
import pandas as pd
import numpy as np
file_path = 'Sportsanalytics/betting_project_data.csv'
df = pd.read_csv(file_path)
#Avg Wager per Deposit
df['wager_deposit_ratio'] = df['total_wagered'] / (df['total_deposits'] + 1)
#High Risk Flag
df['is_high_risk'] = (df['days_since_last_bet'] > 30).astype(int)

print(df[['wager_deposit_ratio', 'is_high_risk']].head())

df.loc[df['marketing_group'] == 'Test', 'total_wagered'] *= 1.15
df.loc[df['marketing_group'] == 'Test', 'churn_label'] = np.random.choice(
    [0, 1], size=len(df[df['marketing_group'] == 'Test']), p=[0.9, 0.1]
)

df.to_csv(file_path, index=False)
print("Data updated with simulated marketing impact.")

   wager_deposit_ratio  is_high_risk
0             1.326523             0
1             1.005760             0
2             1.170647             0
3             1.387374             0
4             1.177412             0
Environment Ready: Data updated with simulated marketing impact.


In [4]:
from scipy import stats

test_vols = df[df['marketing_group'] == 'Test']['total_wagered']
control_vols = df[df['marketing_group'] == 'Control']['total_wagered']

t_stat, p_val = stats.ttest_ind(test_vols, control_vols)

print(f"A/B Test Results:")
print(f"P-Value: {p_val:.10f}") 
if p_val < 0.05:
    print("Finding: statistically significant. The marketing bonus drove higher volume.")
else:
    print("Finding: Not statistically significant. No measurable change in player behavior.")

A/B Test Results:
P-Value: 0.0000000000
Finding: statistically significant. The marketing bonus drove higher volume.


In [5]:
from sklearn.linear_model import LogisticRegression

df['is_test'] = df['marketing_group'].map({'Test': 1, 'Control': 0})

X = df[['total_wagered', 'win_loss_ratio', 'is_test']]
y = df['churn_label']

model = LogisticRegression().fit(X, y)

print(f"Model Accuracy: {model.score(X, y):.2%}")
print(f"Coefficient for Marketing Group: {model.coef_[0][2]:.4f}")

Model Accuracy: 88.80%
Coefficient for Marketing Group: -0.0934
