In [5]:
from pathlib import Path

import numpy as np
import pandas as pd

In [6]:
# Seed NumPy's global (legacy) generator so every np.random draw below is
# repeatable from a fresh kernel.
np.random.seed(seed=42)

# Number of synthetic customer rows to generate.
n = 5_000

# Demographic Attributes

In [7]:
# NOTE: np.random.randint excludes `high`, and the three draws must stay in this
# order so the seeded random stream yields the same values.

# Integer ages in 18..74.
age = np.random.randint(low=18, high=75, size=n)

# Integer household income in 25000..249999.
household_income = np.random.randint(low=25000, high=250000, size=n)

# Integer household size in 1..6.
household_size = np.random.randint(low=1, high=7, size=n)

# Behavioral Attributes

In [8]:
# The random draws below must keep this order so the seeded stream is unchanged.

# Tenure in whole months, 1..60 (randint's upper bound is exclusive).
tenure_months = np.random.randint(low=1, high=61, size=n)

# Monthly charge drawn uniformly from [10, 150), rounded to 2 decimals.
monthly_charges = np.round(np.random.uniform(low=10, high=150, size=n), 2)

# Derived, not sampled: tenure times the monthly charge, rounded to 2 decimals.
total_spent = np.round(tenure_months * monthly_charges, 2)

# Usage / support counters: logins in 0..100, tickets in 0..10.
num_logins = np.random.randint(low=0, high=101, size=n)
num_support_tickets = np.random.randint(low=0, high=11, size=n)

# Auto-pay flag: 1 with probability 0.6, 0 with probability 0.4.
auto_pay = np.random.choice(a=[0, 1], size=n, p=[0.4, 0.6])

# Categorical attributes sampled with fixed probabilities.
plan_type = np.random.choice(
    a=['Basic', 'Premium', 'Enterprise'],
    size=n,
    p=[0.6, 0.3, 0.1],
)
contract_type = np.random.choice(
    a=['Monthly', 'Annual'],
    size=n,
    p=[0.7, 0.3],
)

# Churn - the target variable

In [9]:
# Starting churn probability before any per-customer adjustment.
base_churn_prob = 0.25

# 0/1 indicator arrays for the conditions that raise the churn probability.
many_tickets = (num_support_tickets > 5).astype(int)
on_basic_plan = (plan_type == 'Basic').astype(int)
on_monthly_contract = (contract_type == 'Monthly').astype(int)

# Build the per-customer probability one term at a time. Tenure and income
# lower it; the three indicators raise it. Terms are applied in a fixed
# left-to-right order so the floating-point result is reproducible.
churn_prob = base_churn_prob - 0.0005 * tenure_months
churn_prob = churn_prob - 0.000002 * household_income
churn_prob = churn_prob + 0.01 * many_tickets
churn_prob = churn_prob + 0.02 * on_basic_plan
churn_prob = churn_prob + 0.015 * on_monthly_contract

# Keep every probability inside [0.05, 0.7].
churn_prob = churn_prob.clip(0.05, 0.7)

# One Bernoulli trial per customer: 1 = churned, 0 = retained.
churn = np.random.binomial(n=1, p=churn_prob)

# Dataframe

In [10]:
# Assemble every generated array into one table; keyword order fixes the
# column order of the resulting frame.
data = pd.DataFrame(dict(
    # demographic attributes
    age=age,
    household_income=household_income,
    household_size=household_size,
    # behavioral attributes
    tenure_months=tenure_months,
    monthly_charges=monthly_charges,
    total_spent=total_spent,
    num_logins=num_logins,
    num_support_tickets=num_support_tickets,
    auto_pay=auto_pay,
    plan_type=plan_type,
    contract_type=contract_type,
    # target
    churn=churn,
))

# View and Save Data

In [16]:
# Preview the first rows and the share of each churn class before saving.
print(data.head())
print("\nClass distribution:")
print(data['churn'].value_counts(normalize=True))

# Write the dataset to disk. DataFrame.to_csv does not create missing folders
# and raises OSError when the target directory is absent, so create it first;
# this keeps Restart & Run All working on a fresh checkout.
output_path = Path("../Data/synthetic_churn_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
data.to_csv(output_path, index=False)
print("\nSaved to synthetic_churn_data.csv")

   age  household_income  household_size  tenure_months  monthly_charges  \
0   56            102960               1             24            67.47   
1   69            109388               4              7            36.95   
2   46             47601               3             34           114.32   
3   32            174092               1             48            27.79   
4   60            164227               6             12            24.54   

   total_spent  num_logins  num_support_tickets  auto_pay plan_type  \
0      1619.28           4                   10         1     Basic   
1       258.65          53                    3         0     Basic   
2      3886.88          37                    1         1   Premium   
3      1333.92          61                    4         0     Basic   
4       294.48          24                    8         1     Basic   

  contract_type  churn  
0       Monthly      0  
1       Monthly      0  
2       Monthly      0  
3       Monthly 