In [203]:
from sklearn.datasets import load_digits

(X, y) = load_digits(return_X_y = True, as_frame=True)


In [204]:
import pandas as pd
import numpy as np

np.random.seed(0)

n_samples = 20  # per cluster

# Cluster 1: Young people targeting schools
cluster1 = pd.DataFrame({
    'AgeGroup': np.random.choice(['Young'], n_samples),
    'Gender': np.random.choice(['Male','Female'], n_samples),
    'Ethnicity': np.random.choice(['White'], n_samples),
    'Solo': np.random.choice(['Yes'], n_samples),
    'Motive': np.random.choice(['School'], n_samples),
    'Weapon': np.random.choice(['Firearm','Knife','Other'], n_samples),
    'Fatalities': np.random.choice(['None','Few','Several','Many'], n_samples),
    'Wealth': np.random.choice(['Low','Medium','High'], n_samples),
    'CrimeRate': np.random.choice(['Low','Medium','High'], n_samples),
    'Military': np.random.choice(['No'], n_samples),
    'LawEnforcement': np.random.choice(['No'], n_samples),
    'Religion': np.random.choice(['Undefined'], n_samples)
})

# Cluster 2: Older men in non-school public areas
cluster2 = pd.DataFrame({
    'AgeGroup': np.random.choice(['Middle','Old'], n_samples),
    'Gender': np.random.choice(['Male'], n_samples),
    'Ethnicity': np.random.choice(['White'], n_samples),
    'Solo': np.random.choice(['Yes'], n_samples),
    'Motive': np.random.choice(['Personal'], n_samples),
    'Weapon': np.random.choice(['Firearm','Knife','Other'], n_samples),
    'Fatalities': np.random.choice(['Few','Several','Many'], n_samples),
    'Wealth': np.random.choice(['Medium','High'], n_samples),
    'CrimeRate': np.random.choice(['Medium','High'], n_samples),
    'Military': np.random.choice(['Yes','No'], n_samples),
    'LawEnforcement': np.random.choice(['No'], n_samples),
    'Religion': np.random.choice(['Undefined'], n_samples)
})

# Cluster 3: Religious-motivated, mixed ethnicities
cluster3 = pd.DataFrame({
    'AgeGroup': np.random.choice(['Young','Middle'], n_samples),
    'Gender': np.random.choice(['Male'], n_samples),
    'Ethnicity': np.random.choice(['Black','Asian','Hispanic'], n_samples),
    'Solo': np.random.choice(['Yes'], n_samples),
    'Motive': np.random.choice(['Religious'], n_samples),
    'Weapon': np.random.choice(['Firearm','Knife','Other'], n_samples),
    'Fatalities': np.random.choice(['Few','Several'], n_samples),
    'Wealth': np.random.choice(['Low','Medium'], n_samples),
    'CrimeRate': np.random.choice(['Low','Medium','High'], n_samples),
    'Military': np.random.choice(['No'], n_samples),
    'LawEnforcement': np.random.choice(['No'], n_samples),
    'Religion': np.random.choice(['SomeReligion'], n_samples)
})

# Combine clusters
df = pd.concat([cluster1, cluster2, cluster3], ignore_index=True)


In [205]:
from pandas import get_dummies

dataset = get_dummies(df)

In [206]:
from pandas import read_csv


# dataset = read_csv('/workspaces/Main-DSDT-Example-Repo/synthetic_shooters_archetypes.csv')

X = dataset

In [207]:
from sklearn.cluster import KMeans




In [208]:
from sklearn.metrics import accuracy_score

In [209]:
from sklearn.metrics import silhouette_score

In [210]:
best_values = {}
best_values['score'] = 0

for k in range(2, 10):
    model = KMeans(n_clusters = k)
    model.fit(X)
    my_models_labels = model.labels_
    my_new_score = silhouette_score(X, my_models_labels)

    if my_new_score > best_values['score']:
        best_values['score'] = my_new_score
        best_values['k'] = k

    
    

In [211]:
best_k = best_values['k']

model = KMeans(n_clusters = best_k)
model.fit(X)

0,1,2
,n_clusters,3
,init,'k-means++'
,n_init,'auto'
,max_iter,300
,tol,0.0001
,verbose,0
,random_state,
,copy_x,True
,algorithm,'lloyd'


In [212]:
cluster_data = model.cluster_centers_

In [213]:
from pandas import DataFrame

In [214]:
my_original_column_names = X.columns

DataFrame(cluster_data,
          columns = my_original_column_names
          )

Unnamed: 0,AgeGroup_Middle,AgeGroup_Old,AgeGroup_Young,Gender_Female,Gender_Male,Ethnicity_Asian,Ethnicity_Black,Ethnicity_Hispanic,Ethnicity_White,Solo_Yes,...,Wealth_Low,Wealth_Medium,CrimeRate_High,CrimeRate_Low,CrimeRate_Medium,Military_No,Military_Yes,LawEnforcement_No,Religion_SomeReligion,Religion_Undefined
0,0.55,2.775558e-17,0.45,8.326673e-17,1.0,0.3,0.3,0.4,1.110223e-16,1.0,...,0.5,0.5,0.35,0.15,0.5,1.0,-1.387779e-17,1.0,1.0,1.110223e-16
1,1.110223e-16,2.775558e-17,1.0,0.55,0.45,-2.775558e-17,-2.775558e-17,0.0,1.0,1.0,...,0.25,0.4,0.2,0.55,0.25,1.0,-1.387779e-17,1.0,5.5511150000000004e-17,1.0
2,0.5,0.5,1.110223e-16,8.326673e-17,1.0,-2.775558e-17,-2.775558e-17,0.0,1.0,1.0,...,0.0,0.65,0.55,-2.775558e-17,0.45,0.65,0.35,1.0,5.5511150000000004e-17,1.0


In [215]:
dataset.to_csv("synthetic_shooters_official.csv", index = False)