Imports

In [1]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

Loading the Data

In [2]:
# Location of the input CSV; a relative path, so it is resolved against the
# notebook's working directory.
file_path = 'preprocessed_crime_data.csv'

# Read the whole file into a single DataFrame used by the cells that follow.
df = pd.read_csv(filepath_or_buffer=file_path)

Feature Engineering

In [3]:
# Parse the occurrence date once; every calendar feature below derives from it.
occurred_on = pd.to_datetime(df['DATE OCC'])
df['DATE OCC'] = occurred_on

# Integer-dividing by 100 keeps the leading digits of TIME OCC
# (presumably the hour of an HHMM value -- confirm against the dataset).
df['hour'] = df['TIME OCC'].astype(int).floordiv(100)

# Calendar parts of the occurrence date (pandas numbers Monday as 0, Sunday as 6).
df['dayofweek'] = occurred_on.dt.weekday
df['month'] = occurred_on.dt.month
df['year'] = occurred_on.dt.year

# Weekday codes 5 and 6 are Saturday and Sunday; store the flag as 0/1.
df['is_weekend'] = (df['dayofweek'] >= 5).astype(int)

Dropping Unused Columns

In [4]:
# Discard the columns this notebook does not feed to the models; the raw date
# column goes too, now that its calendar features have been extracted.
df = df.drop(
    ['Crime Count', 'Is Violent', 'DATE OCC'],
    axis='columns',
)

Train-Test Split by Year

In [5]:
# Temporal split: 2020-2022 rows train the models, 2023-2024 rows evaluate them.
# Rows from any other year end up in neither subset.
in_train_years = df['year'].isin([2020, 2021, 2022])
in_test_years = df['year'].isin([2023, 2024])
train_df = df.loc[in_train_years]
test_df = df.loc[in_test_years]

# 'Target' is the label; 'year' only defines the split, so both are removed
# from the feature matrices.
non_feature_cols = ['Target', 'year']
X_train, y_train = train_df.drop(columns=non_feature_cols), train_df['Target']
X_test, y_test = test_df.drop(columns=non_feature_cols), test_df['Target']

Defining 10 Parameter Sets for AdaBoost

In [6]:
# Ten hand-picked AdaBoost configurations. Each row of the table is
# (n_estimators, learning_rate, max_depth), where max_depth applies to the
# decision tree used as the base estimator.
param_sets = [
    {'n_estimators': n_trees, 'learning_rate': rate, 'max_depth': depth}
    for n_trees, rate, depth in (
        (50, 1.0, 1),
        (100, 0.5, 2),
        (150, 0.1, 3),
        (200, 0.05, 4),
        (75, 0.3, 2),
        (120, 0.2, 3),
        (80, 0.8, 1),
        (180, 0.15, 4),
        (60, 0.4, 2),
        (140, 0.25, 3),
    )
]

Training and Evaluating Each Model

In [7]:
# Fit one AdaBoost model per parameter set and print its test-set metrics.
for i, params in enumerate(param_sets, start=1):
    # Banner identifying the run and the hyper-parameters it uses.
    print(f"\n=== Model {i} ===")
    print(f"Parameters: {params}")

    # Base learner: a decision tree whose depth comes from this parameter set.
    estimator = DecisionTreeClassifier(random_state=42, max_depth=params['max_depth'])

    # Boosted ensemble over that tree; fit() hands back the fitted estimator,
    # so `model` is ready for prediction straight away.
    model = AdaBoostClassifier(
        estimator=estimator,
        learning_rate=params['learning_rate'],
        n_estimators=params['n_estimators'],
        random_state=42,
    ).fit(X_train, y_train)

    # Score on the test split only.
    test_preds = model.predict(X_test)
    test_acc = accuracy_score(y_true=y_test, y_pred=test_preds)
    print(f"Test Accuracy: {test_acc:.6f}")
    print("Test Classification Report:")
    # zero_division=0 reports 0 for a metric whose denominator is zero
    # (e.g. precision of a class that is never predicted).
    print(classification_report(y_true=y_test, y_pred=test_preds, zero_division=0, digits=6))


=== Model 1 ===
Parameters: {'n_estimators': 50, 'learning_rate': 1.0, 'max_depth': 1}
Test Accuracy: 0.842979
Test Classification Report:
              precision    recall  f1-score   support

           0   0.842979  1.000000  0.914800     21995
           1   0.000000  0.000000  0.000000      4097

    accuracy                       0.842979     26092
   macro avg   0.421489  0.500000  0.457400     26092
weighted avg   0.710613  0.842979  0.771157     26092


=== Model 2 ===
Parameters: {'n_estimators': 100, 'learning_rate': 0.5, 'max_depth': 2}
Test Accuracy: 0.847999
Test Classification Report:
              precision    recall  f1-score   support

           0   0.861346  0.976949  0.915513     21995
           1   0.557205  0.155724  0.243419      4097

    accuracy                       0.847999     26092
   macro avg   0.709276  0.566337  0.579466     26092
weighted avg   0.813589  0.847999  0.809980     26092


=== Model 3 ===
Parameters: {'n_estimators': 150, 'learning_rate