In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Read the crime records and derive calendar fields from the occurrence date.
df = pd.read_csv("/content/preprocessed_crime_data.csv")
df['DATE OCC'] = pd.to_datetime(df['DATE OCC'])
occurrence = df['DATE OCC'].dt
df['Year'] = occurrence.year
df['Month'] = occurrence.month

# The model inputs double as the grouping keys for the monthly aggregation,
# so the column list is declared once and reused for both.
feature_cols = ['Year', 'Month', 'Rpt Dist No']

# One row per (Year, Month, 'Rpt Dist No') holding the summed 'Crime Count'.
crime_by_group = df.groupby(feature_cols)['Crime Count']
monthly_zone_crime = crime_by_group.sum().reset_index()

# Feature matrix and regression target.
X = monthly_zone_crime[feature_cols]
y = monthly_zone_crime['Crime Count']

# Chronological split: rows with Year before 2023 train the model,
# every remaining row is held out for testing.
train_mask = X['Year'] < 2023
test_mask = ~train_mask
X_train, X_test = X[train_mask], X[test_mask]
y_train, y_test = y[train_mask], y[test_mask]

# Ten (n_estimators, max_depth) combinations to evaluate, in run order.
_param_pairs = [
    (100, 5),
    (200, 6),
    (150, 7),
    (100, 8),
    (120, 4),
    (180, 9),
    (160, 6),
    (130, 5),
    (110, 7),
    (140, 6),
]

# Expand each pair into the keyword-style dict the evaluation loop reads.
param_list = [
    {"n_estimators": n_trees, "max_depth": depth}
    for n_trees, depth in _param_pairs
]

# Fit one random forest per parameter set and record a summary row for each.
results = []
for i, params in enumerate(param_list, start=1):
    n_trees = params["n_estimators"]
    depth = params["max_depth"]

    # Fixed random_state keeps the runs repeatable.
    model = RandomForestRegressor(
        n_estimators=n_trees,
        max_depth=depth,
        random_state=42,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Error over every held-out (Year, Month, district) row.
    mse = mean_squared_error(y_test, y_pred)

    # Attach the predictions to a copy of the test features, total them per
    # 'Rpt Dist No', and keep the row with the largest predicted total.
    X_test_copy = X_test.assign(**{'Predicted Crime Count': y_pred})
    predicted_by_zone = X_test_copy.groupby('Rpt Dist No')['Predicted Crime Count']
    zone_preds = predicted_by_zone.sum().reset_index()
    ranked_zones = zone_preds.sort_values(by='Predicted Crime Count', ascending=False)
    most_dangerous = ranked_zones.iloc[0]

    summary_row = {
        "Model": i,
        "n_estimators": n_trees,
        "max_depth": depth,
        "Predicted Crime Count": most_dangerous['Predicted Crime Count'],
        "MSE": mse,
    }
    results.append(summary_row)

# Tabulate and show the per-model summaries.
results_df = pd.DataFrame(results)
print(results_df)


   Model  n_estimators  max_depth  Predicted Crime Count       MSE
0      1           100          5              59.632850  1.073425
1      2           200          6             165.316636  0.997853
2      3           150          7             168.314998  0.933125
3      4           100          8             172.747681  0.880622
4      5           120          4              31.732848  1.105457
5      6           180          9             173.510545  0.822034
6      7           160          6             167.719462  1.000390
7      8           130          5              61.014182  1.071239
8      9           110          7             171.293597  0.933229
9     10           140          6             167.534819  0.999555
