In [11]:
import pandas as pd
import numpy as np 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split

In [13]:
# Setup and feature engineering.
# Loads the per-neighborhood table, builds the integer class target, splits
# into train/test, and standardizes the features using statistics learned
# from the TRAINING rows only (fitting the scaler on all rows before the
# split would leak test-set means/variances into training).
df = pd.read_csv('../../data/processed/neighborhood_crime_counts.csv').set_index("neighborhood")
# "neighborhood" is now the index, so it can never be picked up as a feature.

# feature columns fed to the models
feature_cols = ['program_count', 'Total Population All', 'Percent White', 'Median Household Income', 'Per Capita Income', 'program_count_per1000', 'crime_per1000' ]

# target column - unscaled; rounded to the nearest integer to form class labels
target_col = 'avgOverallEquityScore'
y = df[target_col].round().astype(int).values

# train test split on the RAW features (same test_size / random_state as
# before, so the row partition is unchanged)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df[feature_cols], y, test_size=0.2, random_state=42
)

# scale: fit on training rows only, then apply the same transform everywhere
scaler = StandardScaler()
scaler.fit(X_train_raw)

X_train = pd.DataFrame(scaler.transform(X_train_raw),
                       index=X_train_raw.index,
                       columns=feature_cols)
X_test = pd.DataFrame(scaler.transform(X_test_raw),
                      index=X_test_raw.index,
                      columns=feature_cols)

# scaled dataframe of all rows (train-fitted scaler), kept under the
# original names in case other cells use them
scaled_features = scaler.transform(df[feature_cols])
X = pd.DataFrame(scaled_features,
                 index=df.index,
                 columns=feature_cols)



## Basic Logistic Regression without Regularization

In [18]:
# Baseline: logistic regression with the penalty term switched off
# (penalty=None), trained on the training split.
log_reg = LogisticRegression(penalty=None, max_iter=1000).fit(X_train, y_train)

# Class predictions for the held-out test split
y_pred = log_reg.predict(X_test)

# Evaluation: confusion matrix, then the per-class precision/recall/F1 table.
# zero_division=0 reports 0 for classes with an undefined score (e.g. a class
# that is never predicted) instead of emitting a warning.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:",
      classification_report(y_test, y_pred, zero_division=0),
      sep="\n")

Confusion Matrix:
[[8 1 0 0]
 [1 0 1 0]
 [0 3 1 0]
 [0 0 1 0]]

Classification Report:
              precision    recall  f1-score   support

           2       0.89      0.89      0.89         9
           3       0.00      0.00      0.00         2
           4       0.33      0.25      0.29         4
           5       0.00      0.00      0.00         1

    accuracy                           0.56        16
   macro avg       0.31      0.28      0.29        16
weighted avg       0.58      0.56      0.57        16



## Logistic Regression with L1 Regularization

In [21]:
# Logistic regression with an L1 penalty. C is the inverse regularization
# strength (smaller C = stronger penalty). The 'saga' solver shuffles the
# data while optimizing, so random_state is pinned to make the fitted model
# and the metrics below reproducible across runs.
lasso = LogisticRegression(max_iter=1000, penalty='l1', C=0.5, solver='saga',
                           random_state=42)

lasso.fit(X_train, y_train)

# Predict on the test set
y_pred = lasso.predict(X_test)

# Evaluate performance
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# zero_division=0: report 0 (without a warning) for classes whose
# precision/recall is undefined.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

Confusion Matrix:
[[9 0 0 0]
 [0 1 1 0]
 [0 3 1 0]
 [0 0 1 0]]

Classification Report:
              precision    recall  f1-score   support

           2       1.00      1.00      1.00         9
           3       0.25      0.50      0.33         2
           4       0.33      0.25      0.29         4
           5       0.00      0.00      0.00         1

    accuracy                           0.69        16
   macro avg       0.40      0.44      0.40        16
weighted avg       0.68      0.69      0.68        16



## Logistic Regression with L2 Regularization

In [22]:
# Logistic regression with an L2 penalty; C=0.5 is the inverse regularization
# strength (smaller C = stronger penalty). Fit on the training split.
ridge = LogisticRegression(penalty='l2', C=0.5, max_iter=1000).fit(X_train, y_train)

# Class predictions for the held-out test split
y_pred = ridge.predict(X_test)

# Evaluation: confusion matrix followed by the per-class report.
# zero_division=0 reports 0 (without a warning) where a score is undefined.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:",
      classification_report(y_test, y_pred, zero_division=0),
      sep="\n")

Confusion Matrix:
[[9 0 0 0]
 [1 0 1 0]
 [0 3 1 0]
 [0 0 1 0]]

Classification Report:
              precision    recall  f1-score   support

           2       0.90      1.00      0.95         9
           3       0.00      0.00      0.00         2
           4       0.33      0.25      0.29         4
           5       0.00      0.00      0.00         1

    accuracy                           0.62        16
   macro avg       0.31      0.31      0.31        16
weighted avg       0.59      0.62      0.60        16



## Logistic Regression with Elastic Net Regularization

In [23]:
# Logistic regression with an elastic-net penalty: l1_ratio=0.5 mixes the L1
# and L2 terms equally. The 'saga' solver shuffles the data while optimizing,
# so random_state is pinned to make the fitted model and the metrics below
# reproducible across runs.
elastic = LogisticRegression(
    max_iter=1000,
    penalty='elasticnet',
    l1_ratio=0.5,
    C=1.0,  # NOTE(review): library default, made explicit. The L1 and L2 cells use C=0.5 - confirm whether the differing strength is intentional.
    solver='saga',
    random_state=42,
)

elastic.fit(X_train, y_train)

# Predict on the test set
y_pred = elastic.predict(X_test)

# Evaluate performance
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# zero_division=0: report 0 (without a warning) for classes whose
# precision/recall is undefined.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

Confusion Matrix:
[[9 0 0 0]
 [1 0 1 0]
 [0 3 1 0]
 [0 0 1 0]]

Classification Report:
              precision    recall  f1-score   support

           2       0.90      1.00      0.95         9
           3       0.00      0.00      0.00         2
           4       0.33      0.25      0.29         4
           5       0.00      0.00      0.00         1

    accuracy                           0.62        16
   macro avg       0.31      0.31      0.31        16
weighted avg       0.59      0.62      0.60        16

