Imports

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

Loading Preprocessed Data

In [2]:
# Read the preprocessed crime table; the relative path is resolved against
# the kernel's current working directory.
file_path = 'preprocessed_crime_data.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

Dropping Columns

In [3]:
# Columns removed before modelling. The drop is deliberately strict: a missing
# or misspelled column name raises instead of being silently ignored.
columns_to_drop = ['Crime Count', 'Is Violent', 'DATE OCC']
df = df.drop(columns=columns_to_drop)

Ensuring All Features Are Numeric

In [4]:
# Keep only numeric columns; every one of them except 'Target' becomes a feature.
numeric_df = df.select_dtypes(include=[np.number]).copy()
feature_frame = numeric_df.drop(columns=['Target'])
X = feature_frame.values.astype('float32')  # feature matrix as float32
y = numeric_df['Target'].values             # label vector, dtype as stored in the frame

Train-Test Split

In [5]:
# Without a 'year' column fall back to a seeded random 80/20 split; otherwise
# split chronologically: 2020-2022 for training, 2023-2024 for testing
# (rows from any other year end up in neither set).
if 'year' not in numeric_df.columns:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
else:
    train_mask = numeric_df['year'].isin([2020, 2021, 2022])
    test_mask = numeric_df['year'].isin([2023, 2024])
    X_train, X_test = X[train_mask], X[test_mask]
    y_train, y_test = y[train_mask], y[test_mask]

Reshaping Tabular Data for the LSTM: (samples, 1, features)

In [6]:
# Insert a sequence axis of length 1 so each row becomes a one-step sequence:
# (samples, features) -> (samples, 1, features).
# The ndim guard makes this cell safe to re-run: on an already 3-D array
# shape[1] is 1, so an unguarded reshape would silently produce
# (samples * features, 1, 1) and scramble the features.
if X_train.ndim == 2:
    X_train = X_train.reshape(-1, 1, X_train.shape[1])
if X_test.ndim == 2:
    X_test = X_test.reshape(-1, 1, X_test.shape[1])

Converting to PyTorch Tensors

In [7]:
# Build the tensors with torch.tensor and an explicit dtype instead of the
# legacy FloatTensor/LongTensor constructors, so the conversion does not
# depend on the NumPy dtype the arrays happen to carry.
# Features are float32; labels are int64 class indices as required by
# nn.CrossEntropyLoss.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

Defining 10 Parameter Combinations

In [8]:
# Ten hand-picked hyper-parameter sets for the sweep. 'weight_decay' is
# optional; sets that omit it are trained without weight decay.
param_combinations = [
    dict(hidden_size=64, num_layers=2, lr=0.001, batch_size=64, dropout=0.3),
    dict(hidden_size=128, num_layers=1, lr=0.0005, batch_size=32, dropout=0.2),
    dict(hidden_size=256, num_layers=2, lr=0.0001, batch_size=128, dropout=0.4),
    dict(hidden_size=64, num_layers=3, lr=0.002, batch_size=64, dropout=0.3, weight_decay=0.001),
    dict(hidden_size=32, num_layers=1, lr=0.005, batch_size=16, dropout=0.1),
    dict(hidden_size=128, num_layers=2, lr=0.0002, batch_size=256, dropout=0.5),
    dict(hidden_size=512, num_layers=1, lr=0.0001, batch_size=64, dropout=0.3),
    dict(hidden_size=64, num_layers=2, lr=0.001, batch_size=32, dropout=0.2, weight_decay=0.0001),
    dict(hidden_size=256, num_layers=3, lr=0.0005, batch_size=128, dropout=0.4),
    dict(hidden_size=128, num_layers=1, lr=0.002, batch_size=64, dropout=0.3),
]

LSTM Model

In [9]:
class CrimeLSTM(nn.Module):
    """Bidirectional LSTM classifier: LSTM -> BatchNorm1d -> Dropout -> Linear.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden units per LSTM direction (the classifier head sees
            ``2 * hidden_size`` features because the LSTM is bidirectional).
        num_layers: number of stacked LSTM layers.
        num_classes: number of output logits.
        dropout: dropout probability used before the linear head, and between
            LSTM layers when ``num_layers > 1``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        # Inter-layer dropout is only passed when there is more than one layer;
        # with a single layer 0 is passed instead.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True,
                           dropout=dropout if num_layers > 1 else 0, bidirectional=True)
        self.bn = nn.BatchNorm1d(hidden_size * 2)
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for x of shape (batch, seq_len, input_size)."""
        _, (h_n, _) = self.lstm(x)
        # Use the final hidden state of each direction of the last layer
        # (h_n[-2] = forward, h_n[-1] = reverse). Slicing the output sequence at
        # the last time step would give the reverse direction after it has seen
        # only one step; for seq_len == 1 both formulations are identical.
        out = torch.cat((h_n[-2], h_n[-1]), dim=1)
        out = self.bn(out)
        out = self.dropout(out)
        out = self.fc(out)
        return out

Training and Evaluation Function

In [10]:
def train_and_evaluate(params, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor):
    """Train a CrimeLSTM with one hyper-parameter set and report test metrics.

    Args:
        params: dict of hyper-parameters. Required keys: 'hidden_size', 'lr',
            'batch_size'. Optional keys: 'num_layers' (default 1), 'dropout'
            (default 0), 'weight_decay' (default 0), 'max_epochs' (default 20)
            and 'seed' (default None, i.e. the RNG is left untouched).
        X_train_tensor: training features; ``shape[2]`` is used as the LSTM
            input size, so a 3-D tensor is expected.
        y_train_tensor: training class indices for ``nn.CrossEntropyLoss``.
        X_test_tensor: test features, same layout as ``X_train_tensor``.
        y_test_tensor: test class indices used as ground truth for the report.

    Returns:
        Test-set accuracy as computed by ``accuracy_score``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Optional reseeding so a single configuration can be reproduced.
    seed = params.get('seed')
    if seed is not None:
        torch.manual_seed(seed)

    # Initializing model
    model = CrimeLSTM(
        input_size=X_train_tensor.shape[2],
        hidden_size=params['hidden_size'],
        num_layers=params.get('num_layers', 1),
        num_classes=2,
        dropout=params.get('dropout', 0)
    ).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(),
        lr=params['lr'],
        weight_decay=params.get('weight_decay', 0)
    )

    # DataLoader
    batch_size = params['batch_size']
    train_data = TensorDataset(X_train_tensor, y_train_tensor)
    # BatchNorm1d cannot normalise a one-sample batch in training mode, so a
    # trailing batch of exactly one sample is dropped instead of crashing.
    drop_last = len(train_data) > 1 and len(train_data) % batch_size == 1
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                              drop_last=drop_last)

    # Training loop
    max_epochs = params.get('max_epochs', 20)
    for epoch in range(max_epochs):
        model.train()
        epoch_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            # Clip the global gradient norm to 1.0 before the optimizer step.
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()

        avg_train_loss = epoch_loss / len(train_loader)
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}")

    # Evaluation on test set
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor.to(device))
        _, predicted = torch.max(test_outputs, 1)
        predicted = predicted.cpu().numpy()

    # Ground truth comes from the function argument, not from a notebook-level
    # global, so the function scores exactly the data it was given.
    y_true = y_test_tensor.detach().cpu().numpy()

    print("\nTest Results:")
    print(classification_report(y_true, predicted, digits=6, zero_division=0))
    accuracy = accuracy_score(y_true, predicted)
    print(f"Accuracy: {accuracy:.6f}")
    return accuracy

Running for Each Parameter Combination

In [11]:
# Train and score every configuration, keeping the first one that reaches the
# highest accuracy.
# NOTE(review): selection uses the accuracy returned for the test tensors, so
# the reported "Best Accuracy" is an optimistic estimate.
best_accuracy, best_params = 0, None
for combo_number, params in enumerate(param_combinations, start=1):
    print(f"\n=== Testing Parameter Combination {combo_number} ===")
    print(f"Parameters: {params}")
    accuracy = train_and_evaluate(
        params,
        X_train_tensor,
        y_train_tensor,
        X_test_tensor,
        y_test_tensor,
    )
    is_new_best = accuracy > best_accuracy
    if is_new_best:
        best_accuracy, best_params = accuracy, params
    print("="*80)

print(f"\nBest Parameters: {best_params}")
print(f"Best Accuracy: {best_accuracy:.6f}")


=== Testing Parameter Combination 1 ===
Parameters: {'hidden_size': 64, 'num_layers': 2, 'lr': 0.001, 'batch_size': 64, 'dropout': 0.3}
Epoch 1, Train Loss: 0.5149
Epoch 2, Train Loss: 0.4795
Epoch 3, Train Loss: 0.4712
Epoch 4, Train Loss: 0.4631
Epoch 5, Train Loss: 0.4594
Epoch 6, Train Loss: 0.4521
Epoch 7, Train Loss: 0.4466
Epoch 8, Train Loss: 0.4392
Epoch 9, Train Loss: 0.4372
Epoch 10, Train Loss: 0.4324
Epoch 11, Train Loss: 0.4258
Epoch 12, Train Loss: 0.4220
Epoch 13, Train Loss: 0.4202
Epoch 14, Train Loss: 0.4176
Epoch 15, Train Loss: 0.4149
Epoch 16, Train Loss: 0.4146
Epoch 17, Train Loss: 0.4144
Epoch 18, Train Loss: 0.4139
Epoch 19, Train Loss: 0.4115
Epoch 20, Train Loss: 0.4112

Test Results:
              precision    recall  f1-score   support

           0   0.828470  0.990652  0.902331     10804
           1   0.677316  0.087315  0.154688      2428

    accuracy                       0.824894     13232
   macro avg   0.752893  0.538983  0.528510     13232
weigh