In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump  # Import joblib for saving the model
import os  # Import for directory creation


# Load the dataset
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/pd2.csv")

# Select features and target variables
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR','MINUTE', 'Latitude', 'Longitude']]

# Define the list of crime types
crime_types = ['crime1', 'crime2', 'crime3', 'crime4'] # Replace with your crime columns

# Create an empty dictionary to store trained models
models = {}

# Train separate Logistic Regression models for each crime type
for crime_type in crime_types:
  y = data[crime_type]
  
  # Split data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  # Normalize features
  scaler = StandardScaler()
  X_train_scaled = scaler.fit_transform(X_train)
  X_test_scaled = scaler.transform(X_test)

  # Initialize and train the Logistic Regression model
  model = LogisticRegression()
  model.fit(X_train_scaled, y_train)

  # Predict on the test set
  y_pred = model.predict(X_test_scaled)

  # Evaluate the model
  accuracy = accuracy_score(y_test, y_pred)
  print(f"Accuracy for {crime_type}: {accuracy}")

  # Generate a classification report
  report = classification_report(y_test, y_pred, zero_division='warn')
  print(f"Classification Report for {crime_type}:\n{report}")

  # Save the trained model to the dictionary
  models[crime_type] = model

# Save the models to separate files (optional)
model_dir = 'C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/model/'
os.makedirs(model_dir, exist_ok=True)  # Create directory if it doesn't exist

for crime_type, model in models.items():
  dump(model, f"{model_dir}{crime_type}_model.pkl")


Accuracy for crime1: 0.7731453781935506


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for crime1:
              precision    recall  f1-score   support

       False       0.77      1.00      0.87    138267
        True       0.00      0.00      0.00     40570

    accuracy                           0.77    178837
   macro avg       0.39      0.50      0.44    178837
weighted avg       0.60      0.77      0.67    178837

Accuracy for crime2: 0.724548052136862
Classification Report for crime2:
              precision    recall  f1-score   support

       False       0.72      1.00      0.84    129578
        True       0.00      0.00      0.00     49259

    accuracy                           0.72    178837
   macro avg       0.36      0.50      0.42    178837
weighted avg       0.52      0.72      0.61    178837

Accuracy for crime3: 0.7486929438539005
Classification Report for crime3:
              precision    recall  f1-score   support

       False       0.75      1.00      0.86    133914
        True       0.38      0.00      0.00     44923

 

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

# Load the preprocessed dataset
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/pd2.csv")

# Select relevant features and target variables
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'Latitude', 'Longitude']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape data for LSTM input (samples, timesteps, features)
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

# Define the LSTM model
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=100))
model.add(Dropout(0.2))
model.add(Dense(units=4, activation='sigmoid'))  # Assuming 4 output classes for crimes

# Compile the model
optimizer = Adam(learning_rate=0.001)  # Adjust learning rate here
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(patience=3, monitor='val_loss', restore_best_weights=True)

# Train the model with different batch sizes
batch_sizes = [16, 32, 64]
for batch_size in batch_sizes:
    print(f"Training with batch size: {batch_size}")
    history = model.fit(X_train_reshaped, y_train, epochs=20, batch_size=batch_size, validation_data=(X_test_reshaped, y_test), callbacks=[early_stopping])

    # Evaluate the model
    train_loss, train_accuracy = model.evaluate(X_train_reshaped, y_train)
    test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test)

    print("Training loss:", train_loss)
    print("Training accuracy:", train_accuracy)
    print("Testing loss:", test_loss)
    print("Testing accuracy:", test_accuracy)


  super().__init__(**kwargs)


Training with batch size: 16
Epoch 1/20
[1m44710/44710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 4ms/step - accuracy: 0.4384 - loss: 0.4926 - val_accuracy: 0.4624 - val_loss: 0.4823
Epoch 2/20
[1m44710/44710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 5ms/step - accuracy: 0.4628 - loss: 0.4823 - val_accuracy: 0.4733 - val_loss: 0.4779
Epoch 3/20
[1m44710/44710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 5ms/step - accuracy: 0.4699 - loss: 0.4791 - val_accuracy: 0.4739 - val_loss: 0.4764
Epoch 4/20
[1m44710/44710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 5ms/step - accuracy: 0.4724 - loss: 0.4779 - val_accuracy: 0.4755 - val_loss: 0.4758
Epoch 5/20
[1m44706/44710[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.4731 - loss: 0.4770