In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Load the preprocessed dataset
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/preprocessed_data.csv")

# Convert Timestamp to datetime
data['Timestamp'] = pd.to_datetime(data['Timestamp'], format='%Y-%m-%d %H:%M:%S')

# Extract date-time features
db = pd.DataFrame({
    "year": data['Timestamp'].dt.year,
    "month": data['Timestamp'].dt.month,
    "day": data['Timestamp'].dt.day,
    "hour": data['Timestamp'].dt.hour,
    "week": data['Timestamp'].dt.isocalendar().week,  # Use isocalendar().week
    "weekday": data['Timestamp'].dt.weekday,
    "dayofyear": data['Timestamp'].dt.dayofyear,
    "quarter": data['Timestamp'].dt.quarter,
})

# Concatenate the extracted features with the original dataset
data1 = pd.concat([db, data.drop('Timestamp', axis=1)], axis=1)

# Split data into features (X) and target variables (y)
# X = data1[['year', 'month', 'day', 'hour','week', 'weekday', 'dayofyear', 'Lat', 'Long']]
X = data1[['year', 'month', 'day', 'hour', 'Lat', 'Long']]
y = data1[['Accident', 'Drug Violation', 'Harassment', 'Robbery']]

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Random Forest Classifier
rfc = RandomForestClassifier()

# Define hyperparameters grid for tuning
param_grid = {
  'n_estimators': [50, 100],
  'max_depth': [10, 15, 20],  # Reduced options for max_depth
  'min_samples_split': [2, 5],
  'min_samples_leaf': [1, 2]
}

# Perform Grid Search CV for hyperparameter tuning
grid_search = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

# Initialize Random Forest Classifier with the best hyperparameters
rfc_best = RandomForestClassifier(**best_params)

# Train the model with the best hyperparameters
rfc_best.fit(X_train, y_train)

# Evaluate the model
train_accuracy = rfc_best.score(X_train, y_train)
test_accuracy = rfc_best.score(X_test, y_test)

print("Training accuracy:", train_accuracy)
print("Testing accuracy:", test_accuracy)



In [None]:
import joblib
joblib.dump(rfc_best, 'rf_model.pkl')


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Load the preprocessed dataset
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/pd.csv")

# Split data into features (X) and target variables (y)
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'Latitude', 'Longitude']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Random Forest Classifier
rfc = RandomForestClassifier()

# Define hyperparameters grid for tuning
param_grid = {
  'n_estimators': [50, 100],
  'max_depth': [10, 15, 20],  # Reduced options for max_depth
  'min_samples_split': [2, 5],
  'min_samples_leaf': [1, 2]
}

# Perform Grid Search CV for hyperparameter tuning
grid_search = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

# Initialize Random Forest Classifier with the best hyperparameters
rfc_best = RandomForestClassifier(**best_params)

# Train the model with the best hyperparameters
rfc_best.fit(X_train, y_train)

# Evaluate the model
train_accuracy = rfc_best.score(X_train, y_train)
test_accuracy = rfc_best.score(X_test, y_test)

print("Training accuracy:", train_accuracy)
print("Testing accuracy:", test_accuracy)
 

In [None]:
import joblib
joblib.dump(rfc_best, 'rf_model3.pkl')

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Read the preprocessed dataset
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/pd.csv")

# Separate features and target variable
X = data.drop(['crime1', 'crime2', 'crime3', 'crime4'], axis=1)  # Features
y = data[['crime1', 'crime2', 'crime3', 'crime4']]  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors as needed

# Train the KNN classifier
knn.fit(X_train_scaled, y_train)

# Predict the labels for test set
y_pred = knn.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/pd.csv")

# Split the data into features (X) and target variable (y)
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE','Latitude', 'Longitude']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]  # Assuming you have four types of crimes

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Random Forest Classifier
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rfc.fit(X_train, y_train)

# Predict on the testing set
y_pred = rfc.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Now, you can use this trained model to predict crimes for new data
# For example:
new_data = pd.DataFrame({
    'YEAR': [2024],
    'MONTH': [5],
    'DAY': [30],
    'HOUR': [14],
    'MINUTE': [30],
    'Latitude': [491277.3697],
    'Longitude': [5458444.38]
})

predicted_crimes = rfc.predict(new_data)
print("Predicted crimes for the new data:", predicted_crimes)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
data = pd.read_csv("pd2.csv")

# Split the data into features (X) and target variable (y)
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'Latitude', 'Longitude','NEIGHBOURHOOD_ID']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]  # Assuming you have four types of crimes

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn.fit(X_train, y_train)

# Predict on the testing set
y_pred = knn.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Now, you can use this trained model to predict crimes for new data
# For example:
new_data = pd.DataFrame({
    'YEAR': [2025],
    'MONTH': [5],
    'DAY': [3],
    'HOUR': [14],
    'MINUTE': [30],
    'Latitude': [49.26980201],
    'Longitude': [-123.0837633],
    'NEIGHBOURHOOD_ID': [8]
})

predicted_crimes = knn.predict(new_data)
print("Predicted crimes for the new data:", predicted_crimes)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
data = pd.read_csv("pd2.csv")

# Split the data into features (X) and target variable (y)
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'Latitude', 'Longitude','NEIGHBOURHOOD_ID']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]  # Assuming you have four types of crimes


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
dt = DecisionTreeClassifier(random_state=42)

# Train the model
dt.fit(X_train, y_train)

# Predict on the testing set
y_pred = dt.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Now, you can use this trained model to predict crimes for new data
# For example:
new_data = pd.DataFrame({
    'YEAR': [2026],
    'MONTH': [4],
    'DAY': [30],
    'HOUR': [14],
    'MINUTE': [30],
    'Latitude': [49.26980201],
    'Longitude': [-123.0837633],
    'NEIGHBOURHOOD_ID': [16]
})

predicted_crimes = dt.predict(new_data)
print("Predicted crimes for the new data:", predicted_crimes)


In [None]:
import pandas as pd
import numpy as np

# Load your dataset into a DataFrame
# Assuming your dataset is stored in a CSV file named 'pd.csv'
df = pd.read_csv('pd.csv')

# Calculate the total number of entries for each crime type
crime_counts = df.iloc[:, 7:].sum()

# Determine the smallest count among the crime types
min_count = crime_counts.min()

# Randomly sample entries from each crime type to match the count of the smallest crime type
sampled_data = pd.concat([df[df[f'crime{i+1}'] == 1].sample(min_count, replace=True) for i in range(4)])

# Shuffle the sampled data
sampled_data = sampled_data.sample(frac=1).reset_index(drop=True)

# Reduce the dataset to about 10 thousand entries
final_data = sampled_data.sample(n=min(10000, len(sampled_data)))

# Save or use the 'final_data' DataFrame as your reduced dataset
final_data.to_csv('reduced_crime_data.csv', index=False)


In [3]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Load the dataset
data = pd.read_csv("reduced_crime_data.csv")

# Split the data into features (X) and target variable (y)
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'Latitude', 'Longitude']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]  # Assuming you have four types of crimes

# Normalize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the data for LSTM input [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Define the RNN model
model = Sequential()
model.add(LSTM(units=50, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(4, activation='softmax'))  # Assuming four types of crimes

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f%%' % (accuracy * 100)) 

Epoch 1/50


  super().__init__(**kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.3526 - loss: 1.3390 - val_accuracy: 0.4145 - val_loss: 1.2714
Epoch 2/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4194 - loss: 1.2684 - val_accuracy: 0.4225 - val_loss: 1.2563
Epoch 3/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4168 - loss: 1.2614 - val_accuracy: 0.4355 - val_loss: 1.2394
Epoch 4/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4378 - loss: 1.2326 - val_accuracy: 0.4355 - val_loss: 1.2261
Epoch 5/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4320 - loss: 1.2238 - val_accuracy: 0.4390 - val_loss: 1.2185
Epoch 6/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4380 - loss: 1.2208 - val_accuracy: 0.4410 - val_loss: 1.2145
Epoch 7/50
[1m250/250[0m [32m━━━━━━━

In [4]:
# Now, you can use this trained model to predict crimes for new data
# For example:
new_data = np.array([[2026, 8, 13, 4, 20, 49.16980201, -123.0837633]])
new_data = scaler.transform(new_data)
new_data = np.reshape(new_data, (1, 1, new_data.shape[1]))

predicted_crimes = model.predict(new_data)
print("Predicted probabilities of crimes for the new data:", predicted_crimes)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
Predicted probabilities of crimes for the new data: [[0.2177617  0.18224554 0.5397655  0.06022722]]


In [None]:
# Save the model to a file in the native Keras format
model.save("crime_prediction_lstm_model.keras")


In [None]:
import pandas as pd

# Read the dataset
data = pd.read_csv("reduced_crime_data.csv")

# Sum up the occurrences of each crime type
total_crime1 = data['crime1'].sum()
total_crime2 = data['crime2'].sum()
total_crime3 = data['crime3'].sum()
total_crime4 = data['crime4'].sum()

# Print the total occurrences of each crime type
print("Total occurrences of Crime 1:", total_crime1)
print("Total occurrences of Crime 2:", total_crime2)
print("Total occurrences of Crime 3:", total_crime3)
print("Total occurrences of Crime 4:", total_crime4)


In [None]:
# Now, you can use this trained model to predict crimes for new data
# For example:
new_data = np.array([[2026, 8, 13, 4, 20, 49.16980201, -123.0837633]])
new_data = scaler.transform(new_data)
new_data = np.reshape(new_data, (1, 1, new_data.shape[1]))

predicted_crimes = model.predict(new_data)
print("Predicted probabilities of crimes for the new data:", predicted_crimes)

In [None]:
import keras
import tensorflow as tf
print("Keras version:", keras.__version__)
print("TensorFlow version:", tf.__version__)
