In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Random-forest baseline: tune with a grid search, evaluate on a held-out split,
# then predict the four crime indicators for one hand-written sample.

# One seed shared by the split and the forest so a re-run reproduces the same numbers.
RANDOM_STATE = 42

# Load the preprocessed dataset
# NOTE(review): absolute, machine-specific path; point it at your local copy of the CSV.
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/reduced_crime_data.csv")

# Split data into features (X) and target variables (y)
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR','MINUTE', 'Latitude', 'Longitude']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

# Initialize Random Forest Classifier.
# Seeded: an unseeded forest makes the grid-search ranking and the reported
# accuracies change from run to run even though the split itself is fixed.
rfc = RandomForestClassifier(random_state=RANDOM_STATE)

# Define hyperparameters grid for tuning
param_grid = {
  'n_estimators': [50, 100],
  'max_depth': [10, 15, 20],  # Reduced options for max_depth
  'min_samples_split': [2, 5],
  'min_samples_leaf': [1, 2]
}

# Perform Grid Search CV for hyperparameter tuning (5 folds, all CPU cores)
grid_search = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

# GridSearchCV refits the winning configuration on the whole training set by
# default (refit=True), so reuse that fitted model instead of building and
# training an identical forest a second time.
rfc_best = grid_search.best_estimator_

# Evaluate the model.
# NOTE(review): y has four columns, so score() is presumably subset accuracy
# (a row counts as correct only if all four outputs match) - confirm.
train_accuracy = rfc_best.score(X_train, y_train)
test_accuracy = rfc_best.score(X_test, y_test)

# A large gap between these two numbers indicates overfitting.
print("Training accuracy:", train_accuracy)
print("Testing accuracy:", test_accuracy)

# A single hand-written sample; columns must match the training features.
new_data = pd.DataFrame({
    'YEAR': [2026],
    'MONTH': [8],
    'DAY': [13],
    'HOUR': [4],
    'MINUTE': [20],
    'Latitude': [49.16980201],
    'Longitude': [-123.0837633]
})

# Make predictions on the new data
new_predictions = rfc_best.predict(new_data)

# Print the predictions
print("Predictions for new data:")
print(new_predictions)

Best hyperparameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Training accuracy: 0.997875
Testing accuracy: 0.3125
Predictions for new data:
[[0 0 0 0]]


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping

# Stacked-LSTM classifier over the preprocessed crime table.

# Load the preprocessed data
df = pd.read_csv('pd.csv')

# Targets are the four crime columns; every remaining column is used as a feature
X = df.drop(columns=['crime1', 'crime2', 'crime3', 'crime4'])
y = df[['crime1', 'crime2', 'crime3', 'crime4']]

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardize: statistics are learned from the training rows only and then
# reused unchanged on the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Insert a length-1 time axis so each row becomes (timesteps=1, features)
X_train_reshaped = X_train_scaled[:, np.newaxis, :]
X_test_reshaped = X_test_scaled[:, np.newaxis, :]

# Three LSTM layers of 50 units, each followed by 20% dropout, then a
# 4-way softmax output
n_timesteps, n_features = X_train_reshaped.shape[1:]
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=4, activation='softmax'),
])

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train, with 20% of the training rows held back for validation
history = model.fit(
    X_train_reshaped,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Report loss and accuracy on the untouched test split
loss, accuracy = model.evaluate(X_test_reshaped, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)



In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import SGD

# Two-layer LSTM classifier trained with SGD on the date/time/location features.

# Load the preprocessed dataset
# NOTE(review): absolute, machine-specific path; point it at your local copy of the CSV.
data = pd.read_csv("C:/Users/Yash Waldia/Desktop/crime1/PAASBAAN-crime-prediction/pd2.csv")

# Select relevant features and target variables
X = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'Latitude', 'Longitude']]
y = data[['crime1', 'crime2', 'crime3', 'crime4']]

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features (scaler statistics come from the training rows only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape data for LSTM input (samples, timesteps, features); timesteps is 1
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

# Carve a validation subset out of the TRAINING data for early stopping.
# Validating on the test set would let the test data pick the restored
# weights, which makes the test metrics reported below optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_reshaped, y_train, test_size=0.2, random_state=42
)

# Define the LSTM model
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=100))
model.add(Dropout(0.2))
# NOTE(review): softmax + categorical_crossentropy assumes exactly one of
# crime1..crime4 is 1 per row (one-hot) - TODO confirm. If a row can have zero
# or several positives, sigmoid + binary_crossentropy is the matching setup.
model.add(Dense(units=4, activation='softmax'))  # Adjust units and activation for multi-class classification

# Compile the model
optimizer = SGD(learning_rate=0.01)  # Adjust learning rate here
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Early stopping callback: stop after 3 epochs without val_loss improvement
# and restore the best weights seen
early_stopping = EarlyStopping(patience=3, monitor='val_loss', restore_best_weights=True)

# Train the model on the fit subset, validating on the held-out training rows
history = model.fit(X_fit, y_fit, epochs=20, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping])

# Evaluate the model.
# The training figures cover the full training split, including the rows held
# out for validation; the test split is only touched here.
train_loss, train_accuracy = model.evaluate(X_train_reshaped, y_train)
test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test)

print("Training loss:", train_loss)
print("Training accuracy:", train_accuracy)
print("Testing loss:", test_loss)
print("Testing accuracy:", test_accuracy)
