
## Load Data

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import joblib
import os


In [2]:
# Read the raw dataset from the working directory into a DataFrame
df = pd.read_csv(filepath_or_buffer='data.csv')


In [3]:
# Parse the timestamp column (unparseable values become NaT via errors='coerce'),
# then discard every row lacking a usable timestamp or coordinate.
df = (
    df
    .assign(timestamp=pd.to_datetime(df['timestamp'], errors='coerce'))
    .dropna(subset=['timestamp', 'latitude', 'longitude'])
)


In [4]:
# Build a frame of calendar components derived from the parsed timestamps.
# Most components are plain `.dt` attributes; the ISO week number comes from
# `.dt.isocalendar()` instead. Column order is kept as year, month, day, hour,
# dayofyear, week, dayofweek, weekday, quarter.
column_1 = df['timestamp']
DT = pd.DataFrame({
    **{field: getattr(column_1.dt, field)
       for field in ("year", "month", "day", "hour", "dayofyear")},
    "week": column_1.dt.isocalendar().week,
    **{field: getattr(column_1.dt, field)
       for field in ("dayofweek", "weekday", "quarter")},
})


In [5]:
# Copy the spatial coordinates from the source frame into the feature frame
# (values are matched row-by-row on the shared index).
for _coord in ('latitude', 'longitude'):
    DT[_coord] = df[_coord]


In [6]:
# Targets: the trailing six columns of the source frame (the crime labels)
Y = df.iloc[:, -6:]

# Predictors: selected calendar components plus the coordinates
X = DT.loc[:, ['month', 'day', 'hour', 'dayofyear', 'dayofweek', 'latitude', 'longitude']]


In [7]:
# Remove any feature row that still contains a missing value
X = X.dropna(axis=0, how='any')

# Keep only the label rows whose index survived in X, and cast them to integers
Y = Y.loc[X.index].astype(int)


In [8]:
# Train a random forest on the prepared features and labels.
# A fixed random_state makes the bootstrap sampling and per-split feature
# selection repeatable, so a Restart & Run All reproduces the same model and
# the same saved artifact instead of a different forest on every run.
model = RandomForestClassifier(random_state=42)
# Y is a slice of several label columns, so the forest is fit against all of
# them at once (one prediction per label column).
model.fit(X, Y)


RandomForestClassifier()

In [9]:
# Persist the fitted model under ./model, creating the directory when absent
os.makedirs(name='model', exist_ok=True)
joblib.dump(value=model, filename='model/rf_model')


['model/rf_model']