
## Load Data

In [52]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib
import os


In [53]:
# Read the source CSV into the working DataFrame `df`
DATA_FILE = 'Generate_Synthetic_Data.csv'
df = pd.read_csv(DATA_FILE)


In [54]:
# Preview the first five rows (five is also the pandas default)
df.head(n=5)

Unnamed: 0,timestamp,latitude,longitude,act379,act13,act279,act323,act363,act302,region,incident_type,loss_amount
0,2019-01-01 08:26:00,26.577687,79.487584,0,0,1,0,0,0,Delhi,Phishing,103354.842558
1,2019-01-01 08:48:00,20.217247,76.481848,0,1,1,0,0,0,Hyderabad,Digital Arrest Scam,79171.432852
2,2019-01-01 14:03:00,14.844297,78.561454,0,0,0,0,0,0,Mumbai,Job Fraud,66107.247745
3,2019-01-02 04:59:00,17.220835,77.993437,0,0,0,0,0,0,Mumbai,Digital Arrest Scam,66122.680724
4,2019-01-02 11:42:00,26.215151,82.815567,0,0,0,0,0,0,Chennai,Investment Fraud,73162.758137


In [55]:
# Preview the last five rows (five is also the pandas default)
df.tail(n=5)


Unnamed: 0,timestamp,latitude,longitude,act379,act13,act279,act323,act363,act302,region,incident_type,loss_amount
12780,2025-12-30 22:41:00,25.99755,88.150954,0,0,0,0,0,0,Chennai,Job Fraud,58076.124094
12781,2025-12-31 01:05:00,17.579562,82.909289,1,0,1,0,0,0,Delhi,Digital Arrest Scam,103726.133065
12782,2025-12-31 07:51:00,21.524544,82.684723,0,0,0,0,0,0,Hyderabad,Investment Fraud,92834.74108
12783,2025-12-31 08:01:00,17.26439,79.098852,0,0,0,1,0,0,Delhi,Phishing,83844.664301
12784,2025-12-31 09:36:00,27.912442,80.442308,1,0,0,0,0,0,Hyderabad,Phishing,82437.561672


In [56]:
# Parse timestamps (unparseable values become NaT), then discard every row
# that lacks a valid timestamp or either coordinate
parsed_timestamps = pd.to_datetime(df['timestamp'], errors='coerce')
df = (
    df
    .assign(timestamp=parsed_timestamps)
    .dropna(subset=['timestamp', 'latitude', 'longitude'])
)


In [57]:
# Derive calendar / time-of-day features from each record's timestamp
column_1 = df['timestamp']
when = column_1.dt

# Plain accessor attributes, listed in the column order of the final frame.
# ISO week is slotted in between them because it comes from isocalendar()
# rather than from a direct attribute.
leading_parts = ("year", "month", "day", "hour", "dayofyear")
trailing_parts = ("dayofweek", "weekday", "quarter")

time_features = {part: getattr(when, part) for part in leading_parts}
time_features["week"] = when.isocalendar().week
time_features.update((part, getattr(when, part)) for part in trailing_parts)

DT = pd.DataFrame(time_features)


In [58]:
# Attach the spatial coordinates next to the time features (aligned on index)
DT = DT.assign(latitude=df['latitude'], longitude=df['longitude'])


In [59]:
# Input features: a subset of the derived time columns plus the coordinates
FEATURE_COLUMNS = ['month', 'day', 'hour', 'dayofyear', 'dayofweek', 'latitude', 'longitude']
X = DT[FEATURE_COLUMNS]

# Output label: pick the target column by name instead of by position.
# A positional "last 6 columns" slice does not isolate the crime flags in this
# file (it yields act323, act363, act302, region, incident_type, loss_amount)
# and silently changes meaning whenever the CSV gains or loses a column.
# Kept as a one-column DataFrame so it can still be indexed by column name.
Y = df[['incident_type']]


In [60]:


# Drop any feature rows that still contain missing values
X = X.dropna()

# Use 'incident_type' as the target, restricted to the rows that survived in X.
# The labels are read from `df` rather than re-sliced from `Y`, because `Y` is
# rebound to an integer array below; re-slicing it would make this cell fail
# with an AttributeError the second time it is run.
incident_labels = df.loc[X.index, 'incident_type']

# Encode string labels to integers; `le` keeps the mapping back to the names
le = LabelEncoder()
Y = le.fit_transform(incident_labels)


In [61]:
# Fix the seed so the forest's bootstrap sampling and per-split feature
# selection are repeatable; without it every run fits a different model.
RANDOM_STATE = 42
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X, Y)


RandomForestClassifier()

In [62]:
# Save the trained artifacts to a folder
os.makedirs('model', exist_ok=True)

# The classifier predicts the integer codes produced by `le`; persist the
# fitted encoder too so consumers can map predictions back to incident-type
# names via `le.inverse_transform(...)`.
joblib.dump(le, 'model/label_encoder')

# Model path is unchanged; this stays the last expression so the cell output
# remains the list containing the model file name.
joblib.dump(model, 'model/rf_model')


['model/rf_model']