In [None]:
import pandas as pd
# Load the raw crime records from the project's data folder
df = pd.read_csv(r"C:\Users\rachn\CityX_Crime_Watch\data\crime_data.csv")
# See first few rows (head must be *called*; the bare attribute only shows the bound-method repr)
df.head()

In [None]:
# Parse the timestamp column once and reuse the parsed values below
timestamps = pd.to_datetime(df['Dates'])
df['Dates'] = timestamps

# Derive calendar features from the parsed timestamps
df['Hour'] = timestamps.dt.hour
df['Day'] = timestamps.dt.day_name()  # weekday name, e.g. 'Monday'
df['Month'] = timestamps.dt.month
df['Year'] = timestamps.dt.year


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Fix the bar order to calendar order instead of order of first appearance
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

plt.figure(figsize=(10, 5))
sns.countplot(data=df, x='Day', order=weekday_order)
plt.title("Crimes per Day")
plt.xticks(rotation=45)
plt.show()


In [None]:
# The ten most frequent categories, most frequent first
top_categories = df['Category'].value_counts().head(10)

top_categories.plot(kind='bar', title='Top Crime Categories', figsize=(10, 5))
plt.ylabel("Count")
plt.show()


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# Vectorize Descriptions (missing descriptions are treated as empty text)
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Descript'].fillna(''))

# Target variable.
# With use_label_encoder=False, XGBClassifier expects integer class ids
# 0..n_classes-1, so the category strings are encoded before fitting.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Category'])

# Train/Test split; the fixed seed makes the reported accuracy reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

# Prediction (y_test and y_pred are both encoded ids, so they compare directly)
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


In [None]:
def get_severity(cat):
    """Return the severity level assigned to a crime category.

    Args:
        cat: Category label; matched by equality against the names listed below.

    Returns:
        int: 1 through 5 for a listed category, 0 for anything else.
    """
    # (level, member categories) pairs, checked in ascending level order
    severity_tiers = (
        (1, ('NON-CRIMINAL', 'SUSPICIOUS OCCURRENCE', 'MISSING PERSON', 'RUNAWAY', 'RECOVERED VEHICLE')),
        (2, ('WARRANTS', 'OTHER OFFENSES', 'VANDALISM', 'TRESPASS', 'DISORDERLY CONDUCT', 'BAD CHECKS')),
        (3, ('LARCENY/THEFT', 'VEHICLE THEFT', 'FORGERY/COUNTERFEITING', 'DRUG/NARCOTIC',
             'STOLEN PROPERTY', 'FRAUD', 'BRIBERY', 'EMBEZZLEMENT')),
        (4, ('ROBBERY', 'WEAPON LAWS', 'BURGLARY', 'EXTORTION')),
        (5, ('KIDNAPPING', 'ARSON')),
    )
    for level, members in severity_tiers:
        if cat in members:
            return level
    # Unlisted categories get no severity
    return 0

# Score every row from its category
severity_scores = df['Category'].apply(get_severity)
df['Severity'] = severity_scores

# Show sample
preview_columns = ['Category', 'Descript', 'Severity']
df[preview_columns].head(10)


In [None]:
import joblib
from sklearn.preprocessing import LabelEncoder

# Learn the category -> integer id mapping, then encode the target with it
label_encoder = LabelEncoder()
label_encoder.fit(df['Category'])
y = label_encoder.transform(df['Category'])

# Persist the encoder so predicted ids can be decoded back to category names later
joblib.dump(label_encoder, '../app/label_encoder.pkl')


In [None]:
import joblib
from sklearn.preprocessing import LabelEncoder

# Create label encoder
label_encoder = LabelEncoder()

# Fit and transform y to numeric
y = label_encoder.fit_transform(df['Category'])

# Save the label encoder for decoding later.
# Raw string: in a normal string literal the "\U" in "C:\Users" starts a unicode
# escape and the cell fails with a SyntaxError before anything runs.
joblib.dump(label_encoder, r'C:\Users\rachn\CityX_Crime_Watch\app\label_encoder.pkl')


In [None]:
import joblib
from sklearn.preprocessing import LabelEncoder

# Encode the category strings as integer class ids
label_encoder = LabelEncoder()
label_encoder.fit(df['Category'])
y = label_encoder.transform(df['Category'])

# Keep the fitted encoder on disk so ids can be mapped back to names later
encoder_path = 'C:/Users/rachn/CityX_Crime_Watch/app/label_encoder.pkl'
joblib.dump(label_encoder, encoder_path)

In [None]:
from sklearn.metrics import accuracy_score

# Split the TF-IDF features and the integer-encoded labels.
# The fixed seed keeps the split reproducible, and with it the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Decode predictions back to labels (optional for display)
decoded_preds = label_encoder.inverse_transform(y_pred)

# y_test and y_pred are both encoded ids, so accuracy is computed on ids directly
print("Accuracy:", accuracy_score(y_test, y_pred))

In [None]:
def get_severity(cat):
    """Look up the severity level of a crime category.

    NOTE(review): this function is defined more than once in this notebook;
    keep the definitions in sync or consolidate them.

    Args:
        cat: Category label; matched by equality against the names listed below.

    Returns:
        int: 1 through 5 for a listed category, 0 for anything else.
    """
    tiers = (
        (1, ('NON-CRIMINAL', 'SUSPICIOUS OCCURRENCE', 'MISSING PERSON', 'RUNAWAY', 'RECOVERED VEHICLE')),
        (2, ('WARRANTS', 'OTHER OFFENSES', 'VANDALISM', 'TRESPASS', 'DISORDERLY CONDUCT', 'BAD CHECKS')),
        (3, ('LARCENY/THEFT', 'VEHICLE THEFT', 'FORGERY/COUNTERFEITING', 'DRUG/NARCOTIC',
             'STOLEN PROPERTY', 'FRAUD', 'BRIBERY', 'EMBEZZLEMENT')),
        (4, ('ROBBERY', 'WEAPON LAWS', 'BURGLARY', 'EXTORTION')),
        (5, ('KIDNAPPING', 'ARSON')),
    )
    # First tier containing the category wins; 0 when no tier lists it
    return next((level for level, names in tiers if cat in names), 0)

# Attach a severity score to every record based on its category
row_severity = df['Category'].apply(get_severity)
df['Severity'] = row_severity

# Show sample
sample_columns = ['Category', 'Descript', 'Severity']
df[sample_columns].head(10)



In [None]:
import joblib
import os

# Make sure the destination folder exists before writing into it
app_dir = "C:/Users/rachn/CityX_Crime_Watch/app/"
os.makedirs(app_dir, exist_ok=True)

# Destinations for the three artifacts the app needs at prediction time
model_path = 'C:/Users/rachn/CityX_Crime_Watch/app/model.pkl'
vectorizer_path = 'C:/Users/rachn/CityX_Crime_Watch/app/vectorizer.pkl'
encoder_path = 'C:/Users/rachn/CityX_Crime_Watch/app/label_encoder.pkl'

# Persist the trained model, the fitted vectorizer and the label encoder
joblib.dump(model, model_path)
joblib.dump(vectorizer, vectorizer_path)
joblib.dump(label_encoder, encoder_path)


In [None]:
import fitz  # PyMuPDF

def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        uploaded_file: Binary file-like object whose ``read()`` returns the PDF bytes.

    Returns:
        str: The page texts joined with no separator; "" if the document has no pages.
    """
    # Open from the in-memory bytes; the context manager closes the document
    # even if text extraction raises part-way through.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


In [None]:
import streamlit as st
import pandas as pd
import joblib
from pdf_utils import extract_text_from_pdf
import folium
from streamlit_folium import st_folium

# Load the persisted model, vectorizer and label encoder (in that order).
# joblib.load unpickles its input, so only point it at artifacts you produced yourself.
# NOTE(review): all paths here are relative to the working directory the app is started from.
artifact_files = ("model.pkl", "vectorizer.pkl", "label_encoder.pkl")
model, vectorizer, label_encoder = (joblib.load(name) for name in artifact_files)

# Load dataset
df = pd.read_csv("data/crime_data.csv")

st.title("🚓 CityX Crime Watch Dashboard")

# PDF upload
uploaded_file = st.file_uploader("📄 Upload Police Report", type="pdf")


def get_severity(cat):
    """Return the severity level (1-5) for a crime category, or 0 if it is not listed."""
    severity_tiers = (
        (1, ('NON-CRIMINAL', 'SUSPICIOUS OCCURRENCE', 'MISSING PERSON', 'RUNAWAY', 'RECOVERED VEHICLE')),
        (2, ('WARRANTS', 'OTHER OFFENSES', 'VANDALISM', 'TRESPASS', 'DISORDERLY CONDUCT', 'BAD CHECKS')),
        (3, ('LARCENY/THEFT', 'VEHICLE THEFT', 'FORGERY/COUNTERFEITING', 'DRUG/NARCOTIC',
             'STOLEN PROPERTY', 'FRAUD', 'BRIBERY', 'EMBEZZLEMENT')),
        (4, ('ROBBERY', 'WEAPON LAWS', 'BURGLARY', 'EXTORTION')),
        (5, ('KIDNAPPING', 'ARSON')),
    )
    for level, members in severity_tiers:
        if cat in members:
            return level
    return 0


if uploaded_file:
    text = extract_text_from_pdf(uploaded_file)
    st.subheader("Extracted Text")
    st.write(text)

    if not text.strip():
        # With no extractable text (e.g. an image-only PDF) the vectorizer has
        # nothing to work with and any predicted category would be meaningless,
        # so say so instead of reporting one.
        st.warning("No text could be extracted from this PDF, so no prediction was made.")
    else:
        # Prediction: vectorize the text, predict an encoded class id, decode it to a name
        X_input = vectorizer.transform([text])
        pred = model.predict(X_input)[0]
        crime_category = label_encoder.inverse_transform([pred])[0]

        severity = get_severity(crime_category)

        st.success(f"Predicted Crime Category: {crime_category}")
        st.warning(f"Severity Level: {severity}")

# Crime map
st.subheader("📍 Crime Map")
# Centre the map on the mean coordinate (Y is passed as latitude, X as longitude)
m = folium.Map(location=[df['Y'].mean(), df['X'].mean()], zoom_start=12)

# folium rejects NaN locations, so rows missing either coordinate are skipped
# rather than aborting the whole map.
plottable = df[['Y', 'X']].dropna()

# Zipping the two columns avoids building a Series per row as iterrows() does
for lat, lon in zip(plottable['Y'], plottable['X']):
    folium.CircleMarker([lat, lon], radius=2, color='red').add_to(m)
st_data = st_folium(m, width=700)


In [None]:
from pdf_utils import extract_text_from_pdf


In [None]:
import sys

# Make the app folder importable so its helper module can be loaded from here
app_dir = 'C:/Users/rachn/CityX_Crime_Watch/app'
sys.path.append(app_dir)

from pdf_utils import extract_text_from_pdf


In [None]:
from pdf_utils import extract_text_from_pdf

In [None]:
import fitz  # PyMuPDF

def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        uploaded_file: Binary file-like object whose ``read()`` returns the PDF bytes.

    Returns:
        str: The page texts joined with no separator; "" if the document has no pages.
    """
    # Open from the in-memory bytes; the context manager closes the document
    # even if text extraction raises part-way through.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

In [None]:
import sys

# NOTE(review): a relative sys.path entry is resolved against the kernel's
# working directory; confirm that '../app' from there contains pdf_utils.py.
app_dir = '../app'
sys.path.append(app_dir)

from pdf_utils import extract_text_from_pdf


In [2]:
import fitz  # PyMuPDF

def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        uploaded_file: Binary file-like object whose ``read()`` returns the PDF bytes.

    Returns:
        str: The page texts joined with no separator; "" if the document has no pages.
    """
    # Open from the in-memory bytes; the context manager closes the document
    # even if text extraction raises part-way through.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

In [3]:
import sys

# NOTE(review): a relative sys.path entry is resolved against the kernel's
# working directory. The import below raises ModuleNotFoundError when no
# pdf_utils.py is found on sys.path; confirm the file exists in '../app'.
app_dir = '../app'
sys.path.append(app_dir)

from pdf_utils import extract_text_from_pdf

# Only reached when the import above succeeded
print("Function imported successfully!")

ModuleNotFoundError: No module named 'pdf_utils'