In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Crime Trend Analysis: The City of New Orleans (2022 and 2025)

## 1.1 Importing and cleaning data - Report 2025

In [2]:
# Load the 2025 report extract; the path is relative to the notebook's working directory.
report_2025 = pd.read_csv('../data/report_2025.csv')

In [3]:
# Preview the first two rows to confirm the file loaded and to see the column layout.
report_2025.head(2)

Unnamed: 0,Item_Number,Year,District,Location,Disposition,Signal_Type,Signal_Description,Occurred_Date_Time,Date,Month,...,Offender_Gender,Offender_Age,Victim_Number,Person_Type,Victim_Race,Victim_Gender,Victim_Age,Victim_Fatal_Status,Hate_Crime,Report_Type
0,H-11186-25,2025,5,014XX Montegut St,OPEN,62C,SIMPLE BURGLARY (VEHICLE),8/10/2025 10:30,8/10/2025,August,...,MALE,0,2.0,VICTIM,BLACK,MALE,46,Non-fatal,Unknown,Incident Report
1,H-11186-25,2025,5,014XX Montegut St,OPEN,62C,SIMPLE BURGLARY (VEHICLE),8/10/2025 10:30,8/10/2025,August,...,MALE,0,2.0,VICTIM,BLACK,MALE,46,Non-fatal,Unknown,Incident Report


In [4]:
# Rename the report identifier and the occurrence timestamp columns
# (the 2022 frame is given the same names before the two are merged).
report_2025 = report_2025.rename(
    columns={
        'Item_Number': 'Report_Number',
        'Occurred_Date_Time': 'Occurred_Date',
    }
)

In [5]:
# Store District as text rather than as a number.
report_2025 = report_2025.assign(District=report_2025['District'].astype(str))

In [6]:
# Parse the occurrence timestamp strings into datetime64 values.
report_2025 = report_2025.assign(
    Occurred_Date=lambda frame: pd.to_datetime(frame['Occurred_Date'])
)

In [7]:
# Parse the calendar-date strings into datetime64 values.
report_2025 = report_2025.assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

In [8]:
# (rows, columns) of the 2025 frame after the cleaning steps.
report_2025.shape

(64500, 28)

In [9]:
# Column dtypes and non-null counts, to confirm the date columns are datetime64 and to spot missing values.
report_2025.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64500 entries, 0 to 64499
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Report_Number        64500 non-null  object        
 1   Year                 64500 non-null  int64         
 2   District             64500 non-null  object        
 3   Location             64500 non-null  object        
 4   Disposition          64500 non-null  object        
 5   Signal_Type          64500 non-null  object        
 6   Signal_Description   64500 non-null  object        
 7   Occurred_Date        64500 non-null  datetime64[ns]
 8   Date                 64500 non-null  datetime64[ns]
 9   Month                64500 non-null  object        
 10  Month_Number         64500 non-null  int64         
 11  Day                  64500 non-null  object        
 12  Charge_Code          64500 non-null  object        
 13  Charge_Description   64500 non-

## 1.2 Importing and cleaning data - Report 2022

In [10]:
# Load the 2022 report extract; the path is relative to the notebook's working directory.
report_2022 = pd.read_csv('../data/report_2022.csv')

In [11]:
# Preview the first two rows to confirm the file loaded and to see the column layout.
report_2022.head(2)

Unnamed: 0,Item_Number,Year,District,Location,Disposition,Signal_Type,Signal_Description,Occurred_Date_Time,Date,Month,...,Offender_Gender,Offender_Age,Victim_Number,Person_Type,Victim_Race,Victim_Gender,Victim_Age,Victim_Fatal_Status,Hate_Crime,Report_Type
0,B-12830-22,2022,7,047XX Lynhuber,CLOSED,65,SIMPLE ROBBERY,2/10/2022 23:50,2/10/2022,February,...,MALE,32.0,1.0,VICTIM,BLACK,FEMALE,28.0,Non-fatal,Unknown,Supplemental Report
1,B-12830-22,2022,7,047XX Lynhuber,CLOSED,65,SIMPLE ROBBERY,2/10/2022 23:50,2/10/2022,February,...,MALE,32.0,2.0,VICTIM,Unknown,Unknown,0.0,Non-fatal,Unknown,Supplemental Report


In [12]:
# (rows, columns) of the 2022 frame as loaded, before any cleaning.
report_2022.shape

(117272, 28)

In [13]:
# Rename the report identifier and the occurrence timestamp columns
# (the 2025 frame uses the same names, so the two can be stacked later).
report_2022 = report_2022.rename(
    columns={
        'Item_Number': 'Report_Number',
        'Occurred_Date_Time': 'Occurred_Date',
    }
)

In [14]:
# Store District as text rather than as a number.
report_2022 = report_2022.assign(District=report_2022['District'].astype(str))

In [15]:
# Parse the occurrence timestamp strings into datetime64 values.
# errors='coerce' turns values that cannot be parsed into NaT instead of raising.
report_2022 = report_2022.assign(
    Occurred_Date=lambda frame: pd.to_datetime(frame['Occurred_Date'], errors='coerce')
)

In [16]:
# Parse the calendar-date strings into datetime64 values.
# errors='coerce' turns values that cannot be parsed into NaT instead of raising.
report_2022 = report_2022.assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce')
)

In [17]:
# 'Date' was already converted to datetime in the previous cell, so converting it
# again would be a no-op. Because errors='coerce' replaces unparseable values with
# NaT without raising, count the rows left without a usable timestamp so that the
# data loss is visible instead of silent.
report_2022[['Occurred_Date', 'Date']].isna().sum()

In [18]:
# Column dtypes and non-null counts; a non-null count below the row count on the
# date columns means some values were coerced to NaT during parsing.
report_2022.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117272 entries, 0 to 117271
Data columns (total 28 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   Report_Number        117272 non-null  object        
 1   Year                 117272 non-null  int64         
 2   District             117272 non-null  object        
 3   Location             117272 non-null  object        
 4   Disposition          117272 non-null  object        
 5   Signal_Type          117272 non-null  object        
 6   Signal_Description   117272 non-null  object        
 7   Occurred_Date        117264 non-null  datetime64[ns]
 8   Date                 117264 non-null  datetime64[ns]
 9   Month                117272 non-null  object        
 10  Month_Number         117272 non-null  int64         
 11  Day                  117272 non-null  object        
 12  Charge_Code          117272 non-null  object        
 13  Charge_Descrip

## 1.3 Merging the 2022 and 2025 reports and removing duplicate report numbers

In [19]:
# Stack the 2022 rows on top of the 2025 rows and renumber the index from 0.
merged_report = pd.concat(
    objs=[report_2022, report_2025],
    ignore_index=True,
)

In [20]:
# Preview the first two rows of the combined frame.
merged_report.head(2)

Unnamed: 0,Report_Number,Year,District,Location,Disposition,Signal_Type,Signal_Description,Occurred_Date,Date,Month,...,Offender_Gender,Offender_Age,Victim_Number,Person_Type,Victim_Race,Victim_Gender,Victim_Age,Victim_Fatal_Status,Hate_Crime,Report_Type
0,B-12830-22,2022,7,047XX Lynhuber,CLOSED,65,SIMPLE ROBBERY,2022-02-10 23:50:00,2022-02-10,February,...,MALE,32.0,1.0,VICTIM,BLACK,FEMALE,28.0,Non-fatal,Unknown,Supplemental Report
1,B-12830-22,2022,7,047XX Lynhuber,CLOSED,65,SIMPLE ROBBERY,2022-02-10 23:50:00,2022-02-10,February,...,MALE,32.0,2.0,VICTIM,Unknown,Unknown,0.0,Non-fatal,Unknown,Supplemental Report


In [21]:
# (rows, columns) of the combined frame; the row count should equal the sum of the two inputs.
merged_report.shape

(181772, 28)

In [22]:
# Column dtypes and non-null counts for the combined frame.
merged_report.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 181772 entries, 0 to 181771
Data columns (total 28 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   Report_Number        181772 non-null  object        
 1   Year                 181772 non-null  int64         
 2   District             181772 non-null  object        
 3   Location             181772 non-null  object        
 4   Disposition          181772 non-null  object        
 5   Signal_Type          181772 non-null  object        
 6   Signal_Description   181772 non-null  object        
 7   Occurred_Date        181764 non-null  datetime64[ns]
 8   Date                 181764 non-null  datetime64[ns]
 9   Month                181772 non-null  object        
 10  Month_Number         181772 non-null  int64         
 11  Day                  181772 non-null  object        
 12  Charge_Code          181772 non-null  object        
 13  Charge_Descrip

In [23]:
# Keep a single row per Report_Number (the first one encountered).
# NOTE(review): rows sharing a Report_Number can differ in other columns (for
# example Victim_Number), so the details of the later rows are discarded here;
# confirm that one row per report is the intended level of analysis.
# Despite its name, this variable holds the de-duplicated frame; the name is
# kept because the export cell refers to it.
merged_report_duplicates = merged_report.drop_duplicates(
    subset='Report_Number',
    keep='first',
)

In [24]:
# (rows, columns) remaining after keeping one row per Report_Number.
merged_report_duplicates.shape

(95007, 28)

## 1.4 Exporting the merged DataFrame to a CSV file

In [42]:
# Export the de-duplicated merged report (one row per Report_Number) as a CSV
# without the index column.
# The path is relative to the notebook's working directory, matching the
# '../data/' folder the input CSVs are read from, so the notebook is not tied to
# one machine's absolute folder layout.
merged_report_duplicates.to_csv('../data/report_2022_2025.csv', index=False)