In [1]:
#1 Importing necessary libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling: use seaborn's "whitegrid" style for the figures drawn below.
sns.set(style="whitegrid")
# Table display: lift the column cap so wide frames are shown without truncation.
pd.set_option("display.max_columns", None)

In [None]:
#2 Load CSV File

In [None]:
# Location of the dataset; a relative path is resolved against the working directory.
file_path = "crime.csv"

# Load the data.
# If the file is missing, show the friendly hint and then re-raise: swallowing
# the error would leave `df` undefined and every later cell would fail with a
# confusing NameError instead of pointing at the real cause.
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print("❌ File not found. Please check the path and try again.")
    raise
else:
    # Only reached when read_csv succeeded.
    print("✅ File loaded successfully!")

In [8]:
#3 Cleaning the file

In [None]:
# Normalise the headers to snake_case: trim whitespace, lowercase, spaces -> underscores.
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Report how many values are missing in each column before anything is filled.
print("Missing values per column:\n")
print(df.isnull().sum())

# Fill gaps with 0 in numeric columns only. A blanket fillna(0) would also write
# the integer 0 into text columns (e.g. the state name), which creates a bogus
# "0" category and breaks later string handling such as `.upper()`.
# Missing text values are left as NaN so that dropna()/value_counts() skip them.
numeric_columns = df.select_dtypes(include='number').columns
df = df.fillna(dict.fromkeys(numeric_columns, 0))

In [10]:
#4 Basic Data Overview

In [None]:
# Preview: the first five records, rendered through the notebook's rich display.
print("Top 5 Rows:")
first_rows = df.head()
display(first_rows)

# Schema: the dtype of every column.
print("\nData Types:")
column_types = df.dtypes
print(column_types)

# Summary statistics as produced by DataFrame.describe().
print("\nDescriptive Statistics:")
summary_stats = df.describe()
display(summary_stats)

In [12]:
#5 Crime Count by state

In [None]:
# Bar chart of the 15 most frequent values in the 'state/ut' column.
# NOTE: value_counts() counts rows per state; it does not add up any crime columns.
if 'state/ut' not in df.columns:
    print("No 'state/ut' column found.")
else:
    # value_counts() sorts in descending order, so head(15) keeps the 15 largest.
    city_counts = df['state/ut'].value_counts().head(15)
    ax = city_counts.plot.bar(figsize=(10, 5), title='Top 15 States by Crime Reports')
    ax.set_xlabel('state/ut')
    ax.set_ylabel('Number of Crimes')
    plt.show()

In [14]:
#6 Analysis of all the crimes

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Bring the headers to snake_case (trim, lowercase, spaces -> underscores).
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# The whole analysis is keyed on a 'year' column; without it there is nothing to plot.
if 'year' in df.columns:
    # Every int64/float64 column other than 'year' is treated as a crime measure.
    numeric_cols = [
        col
        for col in df.select_dtypes(include=['int64', 'float64']).columns.tolist()
        if col != 'year'
    ]

    print("📊 Numeric crime-related columns:", numeric_cols)

    # One row per year holding the sum of each numeric column.
    year_grouped = df.groupby('year')[numeric_cols].sum()
    year_grouped = year_grouped.reset_index()

    # One figure per column, each followed by a one-line min/max summary.
    for col in numeric_cols:
        plt.figure(figsize=(8, 5))
        ax = sns.lineplot(data=year_grouped, x='year', y=col, marker='o')
        ax.set_title(f"Trend of '{col}' Over the Years")
        ax.set_xlabel("Year")
        ax.set_ylabel(col)
        ax.grid(True)
        plt.tight_layout()
        plt.show()

        print(f"🧠 From {year_grouped['year'].min()} to {year_grouped['year'].max()}, '{col}' ranged from {year_grouped[col].min()} to {year_grouped[col].max()}.")
        print("-" * 60)
else:
    print("❌ 'year' column not found. Please make sure your dataset has a column named 'year'.")


In [16]:
#7 Trend of crimes across years for a given State

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Clean column names (trim, lowercase, spaces -> underscores).
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Identify the state and year columns. Headers were lowercased just above, so a
# plain substring test finds the first column whose name contains 'state'.
state_col = next((col for col in df.columns if 'state' in col), None)
year_col = 'year' if 'year' in df.columns else None

if not state_col or not year_col:
    print("❌ Required columns ('state' and 'year') not found.")
else:
    # Display unique states. str() guards against non-text entries (for example
    # a 0 written by a blanket fillna), which have no .upper() method.
    states = df[state_col].dropna().unique().tolist()
    print("# Available States/UTs:")
    for s in states:
        print("o", str(s).upper())

    # Ask user to select a state; the answer is normalised the same way as the data.
    selected_state = input("\n✏️ Enter a state from the list above: ").strip().lower()

    # Match against a normalised copy of the state column instead of overwriting
    # df[state_col]: mutating the shared frame here would silently change what
    # every other cell sees and make this cell non-idempotent.
    state_key = df[state_col].astype(str).str.strip().str.lower()
    # notna() keeps missing states from matching the literal text "nan".
    filtered_df = df[df[state_col].notna() & (state_key == selected_state)]

    if filtered_df.empty:
        print("❌ No data found for the selected state.")
    else:
        # Every int64/float64 column except the year is treated as a crime count.
        numeric_cols = filtered_df.select_dtypes(include=['int64', 'float64']).columns.tolist()
        crime_cols = [col for col in numeric_cols if col != year_col]

        # Sum each crime column per year, then add them up row-wise into one total.
        yearly_total = filtered_df.groupby(year_col)[crime_cols].sum()
        yearly_total['total_crimes'] = yearly_total.sum(axis=1)
        yearly_total = yearly_total.reset_index()

        # Plotting
        plt.figure(figsize=(9, 5))
        sns.lineplot(data=yearly_total, x=year_col, y='total_crimes', marker='o', color='darkred')
        plt.title(f"Total Reported Crimes in {selected_state.title()} Over the Years")
        plt.xlabel("Year")
        plt.ylabel("Total Crimes")
        plt.grid(True)
        plt.tight_layout()
        plt.show()

        # Summary
        print(f"🔍 From {yearly_total[year_col].min()} to {yearly_total[year_col].max()},")
        print(f"Total crimes ranged from {yearly_total['total_crimes'].min()} to {yearly_total['total_crimes'].max()}.")


In [18]:
#8 Type of crime for a given state

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Clean column names (trim, lowercase, spaces -> underscores).
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Identify the state and year columns. Headers were lowercased just above, so a
# plain substring test finds the first column whose name contains 'state'.
state_col = next((col for col in df.columns if 'state' in col), None)
year_col = 'year' if 'year' in df.columns else None

if not state_col or not year_col:
    print("❌ Required columns ('state' and 'year') not found.")
else:
    # Display unique states in uppercase. str() guards against non-text entries
    # (for example a 0 written by a blanket fillna), which have no .upper().
    states = df[state_col].dropna().unique().tolist()
    print("📍 Available States/UTs:")
    for s in states:
        print("🔸", str(s).upper())

    # Ask user to select a state; the answer is normalised the same way as the data.
    selected_state = input("\n✏️ Enter a state from the list above: ").strip().lower()

    # Compare against a normalised copy of the state column rather than
    # overwriting df[state_col]: rewriting the shared frame would change what
    # other cells display and make re-running this cell depend on earlier runs.
    state_key = df[state_col].astype(str).str.strip().str.lower()
    # notna() keeps missing states from matching the literal text "nan".
    filtered_df = df[df[state_col].notna() & (state_key == selected_state)]

    if filtered_df.empty:
        print("❌ No data found for the selected state.")
    else:
        # Identify all numeric columns except year (assumed crime columns)
        numeric_cols = filtered_df.select_dtypes(include=['int64', 'float64']).columns.tolist()
        crime_cols = [col for col in numeric_cols if col != year_col]

        # Total per crime column over all rows of the selected state, largest first.
        crime_sums = filtered_df[crime_cols].sum().sort_values(ascending=False)

        # Horizontal bar chart: one bar per crime column.
        plt.figure(figsize=(12, 6))
        sns.barplot(x=crime_sums.values, y=crime_sums.index, color='darkred')
        plt.title(f"Total Crimes by Type in {selected_state.upper()}")
        plt.xlabel("Total Crimes")
        plt.ylabel("Crime Type")
        plt.tight_layout()
        plt.show()

        # Print summary
        print(f"\n🔍 Crime counts by type in {selected_state.upper()}:")
        print(crime_sums)
