1. Data Preprocessing

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib

def preprocess_data(df):
    # Ensure all columns are in lowercase
    df.columns = df.columns.str.lower()
    
    # Encode categorical variables
    label_encoders = {}
    for column in df.select_dtypes(include=['object']).columns:
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le
    
    return df, label_encoders

def split_data(df, target_column):
    X = df.drop(target_column, axis=1)
    y = df[target_column]
    return train_test_split(X, y, test_size=0.2, random_state=42)


2. Model Training

In [8]:
from xgboost import XGBClassifier

def train_model(data_path, target_column, model_path):
    # Load the dataset
    df = pd.read_csv(data_path)
    
    # Preprocess the data
    df, label_encoders = preprocess_data(df)
    
    # Split the data
    X_train, X_test, y_train, y_test = split_data(df, target_column)
    
    # Train the model
    model = XGBClassifier()
    model.fit(X_train, y_train)
    
    # Save the model, feature names, and label encoders
    joblib.dump((model, X_train.columns.tolist(), label_encoders), model_path)
    
    # Evaluate the model
    accuracy = model.score(X_test, y_test)
    print(f"Model trained with accuracy: {accuracy:.2f}")

# Make sure your dataset is available at the correct path
train_model('data\accident_data.csv', 'area_accident', 'models/accident_model.pkl')


OSError: [Errno 22] Invalid argument: 'data\x07ccident_data.csv'

3. Streamlit Application

In [9]:
import streamlit as st
import pandas as pd
import joblib

# Load the trained model and feature names
model, feature_names, label_encoders = joblib.load('models/accident_model.pkl')

# Define the main function
def main():
    st.title("Accident Prediction App")

    # Input fields
    time_hour = st.selectbox("Hour", [f"{i:02d}" for i in range(24)])
    time_minute = st.selectbox("Minute", [f"{i:02d}" for i in range(60)])
    day_of_week = st.selectbox("Day of the Week", ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"])
    area_accident = st.selectbox("Area of Accident",  [
        'Office areas', 'Recreational areas', 'Residential areas', 'Industrial areas', 'Other',
        'Church areas', 'Market areas', 'Rural village areas', 'Outside rural areas', 
        'Hospital areas', 'School areas', 'Unknown', 'Rural village areasOffice areas'
    ])
    type_of_vehicle = st.selectbox("Type of Vehicle",[
        'Lorry (41–100Q)','Public (12 seats)', 'Ridden horse', 'Lorry (11–40Q)', 'Turbo', 
        'Taxi', 'Bicycle', 'Automobile', 'Other', 'Pick up up to 10Q', 'Public (13–45 seats)', 
        'Special vehicle', 'Stationwagen', 'Long lorry', 'Bajaj', 'Public (> 45 seats)', 'Motorcycle'
    ])
    lane_or_medians = st.selectbox("Lane or Medians",  [
        'Undivided Two way', 'other', 'Double carriageway (median)', 'One way',
        'Two-way (divided with solid lines road marking)', 'Two-way (divided with broken lines road marking)',
        'Unknown'
    ])
    road_surface_type = st.selectbox("Road Surface Type", [
        'Asphalt roads', 'Earth roads', 'Gravel roads', 'Other', 'Asphalt roads with some distress'
    ])
    road_surface_conditions = st.selectbox("Road Surface Conditions", [
        'No junction', 'Y Shape', 'Crossing', 'O Shape', 'Other', 'Unknown', 'T Shape', 'X Shape'
    ])
    light_conditions = st.selectbox("Light Conditions", [
        'Daylight', 'Darkness - lights lit', 'Darkness - no lighting', 'Darkness - lights unlit'
    ])
    weather_conditions = st.selectbox("Weather Conditions",  [
        'Normal', 'Raining', 'Raining and Windy', 'Cloudy', 'Other', 'Windy', 'Snow', 'Unknown', 'Fog or mist'
    ])
    sex_of_driver = st.selectbox("Sex of Driver", ["Male", "Female", "Other"])

    # Combine time input
    time = f"{time_hour}:{time_minute}"

    # Convert input into DataFrame
    input_data = pd.DataFrame({
        'time': [time],
        'day_of_week': [day_of_week],
        'area_accident': [area_accident],
        'type_of_vehicle': [type_of_vehicle],
        'lane_or_medians': [lane_or_medians],
        'road_surface_type': [road_surface_type],
        'road_surface_conditions': [road_surface_conditions],
        'light_conditions': [light_conditions],
        'weather_conditions': [weather_conditions],
        'sex_of_driver': [sex_of_driver]
    })

    # Preprocess the input data
    for column, le in label_encoders.items():
        input_data[column] = le.transform(input_data[column])

    input_data['time'] = input_data['time'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]))
    
    # Ensure the input data columns match the feature names
    input_data = input_data[feature_names]

    # Make prediction
    if st.button("Predict"):
        prediction_proba = model.predict_proba(input_data)
        accident_chance = prediction_proba[0][1] * 100  # Assuming the second column is the probability of accident

        st.write(f"The predicted accident chance is: {accident_chance:.2f}%")

if __name__ == "__main__":
    main()


FileNotFoundError: [Errno 2] No such file or directory: 'models/accident_model.pkl'