In [35]:
# ----------------------------------------------------------------------
# **Part 1: File Set Up**
# ----------------------------------------------------------------------

#===========DEPENDENCIES=============
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import seaborn as sns
import requests
from datetime import datetime


# Never truncate columns when a DataFrame is displayed (None = no limit).
pd.options.display.max_columns = None


# Helper: extract the hour of day (0-23) from an 'HH:MM' time value
def to_hour(time):
    """Return the hour of day (0-23) encoded in an ``H:MM`` / ``HH:MM`` value.

    ``time`` is passed through ``str()`` before parsing, so any object is
    accepted. A value that cannot be parsed with the ``%H:%M`` format (for
    example ``NaN`` or free text) yields ``0`` instead of raising.
    """
    try:
        parsed = datetime.strptime(str(time), '%H:%M')
    except Exception:
        # Best-effort fallback: an unparseable value is reported as hour 0,
        # which cannot be told apart from a genuine midnight reading.
        return 0
    return parsed.hour
#============IMPORT==============
# Path to the raw 2014 accident records, relative to the working directory;
# os.path.join keeps the separator correct on every platform.
csv_file_path = os.path.join(
    'Resources',
    'accidents_2014.csv',
)

# Load the raw records; every later step works on `traffic_df`.
traffic_df = pd.read_csv(filepath_or_buffer=csv_file_path)

#============DATA CLEANING===========

#============DROP BLANK COLUMNS===========

# Columns that are empty for every record carry no information.
traffic_df = traffic_df.dropna(axis=1, how='all')

#============DROP ROWS WITH BLANK VALUES==========

# Order matters:
#   1. A missing Junction_Control is relabelled as the category 'None', so
#      those rows are NOT removed by the final dropna
#      (presumably a blank means "no junction control" -- confirm with the data dictionary).
#   2. Empty strings and the placeholder 'Unknown' are turned into NaN in
#      every column.
#   3. Any row that still contains a NaN is dropped.
#
# The Junction_Control fix is done with `assign` rather than
# `traffic_df['Junction_Control'].replace(..., inplace=True)`: an in-place
# call on a column selection acts on a temporary Series, which pandas 2.2+
# warns about and which leaves the frame unchanged under Copy-on-Write.
traffic_df = (
    traffic_df
    .assign(Junction_Control=lambda frame: frame['Junction_Control'].replace(np.nan, 'None'))
    .replace(['', 'Unknown'], np.nan)
    .dropna(axis=0)
)

#===========REPLACING ERRANT/MISSPELLED VALUES===============

# Correct a misspelled / inconsistently capitalised category label in two
# columns. Each corrected column is assigned back to the frame: calling
# `.replace(..., inplace=True)` on `traffic_df[col]` acts on a temporary
# Series, which pandas 2.2+ warns about and which leaves `traffic_df`
# unchanged under Copy-on-Write.
traffic_df['Light_Conditions'] = traffic_df['Light_Conditions'].replace(
    'Darkeness: No street lighting',
    'Darkness: No street lighting',
)

traffic_df['Pedestrian_Crossing-Physical_Facilities'] = (
    traffic_df['Pedestrian_Crossing-Physical_Facilities'].replace(
        'non-junction pedestrian crossing',
        'Non-junction Pedestrian Crossing',
    )
)
#===========RENAMING COLUMNS===============

# Human-readable column labels: underscores become spaces and a few names are
# reworded. Entries whose key and value are equal leave that column as it is.
COLUMN_LABELS = {
    'Accident_Index': 'Accident Index',
    'Longitude': 'Longitude',
    'Latitude': 'Latitude',
    'Police_Force': 'Police Force',
    'Accident_Severity': 'Accident Severity',
    'Number_of_Vehicles': 'Number of Vehicles',
    'Number_of_Casualties': 'Number of Casualties',
    'Date': 'Date',
    'Day_of_Week': 'Day of Week',
    'Time': 'Time',
    'Local_Authority_(District)': 'Local Authority District',
    'Local_Authority_(Highway)': 'Local Authority Highway',
    '1st_Road_Class': '1st Road Class',
    '1st_Road_Number': '1st Road Number',
    'Road_Type': 'Road Type',
    'Speed_limit': 'Speed Limit',
    'Junction_Control': 'Junction Control',
    '2nd_Road_Class': '2nd Road Class',
    '2nd_Road_Number': '2nd Road Number',
    'Pedestrian_Crossing-Human_Control': 'Pedestrian Crossing Human Control',
    'Pedestrian_Crossing-Physical_Facilities': 'Pedestrian Crossing Physical Facilities',
    'Light_Conditions': 'Light Conditions',
    'Weather_Conditions': 'Weather Conditions',
    'Road_Surface_Conditions': 'Road Surface Conditions',
    'Special_Conditions_at_Site': 'Special Conditions at Site',
    'Carriageway_Hazards': 'Carriageway Hazards',
    'Urban_or_Rural_Area': 'Urban or Rural Area',
    'Did_Police_Officer_Attend_Scene_of_Accident': 'Police Attended Scene of Accident',
    'LSOA_of_Accident_Location': 'LSOA of Accident Location',
    'Year': 'Year',
}

# Apply the labels to the existing frame; columns not listed keep their name.
traffic_df.rename(columns=COLUMN_LABELS, inplace=True)
# Parse the day/month/two-digit-year strings into timestamps, then derive the
# calendar features (month, day of month) and the hour of the accident.
parsed_dates = pd.to_datetime(traffic_df['Date'], format='%d/%m/%y')
traffic_df['Date'] = parsed_dates
traffic_df['Month'] = parsed_dates.dt.month
traffic_df['Day'] = parsed_dates.dt.day

# to_hour returns 0 for any 'Time' value it cannot parse as HH:MM.
traffic_df['Hour of Day'] = traffic_df['Time'].map(to_hour)

# Preview the cleaned frame.
traffic_df.head()

Unnamed: 0,Accident Index,Longitude,Latitude,Police Force,Accident Severity,Number of Vehicles,Number of Casualties,Date,Day of Week,Time,Local Authority District,Local Authority Highway,1st Road Class,1st Road Number,Road Type,Speed Limit,Junction Control,2nd Road Class,2nd Road Number,Pedestrian Crossing Human Control,Pedestrian Crossing Physical Facilities,Light Conditions,Weather Conditions,Road Surface Conditions,Special Conditions at Site,Carriageway Hazards,Urban or Rural Area,Police Attended Scene of Accident,LSOA of Accident Location,Year,Month,Day,Hour of Day
0,201401BS70001,-0.206443,51.496345,1,3,2,1,2014-01-09,5,13:21,12,E09000020,3,315,Single carriageway,30,,-1,0,None within 50 metres,No physical crossing within 50 meters,Daylight: Street light present,Raining without high winds,Wet/Damp,,,1,No,E01002814,2014,1,9,13
1,201401BS70006,-0.171308,51.495892,1,3,2,1,2014-01-09,5,8:50,12,E09000020,3,4,Single carriageway,30,Giveway or uncontrolled,3,4,None within 50 metres,Central refuge,Daylight: Street light present,Fine without high winds,Dry,,,1,Yes,E01002821,2014,1,9,8
2,201401BS70009,-0.201326,51.498245,1,3,2,1,2014-01-10,6,18:25,12,E09000020,3,315,Single carriageway,30,Giveway or uncontrolled,6,0,None within 50 metres,No physical crossing within 50 meters,Darkness: Street lights present and lit,Fine without high winds,Wet/Damp,,,1,Yes,E01002817,2014,1,10,18
3,201401BS70011,-0.207445,51.507511,1,3,2,1,2014-01-10,6,10:55,12,E09000020,5,0,Single carriageway,30,Giveway or uncontrolled,6,0,None within 50 metres,No physical crossing within 50 meters,Daylight: Street light present,Fine without high winds,Dry,,,1,Yes,E01002871,2014,1,10,10
4,201401BS70013,-0.179731,51.497822,1,3,2,1,2014-01-05,1,20:26,12,E09000020,5,0,Single carriageway,30,Giveway or uncontrolled,5,0,None within 50 metres,No physical crossing within 50 meters,Darkness: Street lights present and lit,Raining without high winds,Wet/Damp,,,1,Yes,E01002892,2014,1,5,20


In [36]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: Preprocess the data
# Encode each weather description as a small integer so it can be used as a
# numeric model feature.
# NOTE(review): 'Fine without high winds' and 'Raining without high winds'
# share code 3, so the models cannot tell them apart -- confirm this grouping
# is intended.
weather_condition_mapping = {
    'Fine without high winds': 3,
    'Raining without high winds': 3,
    'Raining with high winds': 2,
    'Other': 2,
    'Fine with high winds': 2,
    'Fog or mist': 1,
    'Snowing without high winds': 1,
    'Snowing with high winds': 1
}

traffic_df['Weather_Condition_Number'] = traffic_df['Weather Conditions'].map(weather_condition_mapping)

# Series.map yields NaN for any label that is missing from the mapping. Fail
# here, naming the offending labels, instead of letting NaN features reach the
# models further down.
missing_weather_code = traffic_df['Weather_Condition_Number'].isna()
assert not missing_weather_code.any(), (
    "No numeric code defined for weather conditions: "
    f"{traffic_df.loc[missing_weather_code, 'Weather Conditions'].unique().tolist()}"
)

# Step 2: Split the data into training and testing sets
# Features: accident location plus the encoded weather.
# Target: the 'Accident Severity' column.
feature_columns = ['Longitude', 'Latitude', 'Weather_Condition_Number']
X = traffic_df[feature_columns]
Y = traffic_df['Accident Severity']

# Half of the rows are held out for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.5,
    random_state=42,
)

In [37]:
from sklearn.naive_bayes import GaussianNB

## 1 -> Gaussian Naive Bayes: create the classifier, train it, then test it

# Step 3: Train the model (fit returns the fitted estimator itself)
clf = GaussianNB().fit(X_train, Y_train)

# Step 4: Test the model
# Accuracy = share of test rows whose predicted severity equals the recorded one.
Y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8478614168575017


In [38]:
from sklearn.ensemble import RandomForestClassifier
## 2 -> Random Forest classifier

# Step 3: Train the model
# 100 trees; the fixed seed keeps the forest reproducible between runs.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, Y_train)

# Step 4: Test the model
# NOTE: `Y_pred` and `accuracy` are re-bound here, replacing the values
# produced by the Gaussian Naive Bayes cell.
Y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.8203811775134816


In [39]:
from sklearn.linear_model import LogisticRegression

## 3 -> Logistic Regression

# Step 3: Train the logistic regression model
lr_model = LogisticRegression(random_state=42).fit(X_train, Y_train)

# Step 4: Test the model
# NOTE(review): the recorded accuracy of this cell matches the neural-network
# cell to every digit; it may be worth checking whether both models simply
# predict the most frequent severity class.
Y_pred_lr = lr_model.predict(X_test)
accuracy_lr = accuracy_score(y_true=Y_test, y_pred=Y_pred_lr)
print("Logistic Regression Accuracy:", accuracy_lr)


Logistic Regression Accuracy: 0.850483859052966


In [40]:
from sklearn.tree import DecisionTreeClassifier

## 4 -> Decision Tree classifier

# Step 3: Train the model (fixed seed for a reproducible tree)
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, Y_train)

# Step 4: Test the model
Y_pred_dt = dt_model.predict(X_test)
accuracy_dt = accuracy_score(y_true=Y_test, y_pred=Y_pred_dt)
print("Decision Tree Accuracy:", accuracy_dt)


Decision Tree Accuracy: 0.7545246361823151


In [41]:
from sklearn.neural_network import MLPClassifier

## 5 -> Neural network (multi-layer perceptron)

# Step 3: Train the Neural Network model
# Default architecture; only the seed is fixed so runs are repeatable.
nn_model = MLPClassifier(random_state=80).fit(X_train, Y_train)

# Step 4: Test the model
# NOTE(review): the recorded accuracy of this cell matches the logistic
# regression cell to every digit; it may be worth checking whether both models
# simply predict the most frequent severity class.
Y_pred_nn = nn_model.predict(X_test)
accuracy_nn = accuracy_score(y_true=Y_test, y_pred=Y_pred_nn)
print("Neural Network Accuracy:", accuracy_nn)


Neural Network Accuracy: 0.850483859052966


In [42]:
from sklearn.neighbors import KNeighborsClassifier

## 6 -> K-Nearest Neighbors

# Step 3: Train the KNN model
# Each prediction is a vote among the 6 nearest training rows; tune
# n_neighbors as needed.
knn_model = KNeighborsClassifier(n_neighbors=6).fit(X_train, Y_train)

# Step 4: Test the model
Y_pred_knn = knn_model.predict(X_test)
accuracy_knn = accuracy_score(y_true=Y_test, y_pred=Y_pred_knn)
print("K-Nearest Neighbors Accuracy:", accuracy_knn)


K-Nearest Neighbors Accuracy: 0.8222649035975474
