In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from  scipy.stats import f_oneway
import warnings 
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
accidents =  pd.read_csv('accident_data.csv')

In [3]:
accidents.describe()

Unnamed: 0,Latitude,Longitude,Number_of_Casualties,Number_of_Vehicles
count,660654.0,660653.0,660679.0,660679.0
mean,52.553866,-1.43121,1.35704,1.831255
std,1.406922,1.38333,0.824847,0.715269
min,49.91443,-7.516225,1.0,1.0
25%,51.49069,-2.332291,1.0,1.0
50%,52.315641,-1.411667,1.0,2.0
75%,53.453452,-0.232869,1.0,2.0
max,60.757544,1.76201,68.0,32.0


In [4]:
accidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660654 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660653 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  659953 non-null  object 
 10  Road_Type                656159 non-null  object 
 11  Urban_or_Rural_Area      660664 non-null  object 
 12  Weather_Conditions       646551 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

In [5]:
category_col =['Index','Accident_Severity','Accident Date','Light_Conditions','District Area','Road_Surface_Conditions','Road_Type'
              ,'Urban_or_Rural_Area','Weather_Conditions','Vehicle_Type',]


for col in category_col:
    accidents[col] = accidents[col].astype('category')

print(accidents.dtypes)

Index                      category
Accident_Severity          category
Accident Date              category
Latitude                    float64
Light_Conditions           category
District Area              category
Longitude                   float64
Number_of_Casualties          int64
Number_of_Vehicles            int64
Road_Surface_Conditions    category
Road_Type                  category
Urban_or_Rural_Area        category
Weather_Conditions         category
Vehicle_Type               category
dtype: object


<H1>FILLING UP NULL VALUES</H1>

In [6]:
accidents['Accident Date'] = accidents['Accident Date'].fillna(accidents['Accident Date'].mode()[0])
accidents['Latitude'] = accidents['Latitude'].fillna(accidents['Latitude'].mode()[0])
accidents['Longitude'] = accidents['Longitude'].fillna(accidents['Longitude'].mode()[0])
accidents['Road_Surface_Conditions'] = accidents['Road_Surface_Conditions'].fillna(accidents['Road_Surface_Conditions'].mode()[0])
accidents['Road_Type'] = accidents['Road_Type'].fillna(accidents['Road_Type'].mode()[0])
accidents['Urban_or_Rural_Area'] = accidents['Urban_or_Rural_Area'].fillna(accidents['Urban_or_Rural_Area'].mode()[0])
accidents.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                       0
Light_Conditions               0
District Area                  0
Longitude                      0
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions        0
Road_Type                      0
Urban_or_Rural_Area            0
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

In [7]:
accidents['Accident Date'] = pd.to_datetime(accidents['Accident Date'], dayfirst = True, errors = 'coerce')

In [8]:
accidents.dtypes

Index                            category
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                 category
District Area                    category
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

In [9]:
accidents.isnull().sum()

Index                           0
Accident_Severity               0
Accident Date              395672
Latitude                        0
Light_Conditions                0
District Area                   0
Longitude                       0
Number_of_Casualties            0
Number_of_Vehicles              0
Road_Surface_Conditions         0
Road_Type                       0
Urban_or_Rural_Area             0
Weather_Conditions          14128
Vehicle_Type                    0
dtype: int64

<h1>Q1: WHAT IS THE DISTRICT WITH THE HIGHEST NUMBER OF CASUALTIES?</h1>

In [10]:
district_casualties = accidents.groupby('District Area')['Number_of_Casualties'].sum()
district_casualties

District Area
Aberdeen City    1508
Aberdeenshire    2529
Adur              817
Allerdale        1663
Alnwick           351
                 ... 
Wychavon         1943
Wycombe          2442
Wyre             1706
Wyre Forest      1340
York             2453
Name: Number_of_Casualties, Length: 422, dtype: int64

<h1>insight1.1:Aberdeenshire has the highest number of casualties</h1>

<h1>insight2: what is the number of casualties in  aberdeenshire</h1>

In [11]:
aberdeenshire_casualties = district_casualties['Aberdeenshire']
aberdeenshire_casualties

np.int64(2529)

<H1>ANS:Aberdeenshire has the highest number of casualties with 2529.</H1>

<H1>Q2:What is the distribution of Accident_Severity? </H1>

In [12]:
accidents['Accident_Severity'].value_counts()


Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64

<h1>insight3: Slight: 85.5% </h1>
<h1>insight4: Serious: 13.4% </h1>
<h1>insight5: Fatal: 1.3% </h1>
<h1>overall, slights severity have the highest percentage in accident severity</h1>

<h1>Q3:How often do different Light_Conditions occur in accidents?</h1>

In [13]:
accidents['Light_Conditions'].value_counts()


Light_Conditions
Daylight                       484880
Darkness - lights lit          129335
Darkness - no lighting          37437
Darkness - lighting unknown      6484
Darkness - lights unlit          2543
Name: count, dtype: int64

<h1>insight6: 73.4 percent of accident happen on daylight</h1>
<h1>insight7: 19.6 percent of accident happen on Darkness - lights lit</h1>
<h1>insight8: 5.7 percent of accident happen on Darkness - no lighting</h1>
<h1>insight9: 1.0 percent of accident happen on Darkness - lighting unknown</h1>
<h1>insight10: 0.4 percent of accident happen on Darkness - lights unlit</h1>

<h1>Q4:number of accidents during weather conditions</h1>

In [14]:
accidents['Weather_Conditions'].value_counts()


Weather_Conditions
Fine no high winds       520885
Raining no high winds     79696
Other                     17150
Raining + high winds       9615
Fine + high winds          8554
Snowing no high winds      6238
Fog or mist                3528
Snowing + high winds        885
Name: count, dtype: int64

<H1>INSIGHT11: Fine no high winds: 520885 or 81.7%</H1>
<H1>INSIGHT12: Raining no high winds:79696 or 12.5%</H1>
<H1>INSIGHT13: Other: 17150 or 2.7%</H1>
<H1>INSIGHT14: Raining + high winds: 9615 or 1.5%</H1>
<H1>INSIGHT15: Fine + high winds: 8554 or 1.3%</H1>
<H1>INSIGHT 16:Fog or mist: 3528 or  0.6%</H1>
<H1>INSIGHT 17:Snowing + high winds: 885 or 0.1%</H1>
<H2>basically, accidents happens on a sunny day weather conditions</H2>

<h1>Q5:How does Road_Type affect the number of Number_of_Casualties?</h1>

In [15]:
accidents.groupby('Road_Type')['Number_of_Casualties'].mean()


Road_Type
Dual carriageway      1.477279
One way street        1.192713
Roundabout            1.274891
Single carriageway    1.343788
Slip road             1.423661
Name: Number_of_Casualties, dtype: float64

<h1>Insight:18
    
Different road types have varying impacts on the average number of casualties. 

Dual carriageway: Average number of casualties is 1.477279, which is the highest among the road types listed.

Slip road: Average number of casualties is 1.423661, slightly lower than dual carriageways.

Single carriageway: Average number of casualties is 1.343788.

Roundabout: Average number of casualties is 1.274891.

 INSIGHT One way street: Average number of casualties is 1.192713, which is the lowest among the road types listed.</h1>

<h1>Q6:How do Light_Conditions affect the Number_of_Casualties?</h1>

In [16]:
accidents.groupby('Light_Conditions')['Number_of_Casualties'].mean()


Light_Conditions
Darkness - lighting unknown    1.306447
Darkness - lights lit          1.369374
Darkness - lights unlit        1.331105
Darkness - no lighting         1.541657
Daylight                       1.340309
Name: Number_of_Casualties, dtype: float64

<h1>INSIGHT19:
No Lighting: The most dangerous, with the highest average casualties.

Lights Lit: Still dangerous at night, even with streetlights.

Lights Unlit: Slightly safer than lit lights, but still risky.

Daylight: Safer than nighttime, with fewer casualties.

Unknown Lighting: Least average casualties, but data might not be reliable.</h1>


<h1>Q7:How do Weather_Conditions, Light_Conditions, and Accident_Severity interact?</h1>


In [17]:
accidents.groupby(['Weather_Conditions', 'Light_Conditions'])['Number_of_Casualties'].mean()



Weather_Conditions     Light_Conditions           
Fine + high winds      Darkness - lighting unknown    1.389831
                       Darkness - lights lit          1.373110
                       Darkness - lights unlit        1.470588
                       Darkness - no lighting         1.523869
                       Daylight                       1.370428
Fine no high winds     Darkness - lighting unknown    1.331252
                       Darkness - lights lit          1.366880
                       Darkness - lights unlit        1.324118
                       Darkness - no lighting         1.560301
                       Daylight                       1.333901
Fog or mist            Darkness - lighting unknown    1.369231
                       Darkness - lights lit          1.436482
                       Darkness - lights unlit        1.297297
                       Darkness - no lighting         1.469838
                       Daylight                       1.460134
Othe

<H1>
INSIGHTS20:
Fine Weather (High Winds/No High Winds):
    
Daylight: Generally results in fewer casualties.

DaRkness - No Lighting: Highest casualties, especially without lighting.

Daylight: Fewer casualties compared to night.

Darkness - Lights Unlit: Lowest casualties within foggy conditions.

Darkness - No Lighting: Higher casualties.

Daylight: Safer with fewer casualties.

Raining (With/Without High Winds):

DaylighT: Slightly fewer casualties.

Darkness - No Lighting: Higher casualties.

Snowing (With/Without High Winds):
|
</H1>

<h1>Q8:Does light Conditions and Weather Conditions affects the Severity of an accidents</h1>

In [18]:
accidents.groupby(['Weather_Conditions', 'Light_Conditions'])['Accident_Severity'].value_counts()


Weather_Conditions     Light_Conditions             Accident_Severity
Fine + high winds      Darkness - lighting unknown  Slight                 49
                                                    Serious                 7
                                                    Fatal                   3
                       Darkness - lights lit        Slight               1497
                                                    Serious               311
                                                                         ... 
Snowing no high winds  Darkness - no lighting       Serious               107
                                                    Fatal                  11
                       Daylight                     Slight               3163
                                                    Serious               292
                                                    Fatal                  20
Name: count, Length: 120, dtype: int64

<h1>INSIGHTS21:
    Fine + High Winds:

Darkness - lighting unknown: Most accidents are slight, with a few serious and fatal.

Darkness - lights lit: Higher number of slight accidents, followed by serious accidents.

Snowing No High Winds:

Darkness - no lighting: Higher number of serious accidents and some fatal accidents.

Daylight: Most accidents are slight, followed by serious and a few fatal.</h1>

<h1>Q9:What is the distribution of accidents based on Accident_Severity?</h1>

In [19]:
accident_severity_distribution = accidents['Accident_Severity'].value_counts()
print(accident_severity_distribution)

Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64


<H1> INSIGHTS22:
distribution of accidents based on severity in percentage:
Slight: 563,801 accidents (87.02%)

Serious: 88,217 accidents (13.63%)

Fatal: 8,661 accidents (1.34%)</H1>

<h1>Q10:What is the average number of casualties per accident?</h1>

In [20]:
average_casualties = accidents['Number_of_Casualties'].mean()
print(average_casualties)

1.357040257068864


<H1>insight23:The average number of casualties per accident is 1.357040257068864 or 16.96%.</H1>

<h1>Q11:What is the most common Vehicle_Type involved in accidents?</h1>

In [22]:
vehicle_type_distribution = accidents['Vehicle_Type'].value_counts()


print(vehicle_type_distribution)

Vehicle_Type
Car                                      497992
Van / Goods 3.5 tonnes mgw or under       34160
Bus or coach (17 or more pass seats)      25878
Motorcycle over 500cc                     25657
Goods 7.5 tonnes mgw and over             17307
Motorcycle 125cc and under                15269
Taxi/Private hire car                     13294
Motorcycle over 125cc and up to 500cc      7656
Motorcycle 50cc and under                  7603
Goods over 3.5t. and under 7.5t            6096
Other vehicle                              5637
Minibus (8 - 16 passenger seats)           1976
Agricultural vehicle                       1947
Pedal cycle                                 197
Data missing or out of range                  6
Ridden horse                                  4
Name: count, dtype: int64


<H1>insight24:</H1>
<h1>Cars Everywhere: No surprise here—cars are involved in the most accidents. With 497,992 incidents, it looks like cars are the biggest culprits on the road.</h1>
<h1>Bikes and Big Trucks: Motorcycles and big trucks are also pretty accident-prone. Motorcycles over 500cc and heavy goods vehicles seem to have a knack for getting into accidents. Maybe it's their size, speed, or just bad luck.</h1>
<h1>Rarely Seen on the Accident List: Some vehicles, like ridden horses 4 accidents and pedal cycles 197 accidents, hardly ever show up in accident stats. Probably because there aren't too many of them on the roads.</h1>
<h1>Passenger Vehicles: Buses and minibuses have their fair share of accidents too . So, it’s crucial to keep safety top of mind for these passenger vehicles—make sure drivers are well-trained, and the vehicles are in good shape.</h1>