In [39]:
import numpy as np
import pandas as pd
import warnings

from scipy.stats import f_oneway
warnings.filterwarnings('ignore')  #to ignore warnings

<h3 style=background-color:pink>Importing datasets using pandas</h3>

In [8]:
accidents = pd.read_csv('datasets\\accident_data.csv')

<h3 style=background-color:pink>Showing DataFrame</h3>

In [9]:
accidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660654 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660653 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  659953 non-null  object 
 10  Road_Type                656159 non-null  object 
 11  Urban_or_Rural_Area      660664 non-null  object 
 12  Weather_Conditions       646551 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

<h4 style=background-color:pink><i><--describe is for showing a table--></i></h4>

In [10]:
accidents.describe()

Unnamed: 0,Latitude,Longitude,Number_of_Casualties,Number_of_Vehicles
count,660654.0,660653.0,660679.0,660679.0
mean,52.553866,-1.43121,1.35704,1.831255
std,1.406922,1.38333,0.824847,0.715269
min,49.91443,-7.516225,1.0,1.0
25%,51.49069,-2.332291,1.0,1.0
50%,52.315641,-1.411667,1.0,2.0
75%,53.453452,-0.232869,1.0,2.0
max,60.757544,1.76201,68.0,32.0


<h3 style=background-color:pink>Showing Null values</h3>

In [11]:
accidents.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

<h4 style=background-color:pink><i><--setting a value for null--></i></h4>

In [12]:
accidents['Latitude'] = accidents['Latitude'].fillna(accidents['Latitude'].mode()[0])
accidents['Longitude'] = accidents['Longitude'].fillna(accidents['Longitude'].mode()[0])
accidents['Road_Surface_Conditions'] = accidents['Road_Surface_Conditions'].fillna(accidents['Road_Surface_Conditions'].mode()[0])
accidents['Road_Type'] = accidents['Road_Type'].fillna('unknown')
accidents['Urban_or_Rural_Area'] = accidents['Urban_or_Rural_Area'].fillna(accidents['Urban_or_Rural_Area'].mode()[0])
accidents['Weather_Conditions'] = accidents['Weather_Conditions'].fillna('unnacounted')

accidents.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

<h3 style=background-color:pink>Showing DataTypes</h3>

In [13]:
accidents.dtypes

Index                       object
Accident_Severity           object
Accident Date               object
Latitude                   float64
Light_Conditions            object
District Area               object
Longitude                  float64
Number_of_Casualties         int64
Number_of_Vehicles           int64
Road_Surface_Conditions     object
Road_Type                   object
Urban_or_Rural_Area         object
Weather_Conditions          object
Vehicle_Type                object
dtype: object

<h4 style=background-color:pink><i><--changing each DataTypes--></i></h4>

In [14]:
accidents['Accident_Severity'] = accidents['Accident_Severity'].astype('category')
accidents['Light_Conditions'] = accidents['Light_Conditions'].astype('category')
accidents['District Area'] = accidents['District Area'].astype('category')
accidents['Road_Surface_Conditions'] = accidents['Road_Surface_Conditions'].astype('category')
accidents['Road_Type'] = accidents['Road_Type'].astype('category')
accidents['Urban_or_Rural_Area'] = accidents['Urban_or_Rural_Area'].astype('category')
accidents['Weather_Conditions'] = accidents['Weather_Conditions'].astype('category')
accidents['Vehicle_Type'] = accidents['Vehicle_Type'].astype('category')

#setting a DataTypes for date to a datetime
accidents['Accident Date'] = pd.to_datetime(accidents['Accident Date'], dayfirst = True, errors = 'coerce')


accidents.dtypes


Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                 category
District Area                    category
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

<h3 style=background-color:pink>Setting a</h3>

In [15]:
accidents['Year'] = accidents['Accident Date'].dt.year
accidents['Month'] = accidents['Accident Date'].dt.month
accidents['Day'] = accidents['Accident Date'].dt.day
accidents['DayOfWeek'] = accidents['Accident Date'].dt.dayofweek

accidents.isnull().sum()

Index                           0
Accident_Severity               0
Accident Date              395672
Latitude                        0
Light_Conditions                0
District Area                   0
Longitude                       0
Number_of_Casualties            0
Number_of_Vehicles              0
Road_Surface_Conditions         0
Road_Type                       0
Urban_or_Rural_Area             0
Weather_Conditions              0
Vehicle_Type                    0
Year                       395672
Month                      395672
Day                        395672
DayOfWeek                  395672
dtype: int64

In [16]:
y2019 = accidents[accidents['Year'] == 2019] #accessing all 2019 data recorded

<h1 style=background-color:lightblue>[#1] Is it possible that next year, most accidents record can be fatal?</h1>

In [17]:
y2019['Index'].count()

np.int64(71867)

In [18]:
y2019['Accident_Severity'].value_counts()

Accident_Severity
Slight     60928
Serious     9823
Fatal       1116
Name: count, dtype: int64

In [19]:
y_2019 = y2019[y2019['Accident_Severity'] == 'Fatal'].count() / y2019['Accident_Severity'].count()
y_2019 * 100

Index                      1.552868
Accident_Severity          1.552868
Accident Date              1.552868
Latitude                   1.552868
Light_Conditions           1.552868
District Area              1.552868
Longitude                  1.552868
Number_of_Casualties       1.552868
Number_of_Vehicles         1.552868
Road_Surface_Conditions    1.552868
Road_Type                  1.552868
Urban_or_Rural_Area        1.552868
Weather_Conditions         1.552868
Vehicle_Type               1.552868
Year                       1.552868
Month                      1.552868
Day                        1.552868
DayOfWeek                  1.552868
dtype: float64

<h1 style=background-color:lightgreen>[Insight #1] since the percentage of 2019 accidents record is very low. It is impossible that next year most recorded accidents can be fatal</h1>

<h1 style=background-color:lightblue>[#2] Did the Fine weather influence the accident that happened in 2019?</h1>

In [20]:
y2019[y2019['Weather_Conditions'] == 'Fine no high winds'].count()

Index                      58345
Accident_Severity          58345
Accident Date              58345
Latitude                   58345
Light_Conditions           58345
District Area              58345
Longitude                  58345
Number_of_Casualties       58345
Number_of_Vehicles         58345
Road_Surface_Conditions    58345
Road_Type                  58345
Urban_or_Rural_Area        58345
Weather_Conditions         58345
Vehicle_Type               58345
Year                       58345
Month                      58345
Day                        58345
DayOfWeek                  58345
dtype: int64

In [21]:
y_2019 = y2019[y2019['Weather_Conditions'] == 'Fine no high winds'].count() / y2019['Accident_Severity'].count()
y_2019 * 100

Index                      81.184688
Accident_Severity          81.184688
Accident Date              81.184688
Latitude                   81.184688
Light_Conditions           81.184688
District Area              81.184688
Longitude                  81.184688
Number_of_Casualties       81.184688
Number_of_Vehicles         81.184688
Road_Surface_Conditions    81.184688
Road_Type                  81.184688
Urban_or_Rural_Area        81.184688
Weather_Conditions         81.184688
Vehicle_Type               81.184688
Year                       81.184688
Month                      81.184688
Day                        81.184688
DayOfWeek                  81.184688
dtype: float64

<h1 style=background-color:lightgreen>[Insight #2] Yes. Fine weather actually has a big influenced in an accident happened in wayback 2019. It means that the most accident happened during a nice weather</h1>

<h1 style=background-color:lightblue>[#3] Is there a possibility that being in a dark place, can caused a slight accident?  </h1>

In [22]:
accidents['Accident_Severity'].count()

np.int64(660679)

In [23]:
accidents['Light_Conditions'].value_counts()

Light_Conditions
Daylight                       484880
Darkness - lights lit          129335
Darkness - no lighting          37437
Darkness - lighting unknown      6484
Darkness - lights unlit          2543
Name: count, dtype: int64

<h3 style=background-color:pink>Darkness</h3>

In [24]:
Darkness = ['Darkness - lights lit', 'Darkness - no lighting', 'Darkness - lighting unknown', 'Darkness - lights unlit']

In [25]:
dark_accident = accidents[accidents['Light_Conditions'].isin(Darkness)].count() / accidents['Accident_Severity'].count()
dark_accident['Accident_Severity'] * 100

np.float64(26.60883727195809)

<h3 style=background-color:pink>Daylight</h3>

In [26]:
day_accident = accidents[accidents['Light_Conditions'] == 'Daylight'].count() / accidents['Accident_Severity'].count()
day_accident['Accident_Severity'] * 100

np.float64(73.3911627280419)

<h1 style=background-color:lightgreen>[Insight #3] There is a chance that accident can happen, but not as high as the chance with the daylight </h1>

<h1 style=background-color:lightblue>[#4] What year and weather has the most accident recorded with fatal injury  </h1>

In [27]:
acdnt_year = np.round(accidents[accidents["Accident_Severity"] == 'Fatal'].groupby(['Year', 'Weather_Conditions']).size())
acdnt_year.unstack()

Weather_Conditions,Fine + high winds,Fine no high winds,Fog or mist,Other,Raining + high winds,Raining no high winds,Snowing + high winds,Snowing no high winds,unnacounted
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019.0,31,942,14,11,17,92,0,1,8
2020.0,32,740,11,18,20,104,1,2,14
2021.0,14,651,8,19,15,97,0,7,12
2022.0,6,522,5,18,6,38,0,13,15


<h1 style=background-color:lightgreen>[Insight #4] 2019 has the highest record of accident happened with fatal injury while the weather is normal/calm; total of 942 record </h1>

<h1 style=background-color:lightgreen>[Insight #4.1]While the lowest accident recorded with fatal injury shows in year 2019 and 2020 in Snowing</h1>

<h1 style=background-color:lightblue>[#5] Which of the Month and Year has the most record of accident with fatal injury </h1>



In [28]:
m_2019 = np.round(accidents.groupby(['Year' , 'Month', 'Accident_Severity']).size())
m_2019.unstack()

Unnamed: 0_level_0,Accident_Severity,Fatal,Serious,Slight
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019.0,1.0,81,699,4538
2019.0,2.0,83,750,5340
2019.0,3.0,102,775,5026
2019.0,4.0,91,811,4276
2019.0,5.0,96,803,5366
2019.0,6.0,73,865,4902
2019.0,7.0,86,821,5173
2019.0,8.0,105,886,5004
2019.0,9.0,101,858,4990
2019.0,10.0,86,831,5312


<h1 style=background-color:lightgreen>[Insight #5] Most accident record happened in December 2019 with a total of 115 </h1>

<h1 style=background-color:lightblue>[#6] What kind of road type and places does the motorcycle most got involve in an accident based on the record?</h1>


In [29]:
accidents['Vehicle_Type'].value_counts()

Vehicle_Type
Car                                      497992
Van / Goods 3.5 tonnes mgw or under       34160
Bus or coach (17 or more pass seats)      25878
Motorcycle over 500cc                     25657
Goods 7.5 tonnes mgw and over             17307
Motorcycle 125cc and under                15269
Taxi/Private hire car                     13294
Motorcycle over 125cc and up to 500cc      7656
Motorcycle 50cc and under                  7603
Goods over 3.5t. and under 7.5t            6096
Other vehicle                              5637
Minibus (8 - 16 passenger seats)           1976
Agricultural vehicle                       1947
Pedal cycle                                 197
Data missing or out of range                  6
Ridden horse                                  4
Name: count, dtype: int64

In [30]:
motor_types = ['Motorcycle over 500cc', 'Motorcycle 125cc and under', 'Motorcycle over 125cc and up to 500cc', 'Motorcycle 50cc and under']

In [31]:
m_2019 = np.round(accidents[accidents['Vehicle_Type'].isin(motor_types)].groupby(['Road_Type', 'Urban_or_Rural_Area']).size())
m_2019.unstack()

Urban_or_Rural_Area,Rural,Unallocated,Urban
Road_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dual carriageway,3971,0,4482
One way street,86,0,1007
Roundabout,1247,0,2486
Single carriageway,13600,0,28359
Slip road,360,0,258
unknown,100,0,229


<h1 style=background-color:lightgreen>[Insight #6] Most motorcycle accident record happened in Urban with Single Carriageway Roadtype </h1>

<h1 style=background-color:lightblue>[#7] Which weather condition did the car got the highest number fatal injury</h1>


In [32]:
accidents['Weather_Conditions'].value_counts()

Weather_Conditions
Fine no high winds       520885
Raining no high winds     79696
Other                     17150
unnacounted               14128
Raining + high winds       9615
Fine + high winds          8554
Snowing no high winds      6238
Fog or mist                3528
Snowing + high winds        885
Name: count, dtype: int64

In [33]:
weather_rain = ['Raining no high winds', 'Raining + high winds']
weather_fine = ['Fine no high winds', 'Fine + high winds']

<h3 style=background-color:pink>Raining</h3>

In [34]:
rain = np.round(accidents[accidents['Weather_Conditions'].isin(weather_rain)].groupby(['Vehicle_Type','Accident_Severity']).size())
rain.unstack()

Accident_Severity,Fatal,Serious,Slight
Vehicle_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Agricultural vehicle,1,49,231
Bus or coach (17 or more pass seats),42,430,3099
Car,751,8060,58335
Data missing or out of range,0,0,0
Goods 7.5 tonnes mgw and over,27,274,2088
Goods over 3.5t. and under 7.5t,2,94,716
Minibus (8 - 16 passenger seats),2,38,232
Motorcycle 125cc and under,21,266,1763
Motorcycle 50cc and under,10,116,878
Motorcycle over 125cc and up to 500cc,10,109,891


<h3 style=background-color:pink>Fine Weather</h3>

In [35]:
fine = np.round(accidents[accidents['Weather_Conditions'].isin(weather_fine)].groupby(['Vehicle_Type','Accident_Severity']).size())
fine.unstack()

Accident_Severity,Fatal,Serious,Slight
Vehicle_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Agricultural vehicle,19,218,1296
Bus or coach (17 or more pass seats),270,2794,17684
Car,5518,55275,338338
Data missing or out of range,0,0,5
Goods 7.5 tonnes mgw and over,178,1935,11727
Goods over 3.5t. and under 7.5t,59,717,4141
Minibus (8 - 16 passenger seats),26,213,1347
Motorcycle 125cc and under,162,1668,10414
Motorcycle 50cc and under,84,841,5198
Motorcycle over 125cc and up to 500cc,92,851,5213


<h1 style=background-color:lightgreen>[Insight #7] Most Fatal Accident in Car Type happened when the weather is fine </h1>

<h1 style=background-color:lightblue>[#8]Does the wet road surface condition has an impact with the fatal injury</h1>


In [36]:
accidents['Index'].count()

np.int64(660679)

In [37]:
accidents['Road_Surface_Conditions'].value_counts()

Road_Surface_Conditions
Dry                     448547
Wet or damp             186708
Frost or ice             18517
Snow                      5890
Flood over 3cm. deep      1017
Name: count, dtype: int64

In [38]:
wet = accidents[accidents['Road_Surface_Conditions'] == 'Wet or damp'].count()
wet['Road_Surface_Conditions']

np.int64(186708)

In [30]:
fatal = accidents[accidents['Accident_Severity'] == 'Fatal'].count()
fatal['Accident_Severity']

np.int64(8661)

In [46]:
r_a = accidents[accidents['Road_Surface_Conditions'] == 'Wet or damp'].count() / accidents[accidents['Accident_Severity'] == 'Fatal'].count()
total = r_a / 660679
total * 100

Index                      0.003263
Accident_Severity          0.003263
Accident Date              0.003205
Latitude                   0.003263
Light_Conditions           0.003263
District Area              0.003263
Longitude                  0.003263
Number_of_Casualties       0.003263
Number_of_Vehicles         0.003263
Road_Surface_Conditions    0.003263
Road_Type                  0.003263
Urban_or_Rural_Area        0.003263
Weather_Conditions         0.003263
Vehicle_Type               0.003263
Year                       0.003205
Month                      0.003205
Day                        0.003205
DayOfWeek                  0.003205
dtype: float64

<h1 style=background-color:lightgreen>[Insight #8] </h1>

<h1 style=background-color:lightblue>[#9]What is the average number of incident happened per month?</h1>


In [None]:
accidents['Month'].mean()

In [None]:
months= accidents.groupby('Month').size()
months

In [None]:
accidents_per_month = monthly_accidents.mean()
accidents_per_month

<h1 style=background-color:lightgreen>[Insight #9] 22083.9 Is the average accident per month</h1>

<h1 style=background-color:lightblue>[#10]What district has the most record of accident in a year?</h1>

In [None]:
accidents['District Area'].value_counts()

In [None]:
area_year = np.round(accidents[accidents['District Area'] == 'Birmingham'].groupby(['Year']).size())
area_year

<h1 style=background-color:lightgreen>[Insight #10] District area name Birmingham has the most record of accident happen in year 2019 with 1554 records</h1>

<h1 style=background-color:lightblue>[#11] Is it possible that the fatal injury might happened in that district?</h1>

In [None]:
accidents['District Area'].value_counts()

In [None]:
accidents['Accident_Severity'].value_counts()

In [None]:
birminghan_fatal = accidents[(accidents['Accident_Severity'] == 'Fatal') & (accidents['District Area'] == 'Birmingham')]
birminghan_fatal['Accident_Severity'].count()

In [None]:
b_f = birminghan_fatal['Accident_Severity'].count() / accidents[accidents['Accident_Severity'] == 'Fatal'].count()
b_f['Accident_Severity'] * 100

<h1 style=background-color:lightgreen>[Insight #11] There are a little chance that an accident with fatal injury might happened in the district named birminghan </h1>

<h1 style=background-color:lightblue>[#12]Correlation of Number of vehicle and Number of casualties </h1>

In [7]:
accidents['Number_of_Vehicles'].corr(accidents['Number_of_Casualties'])

np.float64(0.22888886126927627)

<h1 style=background-color:lightgreen>[Insight #12] </h1>

<h1 style=background-color:lightblue>[#13]What is the lowest accident_severity in the records?</h1>

In [66]:
accidents['Accident_Severity'].value_counts()

Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64

<h1 style=background-color:lightgreen>[Insight #13] Fatal is the lowest accident severity recorded</h1>

<h1 style=background-color:lightblue>[#14]What is a type of vehicle that has the highest accident record?</h1>

In [77]:
accidents['Vehicle_Type'].value_counts()

Vehicle_Type
Car                                      497992
Van / Goods 3.5 tonnes mgw or under       34160
Bus or coach (17 or more pass seats)      25878
Motorcycle over 500cc                     25657
Goods 7.5 tonnes mgw and over             17307
Motorcycle 125cc and under                15269
Taxi/Private hire car                     13294
Motorcycle over 125cc and up to 500cc      7656
Motorcycle 50cc and under                  7603
Goods over 3.5t. and under 7.5t            6096
Other vehicle                              5637
Minibus (8 - 16 passenger seats)           1976
Agricultural vehicle                       1947
Pedal cycle                                 197
Data missing or out of range                  6
Ridden horse                                  4
Name: count, dtype: int64

<h1 style=background-color:lightgreen>[Insight #14] Car is the most vehicle type recorded</h1>

<h1 style=background-color:lightblue>[#15]What is a type of vehicle that has the highest accident in a single cariageway roadtype that is more involved in fatal accidents?</h1>

In [64]:
acdnt_year = np.round(accidents[accidents['Accident_Severity'] == 'Fatal'].groupby(['Vehicle_Type','Road_Type']).size())
acdnt_year.unstack()

Road_Type,Dual carriageway,One way street,Roundabout,Single carriageway,Slip road,unknown
Vehicle_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agricultural vehicle,6,1,0,14,0,0
Bus or coach (17 or more pass seats),63,2,6,249,3,2
Car,1364,69,109,4973,33,29
Data missing or out of range,0,0,0,0,0,0
Goods 7.5 tonnes mgw and over,49,3,4,158,2,0
Goods over 3.5t. and under 7.5t,13,4,0,48,1,1
Minibus (8 - 16 passenger seats),5,0,0,24,0,0
Motorcycle 125cc and under,33,2,3,150,1,0
Motorcycle 50cc and under,23,4,0,68,0,0
Motorcycle over 125cc and up to 500cc,22,0,5,78,0,0


<h1 style=background-color:lightgreen>[Insight #15] Car is the most vehicle type involve in fatal accidents</h1>

<h1 style=background-color:lightblue>[#16]what is the percentage of agricultural vehicle that involve in an accident in an urban area?</h1>

In [74]:
accidents[accidents['Vehicle_Type'] == 'Agricultural vehicle']['Index'].count()

np.int64(1947)

In [None]:
accidents['Vehicle_Type']

In [None]:
accidents[accidents['Vehicle_Type'] == 'Agricultural vehicle']

In [78]:
agri = np.round(accidents[accidents['Vehicle_Type'] == 'Agricultural vehicle'].groupby(['Urban_or_Rural_Area']).size())
agri

Urban_or_Rural_Area
Rural           675
Unallocated       0
Urban          1272
dtype: int64

In [79]:
agri_urban = 1272 / 1947
agri_urban * 100

65.33127889060091

<h1 style=background-color:lightgreen>[Insight #16] 65% of an accident involving the agricultural vehicle happened in urban area</h1>

<h1 style=background-color:lightblue>[#17]What record of Fatal acccidenthas the highest record? Urban or Rural</h1>

In [84]:
acdnt_year = np.round(accidents.groupby(['Accident_Severity','Urban_or_Rural_Area']).size())
acdnt_year.unstack()

Urban_or_Rural_Area,Rural,Unallocated,Urban
Accident_Severity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fatal,5601,0,3060
Serious,37312,1,50904
Slight,196077,10,367714


<h1 style=background-color:lightgreen>[Insight #17] This insight shows that Most fatal accident is recorded from rural area</h1>

<h1 style=background-color:lightblue>[#18]What month does the Motorcycle has a highest record accident urban area?</h1>

In [87]:
motor_types = ['Motorcycle over 500cc', 'Motorcycle 125cc and under', 'Motorcycle over 125cc and up to 500cc', 'Motorcycle 50cc and under']

In [89]:
motor = np.round(accidents[accidents['Vehicle_Type'].isin(motor_types)].groupby(['Urban_or_Rural_Area','Month']).size())
motor.unstack()

Month,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0
Urban_or_Rural_Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Rural,546,732,636,589,587,641,691,671,624,690,647,773
Unallocated,0,0,0,0,0,0,0,0,0,0,0,0
Urban,999,1223,1282,1100,1219,1201,1235,1166,1259,1386,1391,1347


<h1 style=background-color:lightgreen>[Insight #18] Month of november is a month where a motorcycle has a highest record</h1>

<h1 style=background-color:lightblue>[#19]What district has the largest number of recorded accident?</h1>

In [8]:
accidents['District Area'].value_counts()

District Area
Birmingham            13491
Leeds                  8898
Manchester             6720
Bradford               6212
Sheffield              5710
                      ...  
Berwick-upon-Tweed      153
Teesdale                142
Shetland Islands        133
Orkney Islands          117
Clackmannanshire         91
Name: count, Length: 422, dtype: int64

<h1 style=background-color:lightgreen>[Insight #19] Birmingham has the highest number of accidents recorded</h1>