### Risk index/safety index/metric ideation

- (fatalities + major injuries + minor injuries) / total passengers?
- mean of the above numbers for every make and model?
- risk index number with weights for fatal_fraction, serious_fraction, minor_fraction

In [2]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns

In [3]:
df = pd.read_csv('data_cleaned_final.csv')

In [5]:
# Reorder columns
df = df.reindex(columns=['Date', 'Year', 'Month', 'Location', 'Country', 'Injury_Severity', 'Damage_Type', 'Aircraft_Category', 'Make', 'Model', 'Engines', 'Engine_Type', 'Fatal_Injuries', 'Serious_Injuries', 'Minor_Injuries', 'Uninjured', 'Total_Passengers', 'Weather'])

In [7]:
# Creating columns for the fraction of each injury type relative to total passengers
df['Fatal_Injuries_Fraction'] = df['Fatal_Injuries'] / df['Total_Passengers']
df['Serious_Injuries_Fraction'] = df['Serious_Injuries'] / df['Total_Passengers']
df['Minor_Injuries_Fraction'] = df['Minor_Injuries'] / df['Total_Passengers']
df['Uninjured_Fraction'] = df['Uninjured'] / df['Total_Passengers']

In [13]:
w1 = 0.6
w2 = 0.3
w3 = 0.1

df['Risk_Index'] = (w1 * df['Fatal_Injuries_Fraction']) + (w2 * df['Serious_Injuries_Fraction']) + (w3 * df['Minor_Injuries_Fraction'])

In [22]:
sizewise = [
    df['Total_Passengers'] < 100,
    (df['Total_Passengers'] >= 100) & (df['Total_Passengers'] <= 249),
    df['Total_Passengers'] >= 250
]

choices = ['Small', 'Medium', 'Large']

df['Plane_Size'] = np.select(sizewise, choices, default='unknown')


In [23]:
df['Plane_Size'].value_counts()

Plane_Size
Small     37251
Medium      167
Large        43
Name: count, dtype: int64

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37461 entries, 0 to 37460
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Date                       37461 non-null  object 
 1   Year                       37461 non-null  int64  
 2   Month                      37461 non-null  int64  
 3   Location                   37459 non-null  object 
 4   Country                    37461 non-null  object 
 5   Injury_Severity            37461 non-null  object 
 6   Damage_Type                37461 non-null  object 
 7   Aircraft_Category          37461 non-null  object 
 8   Make                       37461 non-null  object 
 9   Model                      37461 non-null  object 
 10  Engines                    37461 non-null  float64
 11  Engine_Type                36420 non-null  object 
 12  Fatal_Injuries             37461 non-null  float64
 13  Serious_Injuries           37461 non-null  flo

In [36]:
df['Model'].value_counts()

Model
152          1252
172          1102
172N          775
PA 28 140     597
172M          539
             ... 
R172 E          1
S1 11B          1
B 58            1
A188 B          1
PA42            1
Name: count, Length: 2489, dtype: int64

In [40]:
# Creating Mean Risk Index for each plane model
model_mean_risk_index = df.groupby('Model')['Risk_Index'].mean()
# Add column mean_risk_index to df
df = df.join(model_mean_risk_index, on='Model', rsuffix='_mean')

In [39]:
# Total number of accidents for each model
model_total_accidents = df['Model'].value_counts()

In [38]:
type(model_mean_risk_index)

pandas.core.series.Series

In [43]:
# Finding the safest planes
df_safe_planes = df.loc[(df['Risk_Index_mean'] == 0) & (df['Damage_Type'] == 'Minor')]

In [44]:
df_safe_planes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 33 entries, 790 to 36770
Data columns (total 25 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Date                       33 non-null     object 
 1   Year                       33 non-null     int64  
 2   Month                      33 non-null     int64  
 3   Location                   33 non-null     object 
 4   Country                    33 non-null     object 
 5   Injury_Severity            33 non-null     object 
 6   Damage_Type                33 non-null     object 
 7   Aircraft_Category          33 non-null     object 
 8   Make                       33 non-null     object 
 9   Model                      33 non-null     object 
 10  Engines                    33 non-null     float64
 11  Engine_Type                32 non-null     object 
 12  Fatal_Injuries             33 non-null     float64
 13  Serious_Injuries           33 non-null     float64
 

In [47]:
df_safe_planes['Plane_Size'].value_counts()

Plane_Size
Small     17
Medium     8
Large      8
Name: count, dtype: int64

In [48]:
df_safe_planes.loc[df_safe_planes['Plane_Size'] == 'Small']

Unnamed: 0,Date,Year,Month,Location,Country,Injury_Severity,Damage_Type,Aircraft_Category,Make,Model,...,Uninjured,Total_Passengers,Weather,Fatal_Injuries_Fraction,Serious_Injuries_Fraction,Minor_Injuries_Fraction,Uninjured_Fraction,Risk_Index,Plane_Size,Risk_Index_mean
800,1990-06-17,1990,6,"TAKU, AK",United States,Non-Fatal,Minor,Airplane,cessna,206U,...,10.0,10.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
1631,1990-10-29,1990,10,"COLUMBUS, OH",United States,Non-Fatal,Minor,Airplane,mcdonnell douglas,DC 9 31,...,49.0,49.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
3440,1991-10-24,1991,10,"DAYTONA BEACH, FL",United States,Non-Fatal,Minor,Airplane,piper,PA 28 R 180,...,3.0,3.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
6293,1993-07-28,1993,7,"FOND DU LAC, WI",United States,Non-Fatal,Minor,Airplane,north american,F 51,...,5.0,5.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
7902,1994-07-31,1994,7,"READINGTON, NJ",United States,Non-Fatal,Minor,Airplane,boeing,PT 13,...,4.0,4.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
9447,1995-07-28,1995,7,"DALLAS, TX",United States,Non-Fatal,Minor,Airplane,boeing,737 3A4,...,99.0,99.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
9475,1995-08-01,1995,8,"GLENDALE, AZ",United States,Non-Fatal,Minor,Airplane,north american,AT 6B,...,3.0,3.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
10305,1996-02-20,1996,2,"FAIRBANKS, AK",United States,Non-Fatal,Minor,Airplane,boeing,757 232,...,91.0,91.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
10424,1996-03-23,1996,3,"NASHVILLE, TN",United States,Non-Fatal,Minor,Airplane,piper,PA 28 201,...,5.0,5.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0
15871,1999-10-17,1999,10,"JEAN, NV",United States,Non-Fatal,Minor,Airplane,waco,UPF7,...,5.0,5.0,VMC,0.0,0.0,0.0,1.0,0.0,Small,0.0


In [49]:
df_safe_planes.loc[df_safe_planes['Plane_Size'] == 'Small']['Make'].value_counts()

Make
boeing               6
piper                3
cessna               2
north american       2
waco                 2
mcdonnell douglas    1
gulfstream           1
Name: count, dtype: int64

In [33]:
df_safe_planes.loc[df_safe_planes['Plane_Size'] == 'Small'][100:]

Unnamed: 0,Date,Year,Month,Location,Country,Injury_Severity,Damage_Type,Aircraft_Category,Make,Model,...,Minor_Injuries,Uninjured,Total_Passengers,Weather,Fatal_Injuries_Fraction,Serious_Injuries_Fraction,Minor_Injuries_Fraction,Uninjured_Fraction,Risk_Index,Plane_Size
32382,2015-11-15,2015,11,"Greeley, CO",United States,Non-Fatal,Minor,Airplane,piper,J3C,...,0.0,2.0,2.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
32500,2016-02-03,2016,2,"San Diego, CA",United States,Non-Fatal,Minor,Airplane,cessna,172S,...,0.0,4.0,4.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
32961,2016-08-13,2016,8,"Fort Lauderdale, FL",United States,Non-Fatal,Minor,Airplane,piper,PA 34,...,0.0,3.0,3.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
33510,2017-06-08,2017,6,"Harrisonville, MO",United States,Non-Fatal,Minor,Airplane,piper,PA 28 235,...,0.0,1.0,1.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
33606,2017-07-08,2017,7,"San Jose, CA",United States,Non-Fatal,Minor,Airplane,cessna,R182,...,0.0,7.0,7.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
34869,2019-03-21,2019,3,"Hollywood, FL",United States,Non-Fatal,Minor,Airplane,cessna,172,...,0.0,3.0,3.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
35186,2019-07-27,2019,7,"Gaithersburg, MD",United States,Non-Fatal,Minor,Airplane,cessna,172,...,0.0,3.0,3.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
35390,2019-10-22,2019,10,"Lamesa, TX",United States,Non-Fatal,Minor,Airplane,air tractor,AT 502,...,0.0,1.0,1.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
35407,2019-10-31,2019,10,"Honolulu, HI",United States,Non-Fatal,Minor,Airplane,cessna,208,...,0.0,3.0,3.0,VMC,0.0,0.0,0.0,1.0,0.0,Small
35492,2020-01-08,2020,1,"Palm Coast, FL",United States,Non-Fatal,Minor,Airplane,cessna,172,...,0.0,2.0,2.0,VMC,0.0,0.0,0.0,1.0,0.0,Small


In [50]:
df_safe_planes.to_csv('safest_planes.csv', index=False)