In [6]:
import urllib.request
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import os
from tqdm.notebook import tqdm
from numpy import random
# Global plot styling, applied once for every figure drawn later on.
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 18,
    'figure.figsize': (18, 10),
    # 8-digit hex colour (RGBA) with alpha 00: transparent figure background.
    'figure.facecolor': '#00000000',
})

In [7]:
# Reference page for the dataset (not downloaded, kept for provenance).
ref_url = 'https://www.bst-tsb.gc.ca/eng/stats/aviation/data-5.html'

# Data dictionary describing the columns of the table extracts.
data_dictionary_url = 'https://www.bst-tsb.gc.ca/eng/stats/aviation/csv/ASISdb-dd.csv'

# The five table extracts live in one directory and differ only by the view
# name embedded in the file name, so each URL is filled in from one template.
_asis_table_url = 'https://www.bst-tsb.gc.ca/includes/stats/csv/Air/ASISdb_MDOTW_VW_{}_PUBLIC.csv'.format

occurence_table_url = _asis_table_url('OCCURRENCE')
aircraft_table_url = _asis_table_url('AIRCRAFT')
injuries_table_url = _asis_table_url('INJURIES')
events_phases_table_url = _asis_table_url('EVENTS_AND_PHASES')
survivability_table_url = _asis_table_url('SURVIVABILITY')


In [8]:
# Local cache file name -> URL it is fetched from. Files that are already
# present in the working directory are not downloaded again.
downloads = {
    'data_dict.csv': data_dictionary_url,
    'occurence.csv': occurence_table_url,
    'aircraft.csv': aircraft_table_url,
    'injuries.csv': injuries_table_url,
    'events_phases.csv': events_phases_table_url,
    'survivability.csv': survivability_table_url,
}

for local_name, source_url in downloads.items():
    # Cached by an earlier run: skip the network round-trip.
    if os.path.exists(local_name):
        continue
    # Fetch under a temporary name and rename only once the transfer has
    # finished. If the download is interrupted, urlretrieve raises and the
    # final file name is never created, so a truncated file cannot be
    # mistaken for a complete cached copy on the next run.
    partial_name = local_name + '.part'
    urllib.request.urlretrieve(source_url, partial_name)
    os.replace(partial_name, local_name)


In [40]:
import warnings

# Suppress every warning category from this point on.
# NOTE(review): this is a blanket filter, so it also hides warnings raised by
# later cells; consider narrowing it to the specific category being silenced.
warnings.filterwarnings("ignore")


def _load_table(csv_name):
    """Read one downloaded CSV file into a DataFrame, decoding it as ISO-8859-1."""
    return pd.read_csv(csv_name, encoding='ISO-8859-1')


data_dict_df = _load_table('data_dict.csv')
occurence_df = _load_table('occurence.csv')
aircraft_df = _load_table('aircraft.csv')
injuries_df = _load_table('injuries.csv')
events_phases_df = _load_table('events_phases.csv')
survivability_df = _load_table('survivability.csv')

In [44]:
# Column names of interest, one list per table. 'OccNo' leads every list.
# NOTE: 'VisibilyCeiling' is spelled as in the original list; do not "fix" it.
occurence_sel_columns = [
    'OccNo',
    'AirportID_CountryID_DisplayEng',
    'AirportID_ProvinceID_DisplayEng',
    'Location',
    'Latitude',
    'Longitude',
    'ICAO',
    'CountryID_DisplayEng',
    'ICAO_DisplayEng',
    'OccDate',
    'OccIncidentTypeID_DisplayEng',
    'OccRegionID_DisplayEng',
    'OccTime',
    'OccTypeID_DisplayEng',
    'ReportedByID_DisplayEng',
    'SeriousIncidentEnum_DisplayEng',
    'TimeZoneID_DisplayEng',
    'TotalFatalCount',
    'TotalMinorCount',
    'TotalNoneCount',
    'TotalSeriousCount',
    'TotalUnknownCount',
    'InjuriesEnum_DisplayEng',
    'LightCondID_DisplayEng',
    'SkyCondID_DisplayEng',
    'InflightIcingCondEnum_DisplayEng',
    'GeneralWeatherID_DisplayEng',
    'Temperature',
    'VisibilyCeiling',
    'Visibility',
    'WeatherPhenomenaTypeID_DisplayEng',
    'TakeOffLandingTypeID_DisplayEng',
    'RunwaySurfaceID_DisplayEng',
    'SurfaceContaminationID_DisplayEng',
]

aircraft_sel_columns = [
    'OccNo',
    'RegistrationNo',
    'AircraftTypeID_DisplayEng',
    'AircraftModelID_DisplayEng',
    'AircraftMakeID_DisplayEng',
    'RegCountryID_DisplayEng',
    'OrganizationID_DisplayEng',
    'OperatorTypeID_DisplayEng',
    'LandingGearID_DisplayEng',
    'WakeTurbulenceCategoryID_DisplayEng',
    'YearOfManuf',
    'NumberOfEngine',
    'AmateurBuildIND',
    'DamageLevelID_DisplayEng',
    'WeightWithinLimitsEnum_DisplayEng',
    'DestAirportID_AirportName',
    'DepartAirportID_AirportName',
    'FlightPlanTypeID_DisplayEng',
    'OperationTypeID_DisplayEng',
    'FlightNo',
    'DangerousGoodEnum_DisplayEng',
    'ICAODepart',
    'ICAODestination',
    'FlightPhaseID_DisplayEng',
    'AtsTypeID_DisplayEng',
    'AtsServiceID_DisplayEng',
    'AtsUnitName',
    'TrafficAdvisoryTypeID_DisplayEng',
    'EvasiveActionByATSEnum_DisplayEng',
    'EvasiveActionByPilotEnum_DisplayEng',
    'OtherAircraftSightedEnum_DisplayEng',
    'AtsUnitInvolvedEnum_DisplayEng',
]

injuries_sel_columns = [
    'OccNo',
    'InjuryCategoryID_DisplayEng',
    'FatalCount',
    'SeriousCount',
    'MinorCount',
    'NoneCount',
    'Unknown_MissingCount',
]

events_phases_sel_columns = [
    'OccNo',
    'EventID_DisplayEng',
    'FullEventDescEng',
    'PhaseID_DisplayEng',
    'FullPhaseDescEng',
]

survivability_sel_columns = [
    'OccNo',
    'SurvivableEnum_DisplayEng',
    'AircraftEvacTime',
    'EvacEgressIssueEnum_DisplayEng',
    'UlbID_DisplayEng',
    'FlightFollowingSystemID_DisplayEng',
    'ELTInstalledEnum_DisplayEng',
    'ELTSignalReceivedEnum_DisplayEng',
    'ELTStatusID_DisplayEng',
    'SurvEquiID_DisplayEng',
    'EquipEffReasonID_DisplayEng',
    'EquipEffectiveEnum_DisplayEng',
    'EquipInfluenceEnum_DisplayEng',
    'EvacHamperedID_DisplayEng',
]


- **OccNo** : The unique occurrence number for general reference. <br>
- **AirportID_CountryID_DisplayEng** : The country in which the airport is located, in English. <br>
- **AirportID_ProvinceID_DisplayEng** : The province or the state in which the airport is located, in Canada or the United States respectively, in English.  <br>
- **Location** : The location of the occurrence <br>
- **Latitude** : Latitude of the occurrence or airport location <br>
- **Longitude** : Longitude of the occurrence or airport location <br>
- **ICAO** : The International Civil Aviation Organization (ICAO) 4-letter airport code. <br>
- **CountryID_DisplayEng** : The country of the occurrence, in English. <br>
- **ICAO_DisplayEng** : The International Civil Aviation Organization (ICAO) occurrence category, in English.
For one occurrence, multiple ICAO categories may be assigned, that will generate multiple entries/rows. <br>
- **OccDate** : The occurrence date. Date format is YYYY-MM-DD. <br>
- **OccIncidentTypeID_DisplayEng** : If the occurrence is a reportable incident, the type of incident as defined in the Transportation Safety Board Regulations, Part 1, Subparagraph 2(1)(b), in English. <br>
- **OccRegionID_DisplayEng** : The region of the occurrence, as defined by the geographical area each regional office has been assigned, in English. <br>
- **OccTime** : The time the occurrence happened. Time format is hh:mm (24-hour clock). <br>
- **OccTypeID_DisplayEng** : A description of the occurrence type (accident/incident reportable), in English. <br>
- **ReportedByID_DisplayEng** : The name of the organization or entity that reported the occurrence, in English. <br>
- **SeriousIncidentEnum_DisplayEng** : Indicates whether the occurrence was a serious incident as defined by ICAO in Annex 13, in English. Indicator: Yes/No. <br>
- **TimeZoneID_DisplayEng** : The time zone used for reporting the time of occurrence, in English. <br>
- **TotalFatalCount** : The total number of fatalities (includes any ground fatalities). <br>
- **TotalMinorCount** : The total number of minor injuries (includes any ground injuries). <br>
- **TotalNoneCount** : The total number of people not injured (aircraft occupants only). <br>
- **TotalSeriousCount** : The total number of serious injuries (includes any ground injuries). <br>
- **TotalUnknownCount** : The total number of people whose status is unknown (aircraft occupants only). <br>
- **InjuriesEnum_DisplayEng** : Indicates whether there were any injuries related to the occurrence, including ground injuries, in English. <br>
- **LightCondID_DisplayEng** : A description of the light conditions, in English. <br>
- **SkyCondID_DisplayEng** : The sky conditions at the time of the occurrence, in English. <br>
- **InflightIcingCondEnum_DisplayEng** : Indicates whether in-flight icing conditions existed, if known, in English. <br>
- **GeneralWeatherID_DisplayEng** : Indicates whether the known weather conditions were conducive to visual or instrument flight rules, in English. <br>
- **Temperature** : The temperature at the time of the occurrence, in degrees Celsius by default. <br>
- **VisibilyCeiling** : The visibility ceiling, in feet. <br>
- **Visibility** : The visibility, in statute miles. <br>
- **WeatherPhenomenaTypeID_DisplayEng** : The type of weather phenomena at the time of the occurrence, in English.
Weather phenomena grid - Multiple weather phenomena can be specified for each occurrence (OccID); each type and its associated description will appear as a separate entry. <br>
- **TakeOffLandingTypeID_DisplayEng** : The type of takeoff and landing area associated with the occurrence, in English. <br>
- **RunwaySurfaceID_DisplayEng** : The texture of the surface of the runway involved in the occurrence, in English. <br>
- **SurfaceContaminationID_DisplayEng** : The type of surface contamination, if relevant, in English.
Note: Each description will result in a separate entry. <br>
- **OccNo** : The unique occurrence number for general reference. <br>
- **RegistrationNo** : The registration number of the occurrence aircraft. <br>
- **AircraftTypeID_DisplayEng** : The aircraft type as defined in the Canadian Aviation Regulations, Part 1, Subpart 1. <br>
- **AircraftModelID_DisplayEng** : The aircraft model, in English. <br>
- **AircraftMakeID_DisplayEng** : The aircraft make, in English. <br>
- **RegCountryID_DisplayEng** : The country of registration of the occurrence aircraft, in English. <br>
- **OrganizationID_DisplayEng** : The name of the organization (if the operator is an organization), in English. <br>
- **OperatorTypeID_DisplayEng** : The type of operator (private, commercial, state) involved in the occurrence, in English. <br>
- **LandingGearID_DisplayEng** : The type of landing gear on the aircraft, in English. <br>
- **WakeTurbulenceCategoryID_DisplayEng** : The wake turbulence category of the occurrence aircraft, as defined by the Canadian Aviation Regulations, Standard 821 - Canadian Domestic Air Traffic Control Separation, in English. <br>
- **YearOfManuf** : The year in which the aircraft was manufactured. <br>
- **NumberOfEngine** : The number of engines installed on the aircraft. <br>
- **AmateurBuildIND** : Indicates whether the aircraft was amateur-built. <br>
- **DamageLevelID_DisplayEng** : The aircraft level of damage as defined by ICAO, in English. <br>
- **WeightWithinLimitsEnum_DisplayEng** : Indicates whether the actual aircraft weight was within limits at the time of the occurrence, in English. Indicator: Yes/No/Unknown <br>
- **DestAirportID_AirportName** : The name of the destination airport, if applicable.  <br>
- **DepartAirportID_AirportName** : The name of the departure airport, if applicable. <br>
- **FlightPlanTypeID_DisplayEng** : The type of flight plan, in English. <br>
- **OperationTypeID_DisplayEng** : The type of operation performed at the time of the occurrence, in English. <br>
- **FlightNo** : The flight number of the specific aircraft involved (AcID). <br>
- **DangerousGoodEnum_DisplayEng** : Indicates whether there were dangerous goods on board the occurrence aircraft, if known, in English. <br>
- **ICAODepart** : The International Civil Aviation Organization (ICAO) identification for the airport, aerodrome, or heliport of the intended takeoff. <br>
- **ICAODestination** : The International Civil Aviation Organization (ICAO) identification for the airport, aerodrome, or heliport of the intended landing. <br>
- **FlightPhaseID_DisplayEng** : The flight phase when fire or smoke is initiated, in English. <br>
- **AtsTypeID_DisplayEng** : The air traffic service (ATS) type description, in English. <br>
- **AtsServiceID_DisplayEng** : The air traffic service (ATS) provided in Canada or by an international equivalent, in English. <br>
- **AtsUnitName** : The air traffic service (ATS) unit name. <br>
- **TrafficAdvisoryTypeID_DisplayEng** : The type of traffic advisory issued at the time of the occurrence, in English. <br>
- **EvasiveActionByATSEnum_DisplayEng** : Indicates whether evasive action was instructed by the air traffic service (ATS), if known, in English. <br>
- **EvasiveActionByPilotEnum_DisplayEng** : Indicates whether evasive action was taken by the pilot, if known, in English. <br>
- **OtherAircraftSightedEnum_DisplayEng** : Indicates whether the other aircraft was sighted by the specific aircraft involved (Yes/No/Wrong aircraft), in English. <br>
- **AtsUnitInvolvedEnum_DisplayEng** : Indicates whether an air traffic service (ATS) unit was involved, in English. <br>
- **OccNo** : The unique occurrence number for general reference. <br>
- **InjuryCategoryID_DisplayEng** : A description of the injury category (crew/passengers/ground), in English.
Note: Injury count for crew/passengers/ground are stored in separate entries. <br>
- **FatalCount** : The number of fatalities for the associated occurrence/occurrence aircraft. <br>
- **SeriousCount** : The number of serious injuries associated with the occurrence or occurrence aircraft. <br>
- **MinorCount** : The number of minor injuries for associated occurrence/occurrence aircraft. <br>
- **NoneCount** : The number of people not injured for the associated occurrence/occurrence aircraft. <br>
- **Unknown_MissingCount** : The number of people who are missing or whose location is unknown for the associated occurrence/occurrence aircraft. <br>
- **OccNo** : The unique occurrence number for general reference. <br>
- **EventID_DisplayEng** : A description of the event, in English. <br>
- **FullEventDescEng** : An algorithm-generated text string describing the full event, in English. <br>
- **PhaseID_DisplayEng** : The phase of the flight, in English.
Note: Multiple phases can be assigned to each occurrence (OccID) or specific aircraft (AcID) and each will have an associated EventID. <br>
- **FullPhaseDescEng** : An algorithm-generated text string describing the phase in detail, in English. <br>
- **OccNo** : The unique occurrence number for general reference. <br>
- **SurvivableEnum_DisplayEng** : Indicates whether the occurrence was survivable (for occurrences involving an impact), in English. Indicator: Yes/No/Unknown. <br>
- **AircraftEvacTime** : The duration of the aircraft evacuation, in minutes. <br>
- **EvacEgressIssueEnum_DisplayEng** : Indicates whether there were evacuation egress issues, if known, in English. <br>
- **UlbID_DisplayEng** : Indicates whether the underwater locator beacon was installed, operated, and/or relevant to the occurrence, in English. <br>
- **FlightFollowingSystemID_DisplayEng** : Indicates whether a flight following system (a type of onboard aircraft position tracking system) was installed, and whether it provided any assistance in locating the aircraft, in English. <br>
- **ELTInstalledEnum_DisplayEng** : Indicates whether an emergency locator transmitter (ELT) was installed on the occurrence aircraft, if known, in English. <br>
- **ELTSignalReceivedEnum_DisplayEng** : Indicates whether an ELT signal was received, if known, in English. <br>
- **ELTStatusID_DisplayEng** : A description of the ELT status, in English. <br>
- **SurvEquiID_DisplayEng** : The survival equipment available on the occurrence aircraft, if relevant, in English.
Equipment grid - Multiple pieces of survival equipment can be assigned to an occurrence (OccID). Each piece of equipment will result in a separate entry. <br>
- **EquipEffReasonID_DisplayEng** : The reason for survival equipment effectiveness, in English. <br>
- **EquipEffectiveEnum_DisplayEng** : Indicates whether equipment was effective and corresponds to the survival equipment, if known, in English. <br>
- **EquipInfluenceEnum_DisplayEng** : Indicates whether the equipment influenced the survivability of the occurrence, if known, in English. <br>
- **EvacHamperedID_DisplayEng** : The reason(s) why the evacuation was hampered, if applicable, in English.