# Montgomery drivers

In [1]:
import pandas as pd

In [9]:
# Load the raw driver-level records.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, so columns cannot end up with mixed types.
# NOTE(review): the stray echoed line in this cell's saved output looks like the
# tail of a DtypeWarning -- confirm the warning is gone after re-running.
drivers_raw = pd.read_csv("../data/montgomery_drivers.csv", low_memory=False)

  drivers_raw = pd.read_csv("../data/montgomery_drivers.csv")


In [16]:
def describe(column):
    """Tabulate how often each distinct value occurs in ``column``.

    Missing values are kept as their own category instead of being dropped.

    Returns whatever ``column.value_counts(dropna=False)`` produces.
    """
    frequencies = column.value_counts(dropna=False)
    return frequencies

In [11]:
def prepare_data(data):
    """Select the driver/vehicle columns of interest and normalise their names.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw frame containing (at least) the source columns listed below.
        A missing source column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the selected columns renamed to CamelCase.
        Every column is converted to ``str`` except ``VehicleYear``, which is
        an integer column. Years that are missing or cannot be parsed as a
        number become ``0``.
    """
    # Source name -> normalised name; dict order fixes the output column order.
    column_names = {
        'Report Number': 'ReportNumber',
        'Vehicle ID': 'VehicleID',
        'Driver At Fault': 'DriverAtFault',
        'Injury Severity': 'InjurySeverity',
        'Driver Substance Abuse': 'DriverSubstanceAbuse',
        'Driver Distracted By': 'DriverDistractedBy',
        'Vehicle Body Type': 'VehicleType',
        'Vehicle Movement': 'VehicleMovement',
        'Vehicle Going Dir': 'VehicleGoingDir',
        'Speed Limit': 'SpeedLimit',
        'Parked Vehicle': 'ParkedVehicle',
        'Vehicle Year': 'VehicleYear',
        'Vehicle Make': 'VehicleMake',
        'Vehicle Model': 'VehicleModel',
    }
    prepared = data[list(column_names)].rename(columns=column_names).astype(str)
    # A plain astype(int) on the stringified years raises ValueError for
    # 'nan' or float-formatted values such as '2017.0'; coerce instead and
    # use 0 as the "no usable year" marker.
    years = pd.to_numeric(prepared['VehicleYear'], errors='coerce')
    prepared['VehicleYear'] = years.fillna(0).astype(int)
    return prepared

In [23]:
def change_to_unknown(string):
    """Map the various spellings of "no value" to the single label ``'UNKNOWN'``.

    Treated as unknown: ``None``, a float NaN, and any string that, after
    trimming whitespace and lower-casing, is ``'unknown'``, ``'nan'`` or empty.
    Every other value is returned unchanged (strings are NOT trimmed or
    re-cased on the way out).
    """
    # Real missing values (not yet stringified) would crash on .lower().
    if string is None or (isinstance(string, float) and string != string):
        return 'UNKNOWN'
    if not isinstance(string, str):
        return string
    normalized = string.strip().lower()
    if normalized in ('unknown', 'nan', ''):
        return 'UNKNOWN'
    return string

In [71]:
def _fill_missing_with_zero(column):
    """Replace missing entries of ``column`` with zero, keeping the column's kind.

    Numeric columns get ``0``; text columns get the string ``'0'`` so the
    column does not end up with mixed types.
    """
    if pd.api.types.is_numeric_dtype(column):
        return column.fillna(0)
    # In a stringified column a missing value shows up as the text 'nan'
    # (or an empty string), which fillna() alone cannot see.
    as_text = column.astype(str).str.strip().str.lower()
    is_missing = column.isna() | as_text.isin(['nan', ''])
    return column.mask(is_missing, '0')


def handle_nans(data):
    """Return a copy of ``data`` with missing values replaced by sentinels.

    * ``SpeedLimit`` and ``VehicleYear``: missing entries become zero
      (``0`` in numeric columns, ``'0'`` in text columns).
    * The descriptive text columns listed below are passed value by value
      through ``change_to_unknown``.

    The input frame is not modified.
    """
    data = data.copy()
    for col in ('SpeedLimit', 'VehicleYear'):
        data[col] = _fill_missing_with_zero(data[col])
    # str columns changed to unknown
    columns_to_unknown = ['DriverSubstanceAbuse', 'DriverDistractedBy', 'VehicleType', 'VehicleMovement', 'VehicleGoingDir', 'VehicleMake', 'VehicleModel']
    for col in columns_to_unknown:
        data[col] = data[col].apply(change_to_unknown)
    return data

In [38]:
def clean_substance_abuse(substance):
    """Reduce a substance-abuse description to the bare substance name.

    The qualifier words 'present', 'contributed' and 'detected' are removed
    (case-insensitively) and surrounding whitespace is trimmed. Anything that
    still contains 'combin' is reported as 'COMBINATION'; otherwise the
    remaining text is returned in upper case.
    """
    cleaned = substance.lower()
    for qualifier in ('present', 'contributed', 'detected'):
        cleaned = cleaned.replace(qualifier, '')
    cleaned = cleaned.strip()

    return 'COMBINATION' if 'combin' in cleaned else cleaned.upper()

In [52]:
def map_vehicle_type(vehicle):
    """Collapse a free-text vehicle body type into a coarse category.

    Matching is by case-insensitive substring, first hit wins, in this order:
    PASSENGER, EMERGENCY, MOTORCYCLE, BUS, TRUCK, UNKNOWN. Anything that
    matches none of them is 'OTHER'.
    """
    label = vehicle.lower()

    def mentions(*keywords):
        return any(keyword in label for keyword in keywords)

    # Passenger-style keywords only count when the text does not contain
    # 'over'; otherwise the value falls through to the rules below.
    if 'over' not in label and mentions('passenger', 'utility', 'pickup', 'van', 'wagon', 'limousine'):
        return 'PASSENGER'

    ordered_rules = (
        ('EMERGENCY', ('emergency',)),
        ('MOTORCYCLE', ('motorcycle', 'moped')),
        ('BUS', ('bus',)),
        ('TRUCK', ('truck',)),
        ('UNKNOWN', ('unknown',)),
    )
    for category, keywords in ordered_rules:
        if mentions(*keywords):
            return category
    return 'OTHER'

In [75]:
from datetime import datetime

In [77]:
# Scratch check of the current calendar year; transform_columns uses this
# value + 1 as the upper bound for plausible vehicle years.
datetime.now().year

2024

In [78]:
def transform_columns(data):
    """Derive analysis-ready columns from the NaN-handled driver frame.

    Works on a copy; the input frame is not modified. Changes made:

    * ``DriverAtFault`` / ``ParkedVehicle``: True exactly where the value is
      the string ``'Yes'``, False otherwise.
    * ``SubstanceAbuseContributed`` (new column): True where the original
      ``DriverSubstanceAbuse`` text contains 'contributed' (any case).
    * ``DriverSubstanceAbuse``: cleaned with ``clean_substance_abuse``.
    * ``VehicleType``: mapped with ``map_vehicle_type``.
    * ``VehicleYear``: values below 1900 or later than next calendar year
      are replaced by 0.
    """
    data = data.copy()
    data['DriverAtFault'] = data['DriverAtFault'].eq('Yes')
    # Must be computed before the substance text is cleaned, because the
    # cleaning step strips the word 'contributed'.
    data['SubstanceAbuseContributed'] = (
        data['DriverSubstanceAbuse'].str.lower().str.contains('contributed', regex=False, na=False)
    )
    # clean substance
    data['DriverSubstanceAbuse'] = data['DriverSubstanceAbuse'].apply(clean_substance_abuse)
    # map vehicle types
    data['VehicleType'] = data['VehicleType'].apply(map_vehicle_type)
    # boolean
    data['ParkedVehicle'] = data['ParkedVehicle'].eq('Yes')
    # delete impossible year values; the bound is computed once, not per row
    latest_plausible_year = datetime.now().year + 1
    years = data['VehicleYear']
    data['VehicleYear'] = years.mask((years < 1900) | (years > latest_plausible_year), 0)
    return data

In [82]:
# Run the cleaning pipeline stage by stage, keeping every intermediate frame
# under its own name so each step can be inspected separately.
drivers_prep = prepare_data(drivers_raw)       # select + rename columns, cast types
drivers_nonan = handle_nans(drivers_prep)      # replace missing values with sentinels
drivers = transform_columns(drivers_nonan)     # derive booleans / coarse categories

In [83]:
# Display the fully processed frame.
drivers

Unnamed: 0,ReportNumber,VehicleID,DriverAtFault,InjurySeverity,DriverSubstanceAbuse,DriverDistractedBy,VehicleType,VehicleMovement,VehicleGoingDir,SpeedLimit,ParkedVehicle,VehicleYear,VehicleMake,VehicleModel,SubstanceAbuseContributed
0,MCP3170003V,4E492574-893B-4EB1-ADCA-53FDD633D6C4,True,NO APPARENT INJURY,NONE,LOOKED BUT DID NOT SEE,PASSENGER,MOVING CONSTANT SPEED,South,0,False,2017,LEXUS,SUV,False
1,MCP3254003K,6D16232C-4E1E-49A6-B3A2-7FDEF7E506F2,False,NO APPARENT INJURY,NONE,NOT DISTRACTED,PASSENGER,MOVING CONSTANT SPEED,West,35,False,2010,TOYT,PRIUS,False
2,EJ7887003Q,82ED056D-33FA-44A5-AA2B-8DCE653FA03C,False,SUSPECTED MINOR INJURY,NONE,NOT DISTRACTED,PASSENGER,MOVING CONSTANT SPEED,South,35,False,2021,SUBARU,FORRESTER,False
3,MCP2674004J,3CD099CA-F5FF-4174-B184-02BCF2C89934,False,NO APPARENT INJURY,NONE,NOT DISTRACTED,EMERGENCY,MOVING CONSTANT SPEED,South,40,False,2019,DODGE,CHARGER,False
4,MCP25280008,64940511-7ACF-4F66-9A1B-5F476A46C2A5,True,NO APPARENT INJURY,NONE,NOT DISTRACTED,PASSENGER,MOVING CONSTANT SPEED,North,20,False,2014,NISSAN,ROGUE,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172100,DM8338000C,81DAD2F7-8D4A-4316-B40D-FAD4A4587B28,False,NO APPARENT INJURY,UNKNOWN,NOT DISTRACTED,EMERGENCY,SLOWING OR STOPPING,East,25,False,2016,FORD,EXPLORER,False
172101,MCP1182001S,DF427BFB-94E2-454E-9C46-82C8E286B992,True,NO APPARENT INJURY,UNKNOWN,LOOKED BUT DID NOT SEE,PASSENGER,SLOWING OR STOPPING,East,35,False,2008,NISSAN,ALTIMA,False
172102,MCP1453008X,CD875BDC-5B10-47F6-B3E7-7C091A71FB5D,False,NO APPARENT INJURY,NONE,NOT DISTRACTED,PASSENGER,SLOWING OR STOPPING,North,40,False,2008,CHEVROLET,EXPRESS,False
172103,MCP2568000M,15D3D66B-8337-4E7A-BAE6-855C02AE1CA7,False,NO APPARENT INJURY,NONE,NOT DISTRACTED,PASSENGER,MAKING LEFT TURN,South,35,False,2018,FORD,TRANSIT VAN,False
