# Set up initial data paths

In [1]:
## Setup 
# import modules
import pandas as pd
import os
from datetime import datetime
import arcpy
import numpy as np
import urllib
import Custom_Functions as cf
ca_crash_input_file = 'RawData/CA_Raw_Data.csv'
nv_crash_input_file = 'RawData/NV_Raw_Data.csv'
# setup workspace folder
workspace = "F:/gis/PROJECTS/ResearchAnalysis/Monitoring/Data/Crash/"

# setup environment variables
arcpy.env.overwriteOutput = True
arcpy.env.workspace = "F:/GIS/PROJECTS/ResearchAnalysis/Monitoring/Data/Crash/CrashData/CrashData.gdb"

# create a spatial reference object for the output coordinate system 
# output projection for data going into SDE should be UTM Zone 10N (EPSG: 26910)
out_coordinate_system = arcpy.SpatialReference(26910)

# network path to connection files
filePath = "F:/GIS/DB_CONNECT"

# database file path 
sdeBase  = os.path.join(filePath, "Vector.sde")

# SDE feature classes needed for spatial joins
corridor = os.path.join(sdeBase, 'sde.SDE.Transportation\sde.SDE.Corridor')
trpa     = os.path.join(sdeBase, 'sde.SDE.Jurisdictions\sde.SDE.TRPA_bdy')

# define csv lat/long field names for xy table to point
x_coords = 'POINT_X'
y_coords = 'POINT_Y'

## Import raw data and sde highway collisions

In [None]:
# SDE feature class to update
crashSDE  = os.path.join(sdeBase, 'sde.SDE.Transportation\sde.SDE.Highway_Collisions')

# Get Crash Data
caCrashes = os.path.join(workspace, ca_crash_input_file)
dfCACrash_raw = pd.read_csv(caCrashes)

nvCrashes = os.path.join(workspace, nv_crash_input_file)
dfNVCrash_raw = pd.read_csv(nvCrashes)
    
# # in memory files
memory = "memory" + "\\"

## CA data transformation

In [28]:
#Import California lookup dictionaries
#All lookup values are in a single csv that gets imported and then filtered on state and fieldname
#There are more efficient ways to do this but this makes for the most flexibility
value_lookup = 'LookupLists/FieldLookups.csv'
ca_crash_severity_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Crash_Severity')
ca_crash_type_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Crash_Type')
ca_lighting_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Lighting')
ca_weather_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Weather')
ca_violation_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Violation')
ca_road_surface_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Road_Surface')
ca_road_con_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Road_Condition')
ca_ped_action_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Pedestrian_Action')
ca_hit_run_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Hit_and_Run')
ca_mviw_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','CA','FieldName','Motor_Vehicle_Interacted_With')

In [29]:

## CA data translation
#This list will be used to rename the columns from the raw data
#Future improvement - turn this into a csv?
ca_column_mapping = {'COLLISION_SEVERITY' : 'Crash_Severity',
'TYPE_OF_COLLISION' : 'Crash_Type',
'LIGHTING' : 'Lighting',
'WEATHER_1' : 'Weather_1',
'WEATHER_2' : 'Weather_2',
'PCF_VIOL_CATEGORY' : 'Violation',
'ROAD_SURFACE' : 'Road_Surface',
'ROAD_COND_1' : 'Road_Condition_1',
'ROAD_COND_2' : 'Road_Condition_2',
'PED_ACTION' : 'Pedestrian_Action',
'HIT_AND_RUN' : 'Hit_and_Run',
'MVIW' : 'Motor_Vehicle_Interacted_With',
'ACCIDENT_YEAR' : 'Year',
'ALCOHOL_INVOLVED' : 'Alcohol_Involved',
'BICYCLE_ACCIDENT' : 'Bicycle_Involved',
'CASE_ID' : 'CA_Case_ID',
'CITY' : 'City',
'COLLISION_DATE' : 'Date',
'COLLISION_TIME' : '4DigTime',
'COUNT_BICYCLIST_INJURED' : 'Num_Bicyclist_Injured',
'COUNT_BICYCLIST_KILLED' : 'Num_Bicyclist_Killed',
'COUNT_MC_INJURED' : 'Num_Motorcyclist_Injured',
'COUNT_MC_KILLED' : 'Num_Motorcyclist_Killed',
'COUNT_PED_INJURED' : 'Num_Ped_Injured',
'COUNT_PED_KILLED' : 'Num_Ped_Killed',
'COUNTY' : 'County',
'Hour' : 'Time',
'MOTORCYCLE_ACCIDENT' : 'Motorcycle_Involved',
'NUMBER_INJURED' : 'Num_Injured',
'NUMBER_KILLED' : 'Num_Killed',
'PARTY_COUNT' : 'Num_Parties',
'PEDESTRIAN_ACCIDENT' : 'Pedestrian_Involved',
'COLLISION_TIME':'COLLISION_TIME',
'POINT_X':'POINT_X',
'POINT_Y':'POINT_Y'
}

#Any columns not in the column mapping will be deleted
dfCACrash=cf.renamecolumns(dfCACrash_raw,ca_column_mapping,True)


In [30]:

#Add new columns with hard coded values
dfCACrash['State']       = "CA"
dfCACrash['4DigTime']    = dfCACrash['COLLISION_TIME'].astype(str).str.zfill(4)
dfCACrash['Hour']        = dfCACrash['4DigTime'].str[:2]
dfCACrash['Min']         = dfCACrash['4DigTime'].str[2:]
dfCACrash['Time']        = dfCACrash['Hour']+":"+dfCACrash['Min']
dfCACrash['Data_Source'] = "CHP/SWITRS"
#convert case ID
dfCACrash['NV_Accident_Num']       = np.nan
dfCACrash['NV_Accident_Rec_Num']   = np.nan
dfCACrash['Num_Vehicles']          = np.nan
dfCACrash['Corridor_ID']           = np.nan

#Update fields from lookup dictionaries
#Imporvement - lookup dictionary that loops through everything? Might hurt readablity
dfCACrash['Crash_Severity']=dfCACrash['Crash_Severity'].astype(str)
dfCACrash['Crash_Severity'] = dfCACrash['Crash_Severity'].map(ca_crash_severity_lookup)
dfCACrash['Crash_Type'] = dfCACrash['Crash_Type'].map(ca_crash_type_lookup)
dfCACrash['Lighting'] = dfCACrash['Lighting'].map(ca_lighting_lookup)
dfCACrash['Weather_1'] = dfCACrash['Weather_1'].map(ca_weather_lookup)
dfCACrash['Weather_2']=dfCACrash['Weather_2'].map(ca_weather_lookup)
dfCACrash['Violation'] = dfCACrash['Violation'].map(ca_violation_lookup)
dfCACrash['Road_Surface'] = dfCACrash['Road_Surface'].map(ca_road_surface_lookup)
dfCACrash['Road_Condition_1']=dfCACrash['Road_Condition_1'].map(ca_road_con_lookup)
dfCACrash['Road_Condition_2']=dfCACrash['Road_Condition_2'].map(ca_road_con_lookup)
dfCACrash['Pedestrian_Action']=dfCACrash['Pedestrian_Action'].map(ca_ped_action_lookup)
dfCACrash['Hit_and_Run'] = dfCACrash['Hit_and_Run'].map(ca_hit_run_lookup)
dfCACrash['Motor_Vehicle_Interacted_With'] = dfCACrash['Motor_Vehicle_Interacted_With'].map(ca_mviw_lookup)
def populate_all_involved(row):
    field_value = ''
    if row['Pedestrian_Involved']=='Y':
        field_value+='Pedestrian, '
    if row['Bicycle_Involved']=='Y':
        field_value+='Bicycle, '
    if row['Motorcycle_Involved']=='Y':
        field_value+='Motorcycle, '
    return field_value[:-2] if field_value else ''
dfCACrash['All_Involved'] = dfCACrash.apply(populate_all_involved,axis=1)
#Specify which fields to keep
dfCACrash = dfCACrash[['State',
           'CA_Case_ID',
           'NV_Accident_Num',
           'NV_Accident_Rec_Num',
           'Corridor_ID',
           'County',
           'City',
           'Year',
           'Date',
           'Time',
           'Weather_1',
           'Weather_2',
           'Crash_Severity',
           'Num_Killed',
           'Num_Injured',
           'Num_Ped_Killed',
           'Num_Ped_Injured',
           'Num_Bicyclist_Killed',
           'Num_Bicyclist_Injured',
           'Num_Motorcyclist_Killed',
           'Num_Motorcyclist_Injured',
           'Crash_Type',
           'Num_Vehicles',
           'Num_Parties',
           'Violation',
           'Hit_and_Run',
           'Motor_Vehicle_Interacted_With',
           'Pedestrian_Action', 
           'Road_Condition_1',
           'Road_Condition_2',
           'Road_Surface',
           'Lighting',
           'Pedestrian_Involved',
           'Bicycle_Involved',
           'Motorcycle_Involved',
           'Alcohol_Involved',
           'Data_Source',
           'All_Involved',
           'POINT_X',
           'POINT_Y']].copy()

## NV data transformation

In [31]:

nv_road_surface_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','NV','FieldName','Road_Surface')
nv_crash_type_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','NV','FieldName','Crash_Type')
nv_lighting_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','NV','FieldName','Lighting')
nv_road_condition_lookup = cf.import_lookup_dictionary(value_lookup,'key','value','State','NV','FieldName','Road_Condition_1')


In [32]:
## NV Data Transformation
# set fields for time and case info
nv_column_mapping = {
    'NV Accident Num' : 'NV_Accident_Num',
'NV Accident Rec Num' : 'NV_Accident_Rec_Num',
'Collision_Year' : 'Year',
'Collision_Date' : 'Date',
'Collision_Time' : 'Time',
'Total Vehicles' : 'Num_Vehicles',
'Fatalities' : 'Num_Killed',
'Injured' : 'Num_Injured',
'COLLISION_SEVERITY' : 'Crash_Severity',
'Crash Type' : 'Crash_Type',
'Factors Roadway' : 'Road_Surface',
'HWY Factors' : 'Road_Condition_1',
'X' : 'POINT_X',
'Y': 'POINT_Y',
'Weather': 'Weather_1',
'LIGHTING':'Lighting' 
}

dfNVCrash = cf.renamecolumns(dfNVCrash_raw,nv_column_mapping,False)

In [33]:


nv_motorcycle_list = ['MOTORCYCLE',"MOTORBIKE","MOPED"]

dfNVCrash['Num_Killed']   = dfNVCrash['Num_Killed'].fillna(0)
dfNVCrash['Num_Injured'] = dfNVCrash['Num_Injured'].fillna(0)

dfNVCrash['Num_Parties']          = np.nan
dfNVCrash['Data_Source']          = "NDOT"
dfNVCrash['City']                 = np.nan
dfNVCrash['CA_Case_ID']           = np.nan
dfNVCrash['Num_Ped_Killed'] = np.nan
dfNVCrash['Num_Ped_Injured'] = np.nan
dfNVCrash['Num_Bicyclist_Killed'] = np.nan
dfNVCrash['Num_Bicyclist_Injured'] = np.nan
dfNVCrash['Num_Motorcyclist_Killed'] = np.nan
dfNVCrash['Num_Motorcyclist_Injured'] = np.nan
dfNVCrash['Violation']  = "N/A"
dfNVCrash['Hit_and_Run'] = "N/A"
dfNVCrash['Motor_Vehicle_Interacted_With'] = "N/A"
dfNVCrash['Pedestrian_Action'] = "N/A"
dfNVCrash['Weather_2']   = np.nan
dfNVCrash['Corridor_ID'] = np.nan
dfNVCrash['Road_Condition_2'] = np.nan


# Convert NV crash type and severity
dfNVCrash['Crash_Type'] = dfNVCrash['Crash_Type'].map(nv_crash_type_lookup)
dfNVCrash['Lighting'] = dfNVCrash['Lighting'].map(nv_lighting_lookup)

#Process alcohol involvement (check outputs)
dfNVCrash['V1 Driver Factors'] = dfNVCrash['V1 Driver Factors'].fillna("Not stated")
dfNVCrash['V2 Driver Factors'] = dfNVCrash['V2 Driver Factors'].fillna("Not stated")
dfNVCrash.loc[dfNVCrash['V1 Driver Factors'].str.contains("DRINKING"), 'Alcohol_Involved'] = "Y"
dfNVCrash.loc[dfNVCrash['V2 Driver Factors'].str.contains("DRINKING"), 'Alcohol_Involved'] = "Y"
#Process bike/ped involvement (check outputs)
dfNVCrash['V1 All Events'] = dfNVCrash['V1 All Events'].fillna("Not stated")
dfNVCrash['V2 All Events'] = dfNVCrash['V2 All Events'].fillna("Not stated")
dfNVCrash.loc[dfNVCrash['V1 All Events'].str.contains("PEDESTRIAN"), 'Pedestrian_Involved'] = "Y"
dfNVCrash.loc[dfNVCrash['V2 All Events'].str.contains("PEDESTRIAN"), 'Pedestrian_Involved'] = "Y"
dfNVCrash.loc[dfNVCrash['V1 All Events'].str.contains("PEDAL CYCLE"), 'Bicycle_Involved'] = "Y"
dfNVCrash.loc[dfNVCrash['V2 All Events'].str.contains("PEDAL CYCLE"), 'Bicycle_Involved'] = "Y"
#Process motorcycle involvement (check outputs)
dfNVCrash['V1 Type'] = dfNVCrash['V1 Type'].fillna("Not stated")
dfNVCrash['V2 Type'] = dfNVCrash['V1 Type'].fillna("Not stated")
dfNVCrash.loc[dfNVCrash['V1 Type'].str.contains('|'.join(nv_motorcycle_list), case=False, na=False), 'Motorcycle_Involved'] = "Y"
dfNVCrash.loc[dfNVCrash['V2 Type'].str.contains('|'.join(nv_motorcycle_list), case=False, na=False), 'Motorcycle_Involved'] = "Y"

# #Convert road surface
dfNVCrash['Road_Surface']=dfNVCrash['Road_Surface'].fillna('Not stated')
dfNVCrash['Road_Condition_1']=dfNVCrash['Road_Condition_1'].fillna('Not stated')
dfNVCrash=cf.update_if_contains(dfNVCrash,'Road_Surface',nv_road_surface_lookup)
dfNVCrash=cf.update_if_contains(dfNVCrash,'Road_Condition_1',nv_road_condition_lookup)


dfNVCrash['All_Involved'] = dfNVCrash.apply(populate_all_involved,axis=1)
#Rename to match CA data


# final list of fields
dfNVCrash = dfNVCrash[['State',
           'CA_Case_ID',
           'NV_Accident_Num',
           'NV_Accident_Rec_Num',
           'Corridor_ID',
           'County',
           'City',
           'Year',
           'Date',
           'Time',
           'Weather_1',
           'Weather_2',
           'Crash_Severity',
           'Num_Killed',
           'Num_Injured',
           'Num_Ped_Killed',
           'Num_Ped_Injured',
           'Num_Bicyclist_Killed',
           'Num_Bicyclist_Injured',
           'Crash_Type',
           'Num_Vehicles',
           'Num_Parties',
           'Violation',
           'Hit_and_Run',
           'Motor_Vehicle_Interacted_With',
           'Pedestrian_Action', 
           'Road_Condition_1',
           'Road_Condition_2',
           'Road_Surface',
           'Lighting',
           'Pedestrian_Involved',
           'Bicycle_Involved',
           'Motorcycle_Involved',
           'Alcohol_Involved',
           'Data_Source',
           'All_Involved',
           'POINT_X',
           'POINT_Y']].copy()


In [None]:
# os.remove(os.path.join(workspace, "NV_Crash_New.csv" ))

# export dataframe to csv 
dfNVCrash.to_csv(os.path.join(workspace, "NV_Crash_New.csv" ))
# get NV CSV for XY Table TO Point
nvCSV = os.path.join(workspace, "NV_Crash_New.csv" )

# name the output feature class
nvFC  = 'NV_Crash_New_1'

def fieldJoinCalc(updateFC, updateFieldsList, sourceFC, sourceFieldsList):
    from time import strftime  
    print ("Started data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))
#     log.info("Started data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))
    # Use list comprehension to build a dictionary from arcpy SearchCursor  
    valueDict = {r[0]:(r[1:]) for r in arcpy.da.SearchCursor(sourceFC, sourceFieldsList)}  
   
    with arcpy.da.UpdateCursor(updateFC, updateFieldsList) as updateRows:  
        for updateRow in updateRows:  
            # store the Join value of the row being updated in a keyValue variable  
            keyValue = updateRow[0]  
            # verify that the keyValue is in the Dictionary  
            if keyValue in valueDict:  
                # transfer the value stored under the keyValue from the dictionary to the updated field.  
                updateRow[1] = valueDict[keyValue][0]  
                updateRows.updateRow(updateRow)    
    del valueDict  
    print ("Finished data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))

# Nevada data frame to feature class 
# input data is in NAD 1983 UTM Zone 11N coordinate system
arcpy.management.XYTableToPoint(nvCSV, nvFC, 
                                x_coords, y_coords, "",
                                # set prjoection transform to from
                                arcpy.SpatialReference(26911))

# output data for project tool
output_NV_Crash_Project = "NV_Crash_Project_1"

# project from UTM to WGS
arcpy.Project_management(nvFC, output_NV_Crash_Project, out_coordinate_system)

# os.remove(os.path.join(workspace, "CA_Crash_New.csv"))
## CA Export
# export dataframe to csv 
dfCACrash.to_csv(os.path.join(workspace, "CA_Crash_New.csv" ))

# get NV CSV for XY Table TO Point
caCSV = os.path.join(workspace, "CA_Crash_New.csv" )

# name the output feature class
caFC     = 'CA_Crash_New_1'

# CA data frame to feature class
arcpy.management.XYTableToPoint(caCSV, caFC, 
                                x_coords, y_coords, "",
                                # set prjoection transform to from
                                arcpy.SpatialReference(4326))

# output data for project tool
output_CA_Crash_Project = "CA_Crash_Project_1" 

# project from UTM to WGS
arcpy.Project_management(caFC, output_CA_Crash_Project, out_coordinate_system)

## Merge CA and NV
# out merge fc
tahoeCrash = "Tahoe_Crash_1"

# input feature classes
caCrash = "CA_Crash_Project_1"
nvCrash = "NV_Crash_Project_1"

# Create FieldMappings object to manage merge output fields
fieldMappings = arcpy.FieldMappings()
# Add all fields from all parcel staging layers
fieldMappings.addTable(caCrash)
fieldMappings.addTable(nvCrash)

# Remove all output fields from the field mappings, except fields in field_master list
for field in fieldMappings.fields:
    if field.name not in [  'OBJECTID',
                            'State',
                            'CA_Case_ID',
                            'NV_Accident_Num',
                            'NV_Accident_Rec_Num',
                            'Corridor_ID',
                            'County',
                            'City',
                            'Year',
                            'Date',
                            'Time',
                            'Weather_1',
                            'Weather_2',
                            'Crash_Severity',
                            'Num_Killed',
                            'Num_Injured',
                            'Num_Ped_Killed',
                            'Num_Ped_Injured',
                            'Num_Bicyclist_Killed',
                            'Num_Bicyclist_Injured',
                            'Num_Motorcyclist_Killed',
                            'Num_Motorcyclist_Injured',
                            'Crash_Type',
                            'Num_Vehicles',
                            'Num_Parties',
                            'Violation',
                            'Hit_and_Run',
                            'Motor_Vehicle_Interacted_With',
                            'Pedestrian_Action', 
                            'Road_Condition_1',
                            'Road_Condition_2',
                            'Road_Surface',
                            'Lighting',
                            'Pedestrian_Involved',
                            'Bicycle_Involved',
                            'Alcohol_Involved',
                            'Motorcycle_Involved',
                            'Data_Source',
                            'POINT_X',
                            'POINT_Y',
                            'SHAPE@']:
        # remove everything else
        fieldMappings.removeFieldMap(fieldMappings.findFieldMapIndex(field.name)) 
    
# Use Merge tool to move features into single dataset
arcpy.management.Merge([caCrash, nvCrash], tahoeCrash, fieldMappings)
print("Crash Feature Classes Merged")

# ## Spatial Join of Corridor IDs
# in memory points to be used for spatial join results
corridorPoints = memory + 'CrashPoint_Corridor'
# Spatial Join
arcpy.SpatialJoin_analysis(tahoeCrash, corridor, corridorPoints, 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN", "", "")

# use function to transfer spatial join results to crash stagin layer
fieldJoinCalc(tahoeCrash, ['OBJECTID', 'Corridor_ID'], corridorPoints, ['OBJECTID','CORRIDOR_NAME'])
print("Finished updating staging layer")


tempLayer = "deleteLayers"

# Run MakeFeatureLayer
arcpy.management.MakeFeatureLayer(tahoeCrash, tempLayer)
 
arcpy.management.SelectLayerByLocation(tempLayer, "have_their_center_in", 
                                       trpa,
                                       search_distance="", 
                                       selection_type="NEW_SELECTION", 
                                       invert_spatial_relationship="INVERT")
 
# Run GetCount and if some features have been selected, then 
#  run DeleteFeatures to remove the selected features.
if int(arcpy.management.GetCount(tempLayer)[0]) > 0:
    arcpy.management.DeleteFeatures(tempLayer)
print("features deleted")

# outfc = 
# Update SDE - Truncate Append
# updateSDE(tahoeCrash, outfc, fieldnames)

## NV 2021 Data

In [2]:

from arcgis import GeoAccessor

arcpy.env.overwriteOutput = True
# arcpy.env.workspace = "F:/GIS/PROJECTS/ResearchAnalysis/Monitoring/Data/Crash/2021 Crash Data/NV/Statewide Data 2021.gdb"

# raw_crash_featureclass = "Crash_Data_2021"
# person_table = "Person_Table_2021"
# vehicle_table = "Vehicle_Table_2021"
# updated_crash_featureclass = 'Crash_Data_2021_Updated'

arcpy.env.workspace = "F:/GIS/PROJECTS/ResearchAnalysis/Monitoring/Data/Crash/2022_Crash_Data/5yr 2019-2023 geodatabase/5YR 2019-2023.gdb"

raw_crash_featureclass = "Export_Output_5yrstatewide"
person_table = "person_table"
vehicle_table = "vehicletable"
updated_crash_featureclass = 'Crash_Data_2022_Updated'


# Create a new feature class based on the existing one
arcpy.management.CopyFeatures(raw_crash_featureclass, updated_crash_featureclass)

# Add x and y fields to the new feature class
arcpy.management.AddField(updated_crash_featureclass, 'POINT_X', 'DOUBLE')
arcpy.management.AddField(updated_crash_featureclass, 'POINT_Y', 'DOUBLE')

# Use an UpdateCursor to calculate x and y coordinates
with arcpy.da.UpdateCursor(updated_crash_featureclass, ['SHAPE@X', 'SHAPE@Y', 'POINT_X', 'POINT_Y']) as cursor:
    for row in cursor:
        x, y = row[0], row[1]
        row[2] = x
        row[3] = y
        cursor.updateRow(row)

def import_table_from_fgb(tablename):
    data = []

    # Use SearchCursor to iterate through the feature class
    fields = [field.name for field in arcpy.ListFields(tablename)]
    with arcpy.da.SearchCursor(tablename, fields) as cursor:
        for row in cursor:
            data.append(row)
    # Convert the list of tuples to a Pandas DataFrame
    df = pd.DataFrame(data, columns=fields)
    return df

person_df = import_table_from_fgb(person_table)
vehicle_df = import_table_from_fgb(vehicle_table)

crash_sdf = GeoAccessor.from_featureclass(updated_crash_featureclass)


## Map and rename columns for dfs from a dictionary

In [3]:
crash_column_mapping = {
   'CrashDate' : 'Date',
'CrashTime' : 'Time',
'Weather' : 'Weather_1',
'NumFatalities' : 'Num_Killed',
'NumInjured' : 'Num_Injured',
'VehCrashType' : 'Crash_Type',
'NumVehicles' : 'Num_Vehicles',
'RoadSurface' : 'Road_Surface',
'LightCondition' : 'Lighting',
'PedestrianInvolved' : 'Pedestrian_Involved',
'CrashNum' : 'NV_Accident_Num',
'CrashSeverity': 'Crash_Severity',
'aNo' : 'NV_Accident_Rec_Num',
'County' : 'County',
'RoadEnvironmentalFactors':'Road_Condition_1',
'SHAPE':'SHAPE',
'POINT_X':'POINT_X',
'POINT_Y':'POINT_Y'
}

crash_sdf_clean = cf.renamecolumns(crash_sdf,crash_column_mapping,True)

In [4]:
#Import Lookup Lists
crash_type_lookup_nv = cf.import_lookup_dictionary('LookupLists/CrashType_Lookup.csv','key','value','State','NV','FieldName','Crash_Type')
lighting_lookup_nv = cf.import_lookup_dictionary('LookupLists/Lighting_Lookup.csv','key','value','State','NV','FieldName','Lighting')
road_conditions_lookup_nv  = cf.import_lookup_dictionary('LookupLists/RoadConditions_Lookup.csv','key','value','State','NV','FieldName','Road_Conditions')
road_surface_lookup_nv = cf.import_lookup_dictionary('LookupLists/RoadSurface_Lookup.csv', 'key','value','State','NV','FieldName','Road_Surface')
weather_lookup_nv = cf.import_lookup_dictionary('LookupLists/Weather_Lookup.csv','key','value','State','NV','FieldName','Weather')
crash_lookup_nv = cf.import_lookup_dictionary('LookupLists/CrashSeverity_Lookup.csv','key','value','State','NV','FieldName','Crash_Severity')


In [5]:
#crash_sdf_clean['Lighting'] = crash_sdf_clean['Lighting'].replace(lighting_lookup_nv, value="Not stated")
#crash_sdf_clean['Road_Condition_2']= crash_sdf_clean['Road_Condition_1']
crash_sdf_clean = cf.update_if_contains(crash_sdf_clean,'Road_Condition_1',road_conditions_lookup_nv)
#Not really sure what's going on with this field - road surface in Raw data is Asphalt etc but road surface in crash data is dry, wet etc
crash_sdf_clean['Road_Surface'] = crash_sdf_clean['Road_Condition_1']
crash_sdf_clean =cf.update_if_contains(crash_sdf_clean,'Weather_1', weather_lookup_nv)
crash_sdf_clean['Crash_Severity']=crash_sdf_clean['Crash_Severity'].map(crash_lookup_nv)
crash_sdf_clean['Lighting']=crash_sdf_clean['Lighting'].map(lighting_lookup_nv)
crash_sdf_clean['Time']=crash_sdf_clean['Time'].str.slice(0,2) + ':' + crash_sdf_clean['Time'].str.slice(2)

In [6]:
# Use person dataframe to get number of bike involved, pedestrian, alcohol status
def type_involved(df_slice, types, type_column):
    if (df_slice[type_column].isin(types)).any():
        return 'Y'
    else:
        return 'N'
    
def alcohol_involved(df_slice, substring):
    # Check if the substring is contained anywhere in 'Value2'
    if ((df_slice['AlcDrugInvolved'].str.contains(substring)) & (df_slice['PersonType']=='Driver')).any():
        return 'Y'  
    else: 
        return 'N'

def conditional_count(df_slice, condition_column1, types1, condition_column2, types2):
    # Count the number of records meeting a specific condition
    return ((df_slice[condition_column1].isin(types1)) & (df_slice[condition_column2].isin(types2))).sum()


In [7]:
vehicle_columns_to_keep = ['vcNo', 'VehType']
vehicle_df_simple = vehicle_df[vehicle_columns_to_keep]
person_df_wt_vehicle = pd.merge(person_df,vehicle_df_simple,how='left',left_on='vcNoRelated',right_on='vcNo')

In [8]:
ped_types= ['Skater','Pedestrian','Other Non-Motorist','Wheelchair']
mortality_codes = ['K']
injury_codes = ['C', 'B', 'A']
bike_types = ['Pedal Cyclist', 'E-Bike', 'Other Cyclist']
motorcycle_types = ['MC - MOTORCYCLE',	'MD - MOPED',	'MOPED',	'MOTORBIKE',	'MOTORCYCLE',	'MOTORSCOOTER',	'MT - MOTORCYCLE',	'OTHER']

person_grouped = person_df_wt_vehicle.groupby('CrashNum').apply(lambda group_df: pd.Series({
    'Num_Ped_Killed': conditional_count(group_df, condition_column1='PersonType',types1=ped_types,condition_column2='InjuryCode',types2=mortality_codes),
    'Num_Ped_Injured': conditional_count(group_df, condition_column1='PersonType',types1=ped_types,condition_column2='InjuryCode',types2=injury_codes),
    'Num_Bicyclist_Killed': conditional_count(group_df, condition_column1='PersonType',types1=bike_types,condition_column2='InjuryCode',types2=mortality_codes),
    'Num_Bicyclist_Injured': conditional_count(group_df, condition_column1='PersonType',types1=bike_types,condition_column2='InjuryCode',types2=injury_codes),
    'Num_Motorcyclist_Killed': conditional_count(group_df, condition_column1='VehType',types1=motorcycle_types,condition_column2='InjuryCode',types2=mortality_codes),
    'Num_Motorcyclist_Injured': conditional_count(group_df, condition_column1='VehType',types1=motorcycle_types,condition_column2='InjuryCode',types2=injury_codes),
    'Bicycle_Involved': type_involved(group_df,bike_types,'PersonType'),
    'Motorcycle_Involved': type_involved(group_df,motorcycle_types,'VehType'),
    'Alcohol_Involved': alcohol_involved(group_df, 'ALCO'),
})).reset_index()

### Create final dataframe for NV 2021

In [10]:
def populate_all_involved(row):
    field_value = ''
    if row['Pedestrian_Involved']=='Y':
        field_value+='Pedestrian, '
    if row['Bicycle_Involved']=='Y':
        field_value+='Bicycle, '
    if row['Motorcycle_Involved']=='Y':
        field_value+='Motorcycle, '
    return field_value[:-2] if field_value else ''

In [13]:
nevada_crashes_2021=pd.merge(crash_sdf_clean, person_grouped, how='left',left_on='NV_Accident_Num',right_on='CrashNum')
#Fill in hard coded fields
nevada_crashes_2021['Violation']  = "N/A"
nevada_crashes_2021['Hit_and_Run'] = "N/A"
nevada_crashes_2021['Motor_Vehicle_Interacted_With'] = "N/A"
nevada_crashes_2021['Pedestrian_Action'] = "N/A"
#nevada_crashes_2021['Year']  = "2021"
nevada_crashes_2021['Data_Source'] = "NDOT"
nevada_crashes_2021['CA_Case_ID'] = np.nan
nevada_crashes_2021['City'] = np.nan
nevada_crashes_2021['Num_Parties'] = np.nan
nevada_crashes_2021['Corridor_ID'] = np.nan
nevada_crashes_2021.loc[~nevada_crashes_2021['Pedestrian_Involved'].isna()]['All_Involved'] = nevada_crashes_2021.loc[~nevada_crashes_2021['Pedestrian_Involved'].isna()].apply(populate_all_involved,axis=1)
nevada_crashes_2021.loc[nevada_crashes_2021['Pedestrian_Involved'].isna()]['All_Involved'] = 'Unknown'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nevada_crashes_2021.loc[~nevada_crashes_2021['Pedestrian_Involved'].isna()]['All_Involved'] = nevada_crashes_2021.loc[~nevada_crashes_2021['Pedestrian_Involved'].isna()].apply(populate_all_involved,axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nevada_crashes_2021.loc[nevada_crashes_2021['Pedestrian_Involved'].isna()]['All_Involved'] = 'Unknown'


In [15]:
# Convert nevada crashes to a year
nevada_crashes_2021['Year'] = nevada_crashes_2021['Date'].dt.year.astype(str)


In [19]:
nevada_crashes_2022 = nevada_crashes_2021[nevada_crashes_2021['Year'].isin(['2022','2023'])]

In [16]:
nevada_crashes_2021.to_excel('datatest.xlsx')

In [20]:
workspace = "F:/gis/PROJECTS/ResearchAnalysis/Monitoring/Data/Crash/"

# setup environment variables
arcpy.env.overwriteOutput = True
#arcpy.env.workspace = "//Trpa-fs01/GIS/PROJECTS/ResearchAnalysis/Monitoring/Data/Crash/CrashData/CrashData.gdb"
arcpy.env.workspace = "F:/GIS/PROJECTS/ResearchAnalysis/Monitoring/Data/Crash/CrashData/CrashData.gdb"

# create a spatial reference object for the output coordinate system 
# output projection for data going into SDE should be UTM Zone 10N (EPSG: 26910)
out_coordinate_system = arcpy.SpatialReference(26910)

# network path to connection files
#filePath = "//Trpa-fs01/GIS/DB_CONNECT"
filePath = "F:/GIS/DB_CONNECT"

# database file path 
sdeBase  = os.path.join(filePath, "Vector.sde")

# SDE feature classes needed for spatial joins
corridor = os.path.join(sdeBase, 'sde.SDE.Transportation\sde.SDE.Corridor')
trpa     = os.path.join(sdeBase, 'sde.SDE.Jurisdictions\sde.SDE.TRPA_bdy')
# # in memory files
memory = "memory" + "\\"

In [21]:

# export dataframe to csv 
nevada_crashes_2022.to_csv(os.path.join(workspace, "NV_Crash_2022.csv" ))
# get NV CSV for XY Table TO Point
nvCSV = os.path.join(workspace, "NV_Crash_2022.csv" )

# name the output feature class
nvFC  = 'NV_Crash_2022'



# Nevada data frame to feature class 
# input data is in NAD 1983 UTM Zone 11N coordinate system
arcpy.management.XYTableToPoint(nvCSV, nvFC, 
                                x_coords, y_coords, "",
                                # set prjoection transform to from
                                arcpy.SpatialReference(26911))
# output data for project tool
output_NV_Crash_Project = "NV_Crash_2022_Project"

# project from UTM to WGS
arcpy.Project_management(nvFC, output_NV_Crash_Project, out_coordinate_system)

In [23]:
# ## Spatial Join of Corridor IDs
# in memory points to be used for spatial join results
corridorPoints = memory + 'CrashPoint_Corridor'
# Spatial Join
tahoeCrash = "NV_Crash_2022_Project"

arcpy.SpatialJoin_analysis(tahoeCrash, corridor, corridorPoints, 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN", "", "")

# use function to transfer spatial join results to crash stagin layer



def fieldJoinCalc(updateFC, updateFieldsList, sourceFC, sourceFieldsList):
    from time import strftime  
    print ("Started data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))
#     log.info("Started data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))
    # Use list comprehension to build a dictionary from arcpy SearchCursor  
    valueDict = {r[0]:(r[1:]) for r in arcpy.da.SearchCursor(sourceFC, sourceFieldsList)}  
   
    with arcpy.da.UpdateCursor(updateFC, updateFieldsList) as updateRows:  
        for updateRow in updateRows:  
            # store the Join value of the row being updated in a keyValue variable  
            keyValue = updateRow[0]  
            # verify that the keyValue is in the Dictionary  
            if keyValue in valueDict:  
                # transfer the value stored under the keyValue from the dictionary to the updated field.  
                updateRow[1] = valueDict[keyValue][0]  
                updateRows.updateRow(updateRow)    
    del valueDict  
    print ("Finished data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))
fieldJoinCalc(tahoeCrash, ['OBJECTID', 'Corridor_ID'], corridorPoints, ['OBJECTID','CORRIDOR_NAME'])
print("Finished updating staging layer")


tempLayer = "deleteLayers"

# Run MakeFeatureLayer
arcpy.management.MakeFeatureLayer(tahoeCrash, tempLayer)
 
arcpy.management.SelectLayerByLocation(tempLayer, "have_their_center_in", 
                                       trpa,
                                       search_distance="", 
                                       selection_type="NEW_SELECTION", 
                                       invert_spatial_relationship="INVERT")
 
# Run GetCount and if some features have been selected, then 
#  run DeleteFeatures to remove the selected features.
if int(arcpy.management.GetCount(tempLayer)[0]) > 0:
    arcpy.management.DeleteFeatures(tempLayer)
print("features deleted")

# outfc = 
# Update SDE - Truncate Append
# updateSDE(tahoeCrash, outfc, fieldnames)

Started data transfer: 2025-01-29 16:36:03
Finished data transfer: 2025-01-29 16:36:07
Finished updating staging layer
features deleted
