In [1]:
import pandas as pd
import numpy as np

### Date

In [15]:
import openmeteo_requests

import requests_cache
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 52.52,
	"longitude": 13.41,
	"start_date": "2015-01-01",
	"end_date": "2023-12-31",
	"hourly": {"temperature_2m",
               "rain"}
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested. ACTUALLY INVERSED
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_rain = hourly.Variables(0).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["rain"] = hourly_rain

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

Coordinates 52.5483283996582°N 13.407821655273438°E
Elevation 38.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                           date  temperature_2m  rain
0     2015-01-01 00:00:00+00:00          3.7890   0.0
1     2015-01-01 01:00:00+00:00          3.6390   0.0
2     2015-01-01 02:00:00+00:00          3.3390   0.0
3     2015-01-01 03:00:00+00:00          2.9890   0.0
4     2015-01-01 04:00:00+00:00          2.3390   0.0
...                         ...             ...   ...
78883 2023-12-31 19:00:00+00:00          6.3085   0.1
78884 2023-12-31 20:00:00+00:00          6.6585   0.0
78885 2023-12-31 21:00:00+00:00          6.0585   0.0
78886 2023-12-31 22:00:00+00:00          5.2085   0.0
78887 2023-12-31 23:00:00+00:00          5.1585   0.0

[78888 rows x 3 columns]


### Crash

In [19]:
crash_data = pd.read_csv("montgomery_incidents_data.csv")

In [5]:
crash_data.columns

Index(['Report Number', 'Local Case Number', 'Agency Name', 'ACRS Report Type',
       'Crash Date/Time', 'Hit/Run', 'Route Type', 'Mile Point',
       'Mile Point Direction', 'Lane Direction', 'Lane Number', 'Lane Type',
       'Number of Lanes', 'Direction', 'Distance', 'Distance Unit',
       'Road Grade', 'NonTraffic', 'Road Name', 'Cross-Street Type',
       'Cross-Street Name', 'Off-Road Description', 'Municipality',
       'Related Non-Motorist', 'At Fault', 'Collision Type', 'Weather',
       'Surface Condition', 'Light', 'Traffic Control',
       'Driver Substance Abuse', 'Non-Motorist Substance Abuse',
       'First Harmful Event', 'Second Harmful Event', 'Fixed Oject Struck',
       'Junction', 'Intersection Type', 'Intersection Area', 'Road Alignment',
       'Road Condition', 'Road Division', 'Latitude', 'Longitude', 'Location'],
      dtype='object')

In [72]:
crash_data['Agency Name'].value_counts()

Agency Name
Montgomery County Police     78084
MONTGOMERY                    5947
Rockville Police Departme     5621
Gaithersburg Police Depar     4491
Takoma Park Police Depart     1782
Maryland-National Capital      797
ROCKVILLE                      356
GAITHERSBURG                   232
TAKOMA                          96
MCPARK                          52
Name: count, dtype: int64

In [12]:
crash_data["DateDim"] = pd.to_datetime(crash_data["Crash Date/Time"])

  crash_data["DateDim"] = pd.to_datetime(crash_data["Crash Date/Time"])


In [None]:
crash_data["Crash Date/Time"].apply(lambda x: x.split('/'))

In [14]:
crash_data.sort_values(by=["DateDim"], ascending=False)

Unnamed: 0,Report Number,Local Case Number,Agency Name,ACRS Report Type,Crash Date/Time,Hit/Run,Route Type,Mile Point,Mile Point Direction,Lane Direction,...,Junction,Intersection Type,Intersection Area,Road Alignment,Road Condition,Road Division,Latitude,Longitude,Location,DateDim
1997,MCP3170003V,240000438,Montgomery County Police,Property Damage Crash,01/03/2024 02:55:00 PM,No,,,,,...,,,,,,,39.165005,-77.249310,"(39.16500483, -77.24931)",2024-01-03 14:55:00
1479,MCP15540064,230074436,Montgomery County Police,Property Damage Crash,12/31/2023 10:15:00 PM,No,County,2.08,East,South,...,,,,CURVE RIGHT,NO DEFECTS,"TWO-WAY, NOT DIVIDED",39.123936,-77.134791,"(39.12393557, -77.13479125)",2023-12-31 22:15:00
1737,MCP3300002L,230074431,Montgomery County Police,Property Damage Crash,12/31/2023 10:00:00 PM,No,Maryland (State),2.23,North,South,...,INTERSECTION,FOUR-WAY INTERSECTION,,STRAIGHT,NO DEFECTS,"TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER",39.090600,-77.044838,"(39.09060018, -77.04483779)",2023-12-31 22:00:00
1804,EJ78710072,230074429,Gaithersburg Police Depar,Property Damage Crash,12/31/2023 09:19:00 PM,No,Maryland (State),3.59,North,West,...,INTERSECTION RELATED,FOUR-WAY INTERSECTION,INTERSECTION RELATED,STRAIGHT,NO DEFECTS,"TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER",39.140054,-77.195621,"(39.14005447, -77.19562112)",2023-12-31 21:19:00
1465,MCP2302000G,230074415,Montgomery County Police,Property Damage Crash,12/31/2023 05:26:00 PM,No,County,1.35,North,East,...,INTERSECTION,T-INTERSECTION,INTERSECTION,STRAIGHT,NO DEFECTS,"TWO-WAY, NOT DIVIDED",39.174160,-77.208368,"(39.17416033, -77.20836817)",2023-12-31 17:26:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33645,MCP23000002,15000197,Montgomery County Police,Property Damage Crash,01/01/2015 02:00:00 AM,No,County,1.81,North,North,...,NON INTERSECTION,,,STRAIGHT,NO DEFECTS,"TWO-WAY, NOT DIVIDED",39.148678,-77.093848,"(39.148678, -77.093848)",2015-01-01 02:00:00
57035,MCP2674000D,15000016,Montgomery County Police,Property Damage Crash,01/01/2015 01:26:00 AM,No,County,0.76,North,West,...,INTERSECTION,FOUR-WAY INTERSECTION,INTERSECTION,STRAIGHT,NO DEFECTS,"TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER",39.158059,-77.204349,"(39.158059, -77.204349)",2015-01-01 01:26:00
39427,MCP2783000G,15000025,Montgomery County Police,Injury Crash,01/01/2015 01:14:00 AM,No,Maryland (State),25.21,East,East,...,INTERSECTION,FOUR-WAY INTERSECTION,,STRAIGHT,NO DEFECTS,"TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER",39.096217,-77.109803,"(39.09621667, -77.10980333)",2015-01-01 01:14:00
17985,MCP2686000D,15000017,Montgomery County Police,Property Damage Crash,01/01/2015 01:10:00 AM,No,Ramp,0.15,South,South,...,INTERCHANGE RELATED,T-INTERSECTION,ON RAMP EXIT AREA,CURVE LEFT,NO DEFECTS,ONE-WAY TRAFFICWAY,39.196610,-77.265547,"(39.19661, -77.26554667)",2015-01-01 01:10:00


In [37]:
crash_data['Lat'] = crash_data['Latitude'].round(1)
crash_data['Long'] = crash_data['Longitude'].round(1)

In [41]:
crash_data.groupby(['Lat', 'Long']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Report Number,Local Case Number,Agency Name,ACRS Report Type,Crash Date/Time,Hit/Run,Route Type,Mile Point,Mile Point Direction,Lane Direction,...,Fixed Oject Struck,Junction,Intersection Type,Intersection Area,Road Alignment,Road Condition,Road Division,Latitude,Longitude,Location
Lat,Long,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
37.7,-79.5,MCP1291002F,190052687,Montgomery County Police,Property Damage Crash,11/01/2019 07:10:00 PM,No,Maryland (State),11.45,East,East,...,0,INTERSECTION,FOUR-WAY INTERSECTION,0,STRAIGHT,NO DEFECTS,"TWO-WAY, DIVIDED, UNPROTECTED PAINTED MIN 4 FEET",37.720000,-79.480000,"(37.72, -79.48)"
38.0,-77.0,MCP2829002G,16058465,Montgomery County Police,Property Damage Crash,11/13/2016 01:38:00 AM,Yes,Maryland (State),5.54,East,East,...,0,NON INTERSECTION,0,0,STRAIGHT,NO DEFECTS,"TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER",38.008120,-76.998430,"(38.00812, -76.99843)"
38.4,-77.5,MCP2712000H,15012614,Montgomery County Police,Property Damage Crash,03/19/2015 06:10:00 AM,Yes,County,0.00,North,East,...,0,INTERSECTION,FOUR-WAY INTERSECTION,INTERSECTION,STRAIGHT,NO DEFECTS,"TWO-WAY, NOT DIVIDED",38.353495,-77.477263,"(38.353495, -77.47726333)"
38.6,-79.2,DM83880030,190026013,Takoma Park Police Depart,Injury Crash,05/31/2019 04:42:00 PM,No,0,0.00,0,0,...,0,0,0,0,0,0,0,38.554005,-79.181926,"(38.55400492, -79.18192616)"
38.7,-77.5,MCP20160032MCP1251001HMCP20160036MCP20160033MC...,1800351371800455111800409481800351521800443281...,Montgomery County PoliceMontgomery County Poli...,Property Damage CrashProperty Damage CrashProp...,07/16/2018 12:00:00 PM09/12/2018 09:40:00 AM08...,NoYesNoNoNoNo,Maryland (State)CountyCountyCounty,22.00,EastNorthEastEast,UnknownNorthWestWest,...,UNKNOWNCURB,UNKNOWNCOMMERCIAL DRIVEWAYINTERSECTIONINTERSEC...,UNKNOWNT-INTERSECTIONT-INTERSECTIONT-INTERSECTION,UNKNOWNINTERSECTIONINTERSECTIONINTERSECTION,UNKNOWNSTRAIGHTSTRAIGHTSTRAIGHT,UNKNOWNNO DEFECTSNO DEFECTSNO DEFECTS,"UNKNOWNTWO-WAY, DIVIDED, POSITIVE MEDIAN BARRI...",232.460238,-465.281982,"(38.743373, -77.54699707)(38.743373, -77.54699..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39.8,-76.9,MCP94190016,170503561,Montgomery County Police,Property Damage Crash,04/22/2017 02:14:00 PM,No,County,2.15,South,North,...,0,NON INTERSECTION,0,0,STRAIGHT,NO DEFECTS,"TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER",39.838068,-76.865845,"(39.83806818, -76.86584473)"
39.9,-77.1,EJ7876004V,220037093,Gaithersburg Police Depar,Property Damage Crash,08/27/2022 05:00:00 PM,No,0,0.00,0,0,...,BUILDING,0,0,0,0,0,0,39.872900,-77.113580,"(39.8729, -77.11358)"
40.0,-78.7,MCP29360008,15021122,Montgomery County Police,Property Damage Crash,05/04/2015 02:36:00 PM,No,0,0.00,0,0,...,BUILDING,0,0,0,0,0,0,39.989747,-78.717041,"(39.98974718, -78.71704102)"
40.0,-77.1,MCP1202000HMCP12020016MCP1233004WMCP2777001T,150172991601313822004954415017121,Montgomery County PoliceMONTGOMERYMontgomery C...,Property Damage CrashProperty Damage CrashProp...,04/14/2015 12:34:00 PM03/17/2016 12:25:00 PM11...,NoNoNoNo,CountyGovernmentCountyMaryland (State),5.27,NorthSouthNorthEast,NorthNorthWestEast,...,BRIDGE OR OVERPASSOTHER,CROSSOVER RELATEDNON INTERSECTIONNON INTERSECT...,OTHERFOUR-WAY INTERSECTION,OTHER,STRAIGHTCURVE RIGHTSTRAIGHTSTRAIGHT,NO DEFECTSNO DEFECTSNO DEFECTSNO DEFECTS,"TWO-WAY, NOT DIVIDEDTWO-WAY, DIVIDED, POSITIVE...",159.913371,-308.421556,"(39.972695, -77.14739833)(39.96483333, -77.100..."


In [27]:
print(crash_data['Latitude'].round(1).value_counts())
print(crash_data['Longitude'].round(1).value_counts())

Latitude
39.1    42344
39.0    34925
39.2    18158
39.3     1828
38.9      122
39.4       34
39.5       10
39.7        9
38.7        8
40.0        6
38.8        5
39.6        3
39.8        1
37.7        1
39.9        1
38.4        1
38.0        1
38.6        1
Name: count, dtype: int64
Longitude
-77.1    32672
-77.2    28106
-77.0    26015
-77.3     7572
-76.9     2289
-77.4      657
-77.5       99
-76.7       15
-79.5        8
-76.8        5
-76.6        4
-76.5        3
-76.3        2
-76.0        2
-76.2        1
-78.7        1
-77.9        1
-79.4        1
-79.2        1
-77.8        1
-75.5        1
-78.2        1
-77.7        1
Name: count, dtype: int64


### Crash drivers

In [76]:
drivers = pd.read_csv('montgomery_drivers.csv')

  drivers = pd.read_csv('montgomery_drivers.csv')


In [114]:
brands = drivers['Vehicle Make'].value_counts().head(60)
brands

Vehicle Make
TOYOTA           23171
HONDA            18870
FORD             17138
TOYT              8840
NISSAN            8525
HOND              5765
DODGE             4488
HYUNDAI           3935
CHEVROLET         3796
JEEP              3769
UNKNOWN           3705
CHEV              3670
BMW               3310
CHEVY             3212
ACURA             2831
LEXUS             2818
KIA               2701
NISS              2649
SUBARU            2211
MAZDA             2151
HYUN              2063
GMC               1879
MERZ              1713
THOMAS            1569
MERCEDES          1364
AUDI              1356
VOLK              1068
GILL              1044
ACUR               962
CHRYSLER           951
SUBA               944
VOLVO              906
DODG               863
VOLKSWAGON         807
NEW FLYER          732
MAZD               714
FRHT               690
VOLKSWAGEN         679
INFINITI           660
BUICK              655
GILLIG             649
LEXS               605
CHRY               58

In [115]:
sum(brands.to_list())
# można wziąć 60/80/100 najpopularniejszych marek i z resztą nie trzeba się pierdolić i mozna wrzucić do unkown,
# to około 15k marek, czyli niecałe 10% danych. 

157930

In [116]:
# z modelami będzie pewnie więcej roboty ale to zobaczymy potem

In [78]:
drivers.columns

Index(['Report Number', 'Local Case Number', 'Agency Name', 'ACRS Report Type',
       'Crash Date/Time', 'Route Type', 'Road Name', 'Cross-Street Type',
       'Cross-Street Name', 'Off-Road Description', 'Municipality',
       'Related Non-Motorist', 'Collision Type', 'Weather',
       'Surface Condition', 'Light', 'Traffic Control',
       'Driver Substance Abuse', 'Non-Motorist Substance Abuse', 'Person ID',
       'Driver At Fault', 'Injury Severity', 'Circumstance',
       'Driver Distracted By', 'Drivers License State', 'Vehicle ID',
       'Vehicle Damage Extent', 'Vehicle First Impact Location',
       'Vehicle Second Impact Location', 'Vehicle Body Type',
       'Vehicle Movement', 'Vehicle Continuing Dir', 'Vehicle Going Dir',
       'Speed Limit', 'Driverless Vehicle', 'Parked Vehicle', 'Vehicle Year',
       'Vehicle Make', 'Vehicle Model', 'Equipment Problems', 'Latitude',
       'Longitude', 'Location'],
      dtype='object')

### Vehicles

In [28]:
vehicles = pd.read_csv("vehicles.csv")

  vehicles = pd.read_csv("vehicles.csv")


In [75]:
vehicles[(vehicles['make']=='Audi') & (vehicles['model']=='A4')]['year']

2974     1996
2975     1996
3805     1997
3806     1997
3807     1997
4629     1998
4630     1998
4631     1998
4632     1998
5494     1999
5495     1999
5496     1999
5497     1999
6381     2000
6382     2000
6383     2000
6384     2000
7305     2001
7306     2001
7307     2001
8323     2002
8324     2002
8325     2002
9401     2003
9402     2003
9403     2003
10588    2004
10590    2004
10591    2004
11844    2005
11846    2005
11847    2005
12832    2005
12833    2005
13090    2006
13091    2006
13092    2006
14478    2007
14479    2007
14480    2007
15690    2008
15691    2008
15692    2008
17480    2009
20448    2010
22285    2011
23250    2012
24910    2013
26137    2014
27545    2015
28985    2016
29976    2017
34227    2019
35414    2020
Name: year, dtype: int64

In [73]:
vehicles[(vehicles['make']=='Audi') & (vehicles['model']=='A4 quattro')]['year']

2976     1996
2977     1996
3808     1997
3809     1997
3810     1997
         ... 
35569    2020
36645    2021
37482    2022
39177    2023
40285    2024
Name: year, Length: 80, dtype: int64

In [74]:
# od 2021 audi a4 jest już tylko pod nazwą a4 quattro - to troche problematyczne :/

In [33]:
vehicles[['make', 'model', 'year']].sort_values(by='make')

Unnamed: 0,make,model,year
369,AM General,Post Office DJ8 2WD,1985
19311,AM General,FJ8c Post Office,1984
19313,AM General,DJ Po Vehicle 2WD,1984
358,AM General,Post Office DJ5 2WD,1985
20286,AM General,DJ Po Vehicle 2WD,1984
...,...,...,...
24546,smart,fortwo cabriolet,2013
16658,smart,fortwo coupe,2008
16657,smart,fortwo convertible,2008
34280,smart,EQ fortwo (convertible),2019
