In [1]:
import geopandas as gpd

In [None]:
# Load the segment-level table (one row per counter_name / year / month,
# judging by the selected columns — TODO confirm against the parquet schema).
segments_with_risk = gpd.read_parquet("../notebooks/data/panel/segments_with_risk.parquet")

# Keep only the columns used in this notebook.
# Each entry carries its own trailing comma: adjacent string literals without a
# comma are silently concatenated by Python (previously "longitude" and
# "total_accidents" fused into the non-existent label "longitudetotal_accidents",
# which makes .loc raise a KeyError and drops total_accidents from the selection).
segments_with_risk = segments_with_risk.loc[:, [
    "counter_name",
    "year",
    "month",
    "geometry",
    "latitude",
    "longitude",
    "total_accidents",
    "sum_strava_total_trip_count",
    "street_name",
    "rr_eb",
]]

# Remember the CRS so it can be re-applied after operations that rebuild the frame.
original_crs = segments_with_risk.crs

In [33]:
# Quick look at the latitude column of the loaded segments.
segments_with_risk["latitude"]

0         52.529021
1         52.529021
2         52.529021
3         52.529021
4         52.529021
            ...    
260095    52.515498
260096    52.515498
260097    52.515498
260098    52.515498
260099    52.515498
Name: latitude, Length: 260100, dtype: float64

Aggregate the dataframe over all years and months, so that each `counter_name` is reduced to a single row.

In [15]:
# Reducer applied to each column when the rows of one segment are collapsed:
# the two count columns are summed, the remaining columns use the "first" aggregate.
agg_logic = dict(
    total_accidents="sum",
    sum_strava_total_trip_count="sum",
    geometry="first",
    street_name="first",
    rr_eb="first",
)

# Collapse to one row per counter_name and turn the group key back into a column.
df_aggregated = (
    segments_with_risk
    .groupby("counter_name")
    .agg(agg_logic)
    .reset_index()
)

# Re-wrap the result as a GeoDataFrame, re-attaching the CRS stored in original_crs.
df_aggregated = gpd.GeoDataFrame(df_aggregated, geometry="geometry", crs=original_crs)

In [16]:
# Summary statistics of the per-segment accident totals.
print(df_aggregated["total_accidents"].describe())

count    4335.000000
mean        2.353172
std         4.114487
min         0.000000
25%         0.000000
50%         1.000000
75%         3.000000
max        57.000000
Name: total_accidents, dtype: float64


In [17]:
# Summary statistics of the per-segment summed Strava trip counts.
print(df_aggregated["sum_strava_total_trip_count"].describe())

count      4335.000000
mean      29030.116494
std       43805.850980
min          35.000000
25%        3527.500000
50%       13680.000000
75%       37180.000000
max      586505.000000
Name: sum_strava_total_trip_count, dtype: float64


In [18]:
# mask1: more than 20 accidents AND more than 50,000 summed Strava trips.
mask1 = (
    df_aggregated["total_accidents"].gt(20)
    & df_aggregated["sum_strava_total_trip_count"].gt(50000.0)
)
# mask2: more than 20 accidents AND fewer than 10,000 summed Strava trips.
mask2 = (
    df_aggregated["total_accidents"].gt(20)
    & df_aggregated["sum_strava_total_trip_count"].lt(10000.0)
)

In [20]:
# First rows of the segments selected by mask1.
df_aggregated.loc[mask1].head()

Unnamed: 0,counter_name,total_accidents,sum_strava_total_trip_count,geometry,street_name,rr_eb
671,streetsegment_1677,26.0,59485.0,"LINESTRING (392317.91 5816520.498, 392336.166 ...",streetsegment_1677,7.046532
1170,streetsegment_2143,27.0,80685.0,"LINESTRING (389863.308 5821905.647, 389894.99 ...",streetsegment_2143,5.272712
1251,streetsegment_2223,37.0,72610.0,"LINESTRING (390316.726 5821280.076, 390373.935...",streetsegment_2223,8.055337
1265,streetsegment_2236,35.0,94065.0,"LINESTRING (390629.547 5821592.699, 390637.882...",streetsegment_2236,5.993906
1276,streetsegment_2246,22.0,56855.0,"LINESTRING (395038.266 5814938.5, 395060.355 5...",streetsegment_2246,6.48398


In [21]:
# All segments selected by mask2.
df_aggregated.loc[mask2]

Unnamed: 0,counter_name,total_accidents,sum_strava_total_trip_count,geometry,street_name,rr_eb
3641,streetsegment_4765,21.0,2200.0,"LINESTRING (404295.644 5821962.657, 404296.239...",streetsegment_4765,81.916781


In [25]:
# Load the accident records that carry a counter_name column, then show the
# rows whose counter_name is "streetsegment_4765".
accidents_with_segments = gpd.read_parquet("../data/dataframes/accidents_with_segment.parquet")
accidents_with_segments.loc[
    accidents_with_segments["counter_name"].eq("streetsegment_4765")
]

Unnamed: 0,ULAND,UREGBEZ,UKREIS,UGEMEINDE,UJAHR,UMONAT,USTUNDE,UWOCHENTAG,UKATEGORIE,UART,...,IstStrassenzustand,PLST,date,geometry,acc_id,index_right,counter_name,latitude,longitude,dist
19685,11,0,10,10,2022,11,16,4,3,5,...,0.0,,2022-11-01,POINT (404475.164 5822466.059),19685,4765.0,streetsegment_4765,52.54291,13.591434,0.1161
16359,11,0,10,10,2022,3,16,1,3,2,...,0.0,,2022-03-01,POINT (404534.894 5822743.516),16359,4765.0,streetsegment_4765,52.54291,13.591434,0.339328
8236,11,0,10,10,2019,7,10,7,3,5,...,,,2019-07-01,POINT (404533.842 5822739.311),8236,4765.0,streetsegment_4765,52.54291,13.591434,0.361506
29757,11,0,10,10,2021,5,19,4,3,5,...,0.0,,2021-05-01,POINT (404533.149 5822736.541),29757,4765.0,streetsegment_4765,52.54291,13.591434,0.376054
31855,11,0,10,10,2021,9,12,5,3,5,...,0.0,,2021-09-01,POINT (404519.761 5822682.947),31855,4765.0,streetsegment_4765,52.54291,13.591434,0.63992
30447,11,0,10,10,2021,6,14,3,3,5,...,0.0,,2021-06-01,POINT (404519.223 5822680.771),30447,4765.0,streetsegment_4765,52.54291,13.591434,0.64531
31870,11,0,10,10,2021,8,11,6,3,5,...,0.0,,2021-08-01,POINT (404514.793 5822662.859),31870,4765.0,streetsegment_4765,52.54291,13.591434,0.701158
19735,11,0,10,10,2022,10,11,4,2,5,...,1.0,,2022-10-01,POINT (404485.522 5822218.963),19735,4765.0,streetsegment_4765,52.54291,13.591434,0.726281
29897,11,0,10,10,2021,5,13,7,3,5,...,0.0,,2021-05-01,POINT (404509.325 5822640.749),29897,4765.0,streetsegment_4765,52.54291,13.591434,0.800669
17710,11,0,10,10,2022,6,13,4,3,3,...,0.0,,2022-06-01,POINT (404497.922 5822594.637),17710,4765.0,streetsegment_4765,52.54291,13.591434,1.008259
