# AnyWay project - Analyzing Road Safety for Electric Scooters and Bicycles

Project Scope:

This project aims to assess road safety conditions for electric scooter and electric bicycle users by analyzing accident data in the country. The analysis will focus on the following key aspects:

- Identification of accident-prone areas that are less friendly for electric scooter and electric bicycle riders.
- Examination of the individuals involved in accidents, including their roles (e.g., riders, drivers, pedestrians), and the nature of injuries sustained.
- Geospatial analysis to determine the specific roads and intersections where accidents occur most frequently.
    
    


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline
import plotly.graph_objects as go
import seaborn as sns
from IPython.display import Image

In [2]:
# Columns needed for this analysis; every other column in the CSV is skipped at load time.
columns_to_read = [
    'provider_and_id',
    'involved_type',
    'accident_year',
    'accident_yishuv_symbol',
    'involve_yishuv_name',
    'injured_type_hebrew',
    'injury_severity_hebrew',
    'injury_severity',
    'involve_yishuv_symbol',
    'involve_vehicle_type_hebrew',
    'street1_hebrew',
    'street2_hebrew',
    'longitude',
    'latitude',
    'accident_type',
    'accident_type_hebrew',
    'involved_type_hebrew',
]

# The path is relative to the notebook's working directory.
involved_markers_hebrew = pd.read_csv(
    './csv_tables/involved_markers_heb_ta.csv',
    usecols=columns_to_read,
)

FileNotFoundError: [Errno 2] No such file or directory: './csv_tables/involved_markers_heb_ta.csv'

In [None]:
# Collect the rows that exactly repeat another row in the loaded data.
is_repeated_row = involved_markers_hebrew.duplicated()
duplicates = involved_markers_hebrew[is_repeated_row]

In [None]:
# Show the duplicated rows collected in the previous cell.
duplicates

In [None]:
# Remove duplicate rows, modifying involved_markers_hebrew in place
# (inplace=True, so nothing is returned or reassigned).
involved_markers_hebrew.drop_duplicates(inplace=True)

In [None]:
# Report the earliest and latest values found in the accident_year column.
accident_years = involved_markers_hebrew['accident_year']
print("Minimum Date:", accident_years.min())
print("Maximum Date:", accident_years.max())

In [None]:
# Parse accident_year with the '%Y' format so that the `.dt` accessor can be used on it later.
year_as_datetime = pd.to_datetime(involved_markers_hebrew['accident_year'], format='%Y')
involved_markers_hebrew['accident_year'] = year_as_datetime

In [None]:
# Keep only the records whose accident year is 2017 or later, then display them.
from_2017_onwards = involved_markers_hebrew['accident_year'].dt.year.ge(2017)
involved_markers_hebrew_new = involved_markers_hebrew.loc[from_2017_onwards]
involved_markers_hebrew_new

In [None]:
# Restrict the data to accidents whose accident_yishuv_symbol equals 5000.
# NOTE(review): 5000 is presumably the locality code for Tel Aviv-Yafo (the CSV name ends in `_ta`) - confirm.
in_locality_5000 = involved_markers_hebrew_new['accident_yishuv_symbol'].eq(5000)
involved_markers_hebrew_tv = involved_markers_hebrew_new.loc[in_locality_5000]
involved_markers_hebrew_tv

In [None]:
# NOTE(review): this cell repeats the previous cell verbatim (same filter on
# accident_yishuv_symbol == 5000, same result name); it can probably be removed.
involved_markers_hebrew_tv = involved_markers_hebrew_new[involved_markers_hebrew_new['accident_yishuv_symbol'] == 5000]
involved_markers_hebrew_tv

In [None]:
# Number of records per vehicle type among the rows selected by locality code 5000.
involved_markers_hebrew_tv['involve_vehicle_type_hebrew'].value_counts()

In [None]:
# Number of records per injury_severity code (checked before the rows with code 0 are dropped below).
involved_markers_hebrew_tv['injury_severity'].value_counts()

In [None]:
# Discard the rows whose injury_severity is 0 and preview what remains.
# NOTE(review): code 0 presumably means "not injured" - confirm against the data dictionary.
has_injury = involved_markers_hebrew_tv['injury_severity'].ne(0)
involved_markers_hebrew_tv = involved_markers_hebrew_tv.loc[has_injury]
involved_markers_hebrew_tv.head()

In [None]:
# Summarize column dtypes and non-null counts after the severity filter.
involved_markers_hebrew_tv.info()

In [None]:
# Vehicle-type counts again, now that the rows with injury_severity == 0 have been removed.
involved_markers_hebrew_tv['involve_vehicle_type_hebrew'].value_counts()

In [None]:
# Keep only the rows whose vehicle type is an electric scooter ("קורקינט חשמלי")
# or an electric bicycle ("אופניים חשמליים").
micromobility_vehicle_types = ["קורקינט חשמלי", "אופניים חשמליים"]
vehicle_mask = involved_markers_hebrew_tv['involve_vehicle_type_hebrew'].isin(micromobility_vehicle_types)

# Apply the mask. The explicit copy makes filtered_df an independent frame, so
# later column assignments on it (or on slices of it) do not target a view of
# involved_markers_hebrew_tv.
filtered_df = involved_markers_hebrew_tv[vehicle_mask].copy()

In [None]:
# Display the rows that passed the vehicle-type filter.
filtered_df

In [None]:
# Number of filtered records per numeric injury_severity code.
filtered_df['injury_severity'].value_counts()

In [None]:
# Same breakdown using the Hebrew severity label column instead of the numeric code.
filtered_df['injury_severity_hebrew'].value_counts()

Percentage of fatally and severely injured people out of the total number of injured

In [None]:
# Total number of records in the filtered data (rows with injury_severity == 0
# were already removed upstream, so every row counts as an injured person).
total_injured = len(filtered_df)

# Number of records whose severity code is 1 or 2.
# NOTE(review): codes 1 and 2 are presumably "killed" and "severely injured" - confirm against the data dictionary.
dead_severely_injured = int(filtered_df['injury_severity'].isin([1, 2]).sum())

if total_injured:
    # Share of dead/severely injured among all injured, as a percentage.
    percentage_dead_severely_injured = (dead_severely_injured / total_injured) * 100
    print("Percentage of dead and severely injured out of total injured:", percentage_dead_severely_injured, "%")
else:
    # An empty filtered frame would otherwise raise ZeroDivisionError.
    percentage_dead_severely_injured = float('nan')
    print("No injured records found; the percentage of dead and severely injured is undefined.")

In [None]:
# Split the filtered accidents by how many street-name columns are filled in.
has_street1 = filtered_df['street1_hebrew'].notna()
has_street2 = filtered_df['street2_hebrew'].notna()

# Intersection accidents: both street names are present.
# Copy so that columns added to this frame later are written to an independent
# object instead of a slice of filtered_df (avoids SettingWithCopyWarning).
intersections_df = filtered_df[has_street1 & has_street2].copy()

# Street accidents: exactly one of the two street names is present.
# Rows where both names are missing fall into neither frame.
streets_df = filtered_df[has_street1 ^ has_street2].copy()

In [None]:
# Display the accidents that have both street names filled in.
intersections_df

In [None]:
# Column dtypes and non-null counts for the intersection accidents.
intersections_df.info()

In [None]:
# Display the accidents that have exactly one street name filled in.
streets_df

In [None]:
# Preview the last rows of the street accidents.
streets_df.tail()

In [None]:
# Column dtypes and non-null counts for the street accidents.
streets_df.info()

In [None]:
# Work on an independent copy: intersections_df was produced by boolean
# filtering, and assigning a new column to such a slice triggers pandas'
# SettingWithCopyWarning.
intersections_df = intersections_df.copy()

# Build an order-independent name for each intersection by sorting the two
# street names, so "A & B" and "B & A" are counted as the same intersection.
# A plain comprehension is used instead of a row-wise apply, which also keeps
# the assignment valid when the frame is empty.
intersections_df['intersection_name'] = [
    ' & '.join(sorted(street_pair))
    for street_pair in zip(intersections_df['street1_hebrew'], intersections_df['street2_hebrew'])
]

# Count accidents per intersection. value_counts() already returns the counts
# in descending order, so no additional sort is needed.
intersection_counts = (
    intersections_df['intersection_name']
    .value_counts()
    .rename_axis('intersection_name')
    .reset_index(name='accident_count')
)

# Show the intersections with the most accidents
intersection_counts.head()

In [None]:
# Bar chart of the ten intersections with the highest accident counts.
top_ten_intersections = intersection_counts.head(10)
fig = px.bar(
    top_ten_intersections,
    x='intersection_name',
    y='accident_count',
    title='Top Intersections by Accident Count',
)

# Render the chart
fig.show()

In [None]:
# Count street accidents per calendar year and draw one bar per year.
# accident_year holds datetimes (converted earlier with format='%Y'), so a
# 10-bin histogram over it splits a handful of discrete years into arbitrary
# bins; counting per year gives exactly one bar for each year.
accidents_per_year = streets_df['accident_year'].dt.year.value_counts().sort_index()

plt.bar(accidents_per_year.index, accidents_per_year.values, edgecolor='k')
plt.xticks(accidents_per_year.index)  # one tick per year, no fractional years
plt.xlabel('Year')
plt.ylabel('Accident Count')
plt.title('Distribution of Accidents by Year for streets')
plt.show()

In [None]:
# Count the accidents recorded for each value of 'street1_hebrew'.
# value_counts() already returns the counts in descending order.
street_counts = (
    streets_df['street1_hebrew']
    .value_counts()
    .rename_axis('street1_hebrew')
    .reset_index(name='accident_count')
)

# Create a bar chart using Plotly.
# The keys of `labels` must be the data-frame column names that are plotted;
# the previous key 'street_name' matched no column, so the x-axis label was
# never applied.
fig = px.bar(
    street_counts,
    x='street1_hebrew',
    y='accident_count',
    title='Accident Counts per Street',
    labels={'street1_hebrew': 'Street Name', 'accident_count': 'Accident Count'},
    width=1000,  # Adjust the width of the chart as needed
    height=500,  # Adjust the height of the chart as needed
)

# Rotate x-axis labels for better readability
fig.update_xaxes(tickangle=45)

# Show the plot
fig.show()

## שדרות ירושלים

In [None]:
# Plot the raw coordinates of the street accidents: longitude on x, latitude on y.
axes = plt.gca()
axes.scatter(streets_df['longitude'], streets_df['latitude'])
axes.set_xlabel('Longitude')
axes.set_ylabel('Latitude')
axes.set_title('Scatter Plot of Latitude vs. Longitude')
plt.show()

In [None]:
# Print descriptive statistics for each coordinate column of the street accidents.
for heading, column in (
    ("Latitude Summary Statistics:", 'latitude'),
    ("\nLongitude Summary Statistics:", 'longitude'),
):
    print(heading)
    print(streets_df[column].describe())

In [None]:
# Interactive point map of the street accidents.
street_map_options = dict(
    lat="latitude",
    lon="longitude",
    hover_name="injury_severity_hebrew",  # headline of the hover box
    hover_data=["injured_type_hebrew", "accident_year"],  # extra hover fields
    color="injury_severity",  # points are coloured by the numeric severity code
    color_continuous_scale=px.colors.cyclical.IceFire,
    text="street1_hebrew",  # street name attached to each point as text
    title="Accidents on Streets Map",
    mapbox_style="carto-positron",
    zoom=5,
)
fig = px.scatter_mapbox(streets_df, **street_map_options)

# Set all four figure margins to zero
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))

# Render the map
fig.show()

Display dangerous areas or streets on a map according to the number of accidents on each street

In [None]:
# Accident count per 'street1_hebrew' value, as a two-column frame (street, accident_count).
street_accidents = (
    streets_df['street1_hebrew']
    .value_counts()
    .rename_axis('street')
    .reset_index(name='accident_count')
)

In [None]:
# Show the first 15 rows of the per-street accident counts.
street_accidents.head(15)

In [None]:
# Re-check dtypes and non-null counts before looking at missing values.
streets_df.info()

In [None]:
# Count the missing entries in the street-name and coordinate columns.
location_columns = ['street1_hebrew', 'longitude', 'latitude']
missing_values = streets_df[location_columns].isna().sum()
missing_values

In [None]:
# Keep only the rows that have a value in 'street1_hebrew'.
has_street_name = streets_df['street1_hebrew'].notna()
streets_df = streets_df[has_street_name]

In [None]:
# Create a density mapbox plot
fig = px.density_mapbox(
    streets_df,
    lat='latitude',  # Latitude column
    lon='longitude',  # Longitude column
    hover_name='street1_hebrew',  # Use 'street1_hebrew' as the hover information
    radius=10,  # Adjust the radius as needed
    center=dict(lat=31.0, lon=35.0),  # Center of the map
    zoom=8,  # Initial zoom level
    mapbox_style="carto-positron",  # Mapbox style
    title="Density Map of Accidents by Street",
)

# Show the plot
fig.show()

## Explore the intersection data from 2018 to 2022

In [None]:
# Years to visualize, one density map per year
years = [2018, 2019, 2020, 2021, 2022]

# Centre every yearly map on the mean coordinate of all intersection accidents
# instead of a hard-coded point: the data is then in view, and all years share
# the same viewport, which makes them directly comparable.
intersections_map_center = dict(
    lat=intersections_df['latitude'].mean(),
    lon=intersections_df['longitude'].mean(),
)

# Extract the calendar year once rather than on every loop iteration.
intersection_accident_years = intersections_df['accident_year'].dt.year

# Figures are collected first and displayed afterwards
figures = []

for year in years:
    # Rows of the current year only
    year_df = intersections_df[intersection_accident_years == year]

    # Density layer for the current year
    fig = go.Figure(go.Densitymapbox(
        lat=year_df['latitude'],
        lon=year_df['longitude'],
        hoverinfo='skip',  # Skip hover info to keep it clean
        radius=10,
        colorbar=dict(thickness=20, title=f'Accidents ({year})'),
    ))

    # Map style, viewport and figure size
    fig.update_layout(
        mapbox=dict(
            center=intersections_map_center,
            style="carto-positron",
            zoom=8,
        ),
        showlegend=False,
        height=800,
        width=600,
        title_text=f"Density Map of Accidents at Intersections ({year})",
    )

    figures.append(fig)

# Display the figures
for fig in figures:
    fig.show()

In [None]:
# Centre the map on the mean coordinate of the intersection accidents rather
# than a hard-coded point, so the data is in view when the map opens.
intersections_overview_center = dict(
    lat=intersections_df['latitude'].mean(),
    lon=intersections_df['longitude'].mean(),
)

# Create a density mapbox plot for intersections (all years together).
# Requires the 'intersection_name' column added to intersections_df earlier.
fig = px.density_mapbox(
    intersections_df,
    lat='latitude',  # Latitude column
    lon='longitude',  # Longitude column
    hover_name='intersection_name',  # Use 'intersection_name' as the hover information
    radius=10,  # Adjust the radius as needed
    center=intersections_overview_center,  # Centre of the map, derived from the data
    zoom=8,  # Initial zoom level
    mapbox_style="carto-positron",  # Mapbox style
    title="Density Map of Accidents at Intersections",
)

# Show the plot
fig.show()

In [None]:
# Number of filtered records per accident type (Hebrew label).
filtered_df['accident_type_hebrew'].value_counts()

## Distribution of Accident Types

In [None]:
# Tabulate the accident types of the filtered records and show their shares as a pie chart.
accident_type_counts = (
    filtered_df['accident_type_hebrew']
    .value_counts()
    .rename_axis('Accident Type')
    .reset_index(name='Count')
)

fig = px.pie(
    accident_type_counts,
    names='Accident Type',
    values='Count',
    title='Distribution of Accident Types',
)
fig.show()

Compare the locations of these accidents with the locations of accidents that do not involve micromobility vehicles.

Open question: would it be better to focus only on the streets with the most accidents?

In [None]:

# Count the number of accidents for each 'street1_hebrew' value
street_counts = streets_df['street1_hebrew'].value_counts()

# The ten streets with the most accidents (value_counts sorts in descending order)
top_streets = street_counts.head(10).index

# Collect one small frame per street and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration.
per_street_frames = []

for street in top_streets:
    # Rows belonging to the current street
    street_data = streets_df[streets_df['street1_hebrew'] == street]

    # Occurrences of each accident type on the current street
    accident_type_counts = (
        street_data['accident_type_hebrew']
        .value_counts()
        .rename_axis('Accident Type')
        .reset_index(name='Count')
    )

    # Put the street name first so the column order is Street, Accident Type, Count
    accident_type_counts.insert(0, 'Street', street)

    per_street_frames.append(accident_type_counts)

if per_street_frames:
    result_df = pd.concat(per_street_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty frame with the expected columns
    result_df = pd.DataFrame(columns=['Street', 'Accident Type', 'Count'])

# Display the result DataFrame
print(result_df)

### Which types of accidents occur on each of the streets with the most accidents

In [None]:
# Stacked bar chart: one bar per street in result_df, split by accident type.
stacked_bar_labels = {'Count': 'Accident Count'}
fig = px.bar(
    result_df,
    x='Street',
    y='Count',
    color='Accident Type',
    labels=stacked_bar_labels,
)

# Axis titles; the street names are rotated for readability
fig.update_xaxes(title='Street', tickangle=45)
fig.update_yaxes(title='Accident Count')

# Figure title plus a horizontal legend anchored by its bottom-right corner at (x=1, y=1.02)
horizontal_legend = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
fig.update_layout(
    title='Accident Types',
    legend_title_text='Accident Type',
    legend=horizontal_legend,
)
fig.show()

![WhatsApp%20Image%202023-09-26%20at%2021.35.21.jpeg](attachment:WhatsApp%20Image%202023-09-26%20at%2021.35.21.jpeg)

## אבן גבירול

In [None]:
# Embed a remote image (referenced by URL) in the notebook output via IPython.display.Image.
Image(url='https://upload.wikimedia.org/wikipedia/commons/thumb/a/a4/HPIM2097.JPG/1200px-HPIM2097.JPG')

In [None]:
    # Point map of every filtered accident, centred on the mean coordinate of the data.
    # NOTE(review): this cell is indented one level although it is not inside any
    # enclosing block; it would be an IndentationError in a plain .py script - confirm and dedent.
    mean_location = dict(
        lat=filtered_df['latitude'].mean(),
        lon=filtered_df['longitude'].mean(),
    )

    fig = px.scatter_mapbox(
        filtered_df,
        lat='latitude',
        lon='longitude',
        hover_name='accident_type_hebrew',
        title='Accident Locations',
        mapbox_style='carto-positron',
    )

    # Viewport: centre on the data and zoom in
    fig.update_layout(mapbox=dict(center=mean_location, zoom=10))

    fig.show()



TODO (next week): analyze who is involved and which vehicles are involved.

In [None]:
# Number of filtered records per involved-person type (Hebrew label).
involved_counts = filtered_df['involved_type_hebrew'].value_counts()
print(involved_counts)

In [None]:
# Number of filtered records per vehicle type; only the two vehicle types
# selected when filtered_df was built can appear here.
vehicle_counts = filtered_df['involve_vehicle_type_hebrew'].value_counts()
print(vehicle_counts)