In [22]:
# Data handling
import pandas as pd
import numpy as np
# Plotting. `plt` has to be the pyplot interface: the bare `matplotlib`
# package does not expose the plotting functions (subplots, show, ...)
# that the `plt` alias is conventionally used for.
import matplotlib.pyplot as plt
import seaborn as sns

In [23]:
# Read the traffic observations from the CSV file into `df`,
# then preview the first five rows.
df = pd.read_csv(filepath_or_buffer='traffic_data.csv')
df.head(n=5)

Unnamed: 0,Day,Hour,Vehicle_Count,Avg_Speed,Weather,Accidents
0,Monday,6,120,55.2,Clear,0
1,Monday,8,250,42.5,Clear,1
2,Monday,12,180,48.3,Clear,0
3,Monday,18,300,35.4,Rainy,2
4,Tuesday,6,130,53.1,Foggy,0


In [24]:
# Summarise the frame: number of rows, per-column non-null counts and dtypes,
# and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Day            26 non-null     object 
 1   Hour           26 non-null     int64  
 2   Vehicle_Count  26 non-null     int64  
 3   Avg_Speed      26 non-null     float64
 4   Weather        26 non-null     object 
 5   Accidents      26 non-null     int64  
dtypes: float64(1), int64(3), object(2)
memory usage: 1.3+ KB


In [25]:
# Tally the missing entries column by column
# (`isnull` is the pandas alias of `isna`).
df.isnull().sum(axis=0)

Day              0
Hour             0
Vehicle_Count    0
Avg_Speed        0
Weather          0
Accidents        0
dtype: int64

In [26]:
# Display only the rows that contain at least one missing value.
rows_with_gaps = df.isna().any(axis='columns')
df.loc[rows_with_gaps]

Unnamed: 0,Day,Hour,Vehicle_Count,Avg_Speed,Weather,Accidents


In [27]:
# Frequency of each distinct value, one column at a time.
# Covers the categorical columns (Day, Weather) as well as the numeric ones.
for column_name in ['Day', 'Hour', 'Vehicle_Count', 'Avg_Speed', 'Weather', 'Accidents']:
    print(df[column_name].value_counts())

Day
Monday       4
Tuesday      4
Wednesday    4
Thursday     4
Friday       4
Saturday     3
Sunday       3
Name: count, dtype: int64
Hour
18    7
6     5
8     5
12    5
10    2
14    2
Name: count, dtype: int64
Vehicle_Count
250    2
180    2
310    2
270    2
210    2
130    1
300    1
120    1
260    1
140    1
320    1
330    1
200    1
125    1
255    1
195    1
150    1
280    1
230    1
350    1
220    1
Name: count, dtype: int64
Avg_Speed
55.2    1
42.5    1
48.3    1
35.4    1
53.1    1
40.2    1
49.0    1
33.7    1
52.0    1
38.5    1
46.8    1
34.0    1
54.0    1
41.2    1
47.1    1
36.3    1
50.8    1
37.9    1
45.0    1
32.4    1
44.3    1
43.1    1
39.5    1
46.7    1
45.2    1
40.1    1
Name: count, dtype: int64
Weather
Clear    15
Rainy     7
Foggy     4
Name: count, dtype: int64
Accidents
0    14
1     7
2     3
3     2
Name: count, dtype: int64


In [28]:
# Make sure the measurement columns carry numeric dtypes.
# The count-like columns are cast strictly: astype(int) raises on bad values.
for int_column in ('Hour', 'Vehicle_Count', 'Accidents'):
    df[int_column] = df[int_column].astype(int)
# Avg_Speed is converted leniently: unparseable entries become NaN.
df['Avg_Speed'] = pd.to_numeric(df['Avg_Speed'], errors='coerce')

In [29]:
# Congestion index = vehicle count divided by average speed, so it rises
# with more vehicles and with lower speeds.
df['Congestion_index'] = df['Vehicle_Count'].div(df['Avg_Speed'])

In [33]:
# Flag (1/0) the observations whose hour is one of the rush hours:
# 7-9 and 16-18.
peak_hours = [7, 8, 9, 16, 17, 18]
is_peak = df['Hour'].isin(peak_hours)
df['Rush_hour'] = is_peak.astype(int)
df.head()

Unnamed: 0,Day,Hour,Vehicle_Count,Avg_Speed,Weather,Accidents,Congestion_index,Rush_hour
0,Monday,6,120,55.2,Clear,0,2.173913,0
1,Monday,8,250,42.5,Clear,1,5.882353,1
2,Monday,12,180,48.3,Clear,0,3.726708,0
3,Monday,18,300,35.4,Rainy,2,8.474576,1
4,Tuesday,6,130,53.1,Foggy,0,2.448211,0


In [36]:
# Day type indicator: 1 when the day is Saturday or Sunday, 0 otherwise.
weekend_days = ['Saturday', 'Sunday']
on_weekend = df['Day'].isin(weekend_days)
df['Weekend?'] = on_weekend.astype(int)
df.tail(10)

Unnamed: 0,Day,Hour,Vehicle_Count,Avg_Speed,Weather,Accidents,Congestion_index,Rush_hour,Weekend?
16,Friday,6,150,50.8,Foggy,0,2.952756,0,0
17,Friday,8,280,37.9,Clear,2,7.387863,1,0
18,Friday,12,230,45.0,Rainy,0,5.111111,0,0
19,Friday,18,350,32.4,Rainy,3,10.802469,1,0
20,Saturday,10,220,44.3,Clear,0,4.96614,0,1
21,Saturday,14,270,43.1,Clear,0,6.264501,0,1
22,Saturday,18,310,39.5,Clear,1,7.848101,1,1
23,Sunday,10,180,46.7,Foggy,0,3.85439,0,1
24,Sunday,14,210,45.2,Clear,0,4.646018,0,1
25,Sunday,18,250,40.1,Rainy,1,6.234414,1,1


In [None]:
# Aggregations for plotting
# Mean vehicle count per hour of day (numeric hours sort naturally).
traffic_by_hour = df.groupby('Hour')['Vehicle_Count'].mean().sort_index()
# Mean speed per weather condition, labels in alphabetical order.
speed_by_weather = df.groupby('Weather')['Avg_Speed'].mean().sort_index()
# Mean accidents per day of week. A plain sort_index() would order the day
# names alphabetically (Friday, Monday, Saturday, ...), so sort by calendar
# position instead. Labels not in `day_order` map to NaN and end up last.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_rank = {day: position for position, day in enumerate(day_order)}
accidents_by_day = (
    df.groupby('Day')['Accidents']
    .mean()
    .sort_index(key=lambda days: days.map(day_rank))
)