In [10]:
import pandas as pd

traffic_logs = [
"id:501,zone:A1,vehicle:Car,speed:62,time:08:30,violations:[None],status:Smooth",
"id:502,zone:A1,vehicle:Bike,speed:85,time:09:10,violations:[Helmet],status:Busy",
"id:503,zone:B2,vehicle:Bus,speed:45,time:17:25,violations:[None],status:Smooth",
"id:504,zone:C3,vehicle:Car,speed:110,time:14:15,violations:[Overspeed],status:Congested",
"id:505,zone:A1,vehicle:Truck,speed:40,time:18:50,violations:[None],status:Smooth"
]

def parse_log(log_str):
    record = {}
    parts = log_str.split(",")

    for part in parts:
        key, value = part.split(":", 1)
        key = key.strip()
        value = value.strip()

        if key == "id":
            record[key] = int(value)
        elif key == "speed":
            record[key] = int(value)
        elif key == "violations":
            v = value.strip("[]")
            record[key] = [] if v == "None" else [v]
        else:
            record[key] = value

    return record

parsed_logs = [parse_log(entry) for entry in traffic_logs]


Convert each traffic log into structured Python data with validation.

In [36]:
df = pd.DataFrame(parsed_logs)
df

Unnamed: 0,id,zone,vehicle,speed,time,violations,status
0,501,A1,Car,62,08:30,[],Smooth
1,502,A1,Bike,85,09:10,[Helmet],Busy
2,503,B2,Bus,45,17:25,[],Smooth
3,504,C3,Car,110,14:15,[Overspeed],Congested
4,505,A1,Truck,40,18:50,[],Smooth


Calculate average speed per zone.

In [39]:
avg_speed_per_zone = df.groupby("zone")["speed"].mean()
avg_speed_per_zone

zone
A1     62.333333
B2     45.000000
C3    110.000000
Name: speed, dtype: float64

Determine peak hour (hour with highest traffic entries).

In [13]:
import pandas as pd

df['hour'] = df['time'].str.split(':').str[0].astype(int)

# Count number of entries per hour
entries_per_hour = df.groupby('hour').size()

# Find the hour with the maximum entries
peak_hour = entries_per_hour.idxmax()
max_entries = entries_per_hour.max()

print(f"Peak hour: {peak_hour}:00 with {max_entries} entries.")


Peak hour: 8:00 with 1 entries.


Find vehicles with speed > 80 km/h.

In [22]:
fast_vehicles = df[df['speed'] > 80]
fast_vehicles

Unnamed: 0,id,zone,vehicle,speed,time,violations,status,hour
1,502,A1,Bike,85,09:10,[Helmet],Busy,9
3,504,C3,Car,110,14:15,[Overspeed],Congested,14


Count occurrences of each violation type.

In [30]:
count=df['violations'].value_counts()
count

violations
[]             3
[Helmet]       1
[Overspeed]    1
Name: count, dtype: int64

Compute safety index for each zone.

In [38]:
df_exploded = df.explode('violations')

violations_per_zone = df_exploded[df_exploded['violations'].notna()].groupby('zone').size()
total_vehicles_per_zone = df.groupby('zone').size()
safety_index = 1 - (violations_per_zone / total_vehicles_per_zone)
safety_index = safety_index.fillna(1)
safety_index

zone
A1    0.666667
B2    1.000000
C3    0.000000
dtype: float64

Create a summary for each vehicle category.

In [41]:
df_exploded = df.explode('violations')

summary = df_exploded.groupby('vehicle').agg(
    total_vehicles=('id', 'count'),                   # number of vehicles
    avg_speed=('speed', 'mean'),                       # average speed
    total_violations=('violations', lambda x: x.notna().sum())  # count violations
).reset_index()

summary

Unnamed: 0,vehicle,total_vehicles,avg_speed,total_violations
0,Bike,1,85.0,1
1,Bus,1,45.0,0
2,Car,2,86.0,1
3,Truck,1,40.0,0


Identify high-congestion zones.

In [47]:
high_congestion = df[df['status'].isin(['Busy', 'Congested'])]

# Count number of congested vehicles per zone
congestion_count = high_congestion.groupby('zone').size()

congestion_count

zone
A1    1
C3    1
dtype: int64

Classify each log into time windows (Morning, Afternoon, Evening, Night).

In [49]:
df['hour'] = df['time'].str.split(':').str[0].astype(int)
def time_window(hour):
    if 6 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 18:
        return 'Afternoon'
    elif 18 <= hour < 22:
        return 'Evening'
    else:
        return 'Night'

df['time_window'] = df['hour'].apply(time_window)
df[['time', 'hour', 'time_window']]

Unnamed: 0,time,hour,time_window
0,08:30,8,Morning
1,09:10,9,Morning
2,17:25,17,Afternoon
3,14:15,14,Afternoon
4,18:50,18,Evening


Generate final zone-level report (vehicles, avg speed, violations, common vehicle type, safety
category).

In [51]:
total_vehicles = df.groupby('zone')['id'].count()

# Average speed per zone
avg_speed = df.groupby('zone')['speed'].mean()

# Total violations per zone
total_violations = df.explode('violations')
total_violations = total_violations.groupby('zone')['violations'].apply(lambda x: x.notna().sum())

# Most common vehicle type per zone
common_vehicle = df.groupby('zone')['vehicle'].agg(lambda x: x.mode()[0])

Safety index 
safety_index = df.explode('violations').groupby('zone').apply(
    lambda x: 1 - x['violations'].notna().sum() / len(x)
)

def safety_category(index):
    if index >= 0.75:
        return 'High'
    elif index >= 0.5:
        return 'Medium'
    else:
        return 'Low'

safety_cat = safety_index.apply(safety_category)

zone_report = pd.DataFrame({
    'total_vehicles': total_vehicles,
    'avg_speed': avg_speed,
    'total_violations': total_violations,
    'common_vehicle': common_vehicle,
    'safety_category': safety_cat
}).reset_index()

zone_report


  safety_index = df.explode('violations').groupby('zone').apply(


Unnamed: 0,zone,total_vehicles,avg_speed,total_violations,common_vehicle,safety_category
0,A1,3,62.333333,1,Bike,Medium
1,B2,1,45.0,0,Bus,High
2,C3,1,110.0,1,Car,Low
