In [14]:
# Colab-only setup: attach Google Drive to this runtime so the CSV inputs
# stored under MyDrive can be read by the cells below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Import Required Libraries

In [15]:
import pandas as pd


Load the datasets

In [16]:
# Locations of the two input CSVs on the mounted Drive.
traffic_data_path = r"/content/drive/MyDrive/Predicting City Traffic Flow Based on Weather and Events/cleaned_traffic_weather_dataset.csv"
weather_data_path = r"/content/drive/MyDrive/Predicting City Traffic Flow Based on Weather and Events/weather.csv"

# Read both files with default parsing; traffic first, then weather.
traffic_df, weather_df = [
    pd.read_csv(csv_path)
    for csv_path in (traffic_data_path, weather_data_path)
]

Parse datetime columns for aggregation

In [17]:
# Give both frames a `date` column holding plain calendar dates (time of day
# discarded) so they share a key for the daily aggregation and the merge.
traffic_timestamps = pd.to_datetime(traffic_df['updated_at_30min'])
traffic_df['date'] = traffic_timestamps.dt.date

# The weather frame already has a `date` column; it is parsed and overwritten.
weather_timestamps = pd.to_datetime(weather_df['date'])
weather_df['date'] = weather_timestamps.dt.date

Aggregate traffic data to daily level

In [18]:
# Collapse the traffic records into one row per calendar day.
#
# Named aggregation labels every output column right next to the statistic it
# holds, so the result already has flat column names, in this order:
#   date, mean_velocity, median_velocity, std_velocity,
#   long, lat, street_level, e_node_id, length, street_type
#
# NOTE(review): `date` is the only grouping key, so if traffic_df holds several
# road segments per day (the e_node_id / length columns suggest it does), they
# are all pooled together. `first` then keeps the first non-null long / lat /
# street_level / e_node_id / street_type seen that day, i.e. the attributes of
# one arbitrary segment. Confirm that this is intended; otherwise add the
# segment identifier to the groupby keys.
traffic_daily = (
    traffic_df
    .groupby('date')
    .agg(
        mean_velocity=('velocity', 'mean'),
        median_velocity=('velocity', 'median'),
        # Sample standard deviation: NaN for a day with a single reading.
        std_velocity=('velocity', 'std'),
        long=('long', 'first'),
        lat=('lat', 'first'),
        street_level=('street_level', 'first'),
        e_node_id=('e_node_id', 'first'),
        length=('length', 'mean'),
        street_type=('street_type', 'first'),
    )
    .reset_index()
)


Rename columns for better readability

In [19]:
# Final flat labels for the daily frame. They are applied positionally, so this
# list must stay in the same order as the columns produced by the aggregation.
daily_column_names = [
    'date',
    'mean_velocity', 'median_velocity', 'std_velocity',
    'long', 'lat', 'street_level', 'e_node_id', 'length', 'street_type',
]
traffic_daily.columns = daily_column_names


Merge the daily traffic data with the weather data on date

In [20]:
# Inner join on `date`: only days present in both frames survive, with the
# weather columns first and the daily traffic columns after them.
# NOTE(review): the weather data carries a `province` column; if it has one row
# per province per day, each daily traffic row is repeated for every province.
# Confirm that this is intended.
merged_df = weather_df.merge(traffic_daily, how='inner', on='date')

Drop rows with missing values after merge

In [21]:
# Discard every row that has a missing value in any column.
# NOTE(review): this presumably also removes days whose std_velocity is NaN
# (a day with a single velocity reading) - confirm that is acceptable.
merged_df = merged_df.dropna(axis=0, how='any')

Verify that no missing values remain

In [22]:
# Per-column count of missing entries in the merged frame; serves as a sanity
# check that the cleaned data has no gaps left.
missing_values = merged_df.isna().sum()
print("missing values:\n", missing_values)

missing values:
 province           0
max                0
min                0
wind               0
wind_d             0
rain               0
humidi             0
cloud              0
pressure           0
date               0
mean_velocity      0
median_velocity    0
std_velocity       0
long               0
lat                0
street_level       0
e_node_id          0
length             0
street_type        0
dtype: int64


Save the merged dataframe to a new CSV file

In [23]:
# Destination for the merged daily dataset, next to the input files on Drive.
output_path = r"/content/drive/MyDrive/Predicting City Traffic Flow Based on Weather and Events/daily_traffic_weather.csv"

# Write without the row index so the CSV contains only the data columns.
merged_df.to_csv(path_or_buf=output_path, index=False)

print(f"Merged dataset saved to: {output_path}")

Merged dataset saved to: /content/drive/MyDrive/Predicting City Traffic Flow Based on Weather and Events/daily_traffic_weather.csv
