# Importing libraries


In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
df = pd.read_csv("traffic_data.csv")  # Replace with your dataset
print(df.head())
print(df.info())


  holiday    temp  rain_1h  snow_1h  clouds_all weather_main  \
0     NaN  288.28      0.0      0.0          40       Clouds   
1     NaN  289.36      0.0      0.0          75       Clouds   
2     NaN  289.58      0.0      0.0          90       Clouds   
3     NaN  290.13      0.0      0.0          90       Clouds   
4     NaN  291.14      0.0      0.0          75       Clouds   

  weather_description            date_time  traffic_volume  
0    scattered clouds  2012-10-02 09:00:00            5545  
1       broken clouds  2012-10-02 10:00:00            4516  
2     overcast clouds  2012-10-02 11:00:00            4767  
3     overcast clouds  2012-10-02 12:00:00            5026  
4       broken clouds  2012-10-02 13:00:00            4918  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   holiday              61 non-null    

# Feature Engineering 


In [5]:
df['date_time'] = pd.to_datetime(df['date_time'])
df['hour'] = df['date_time'].dt.hour
df['day'] = df['date_time'].dt.day
df['month'] = df['date_time'].dt.month
df['day_of_week'] = df['date_time'].dt.dayofweek
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)
df = df.drop(['date_time'], axis=1)


In [4]:
print(df.columns)


Index(['holiday', 'temp', 'rain_1h', 'snow_1h', 'clouds_all', 'weather_main',
       'weather_description', 'date_time', 'traffic_volume'],
      dtype='object')


# prepare data for training

In [6]:
from sklearn.model_selection import train_test_split
y = df['traffic_volume']
X = df.drop('traffic_volume', axis=1))
X = pd.get_dummies(X, drop_first=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train machine learning model

In [7]:
from sklearn.ensemble import RandomForestRegressor
import joblib
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
joblib.dump(model, "traffic_model.pkl")


['traffic_model.pkl']

# Evaluate the model

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R² Score:", r2_score(y_test, y_pred))


MAE: 220.84919302976868
RMSE: 391.72127090747983
R² Score: 0.9611875482938723


# Real-time simulation

In [10]:

import joblib

joblib.dump(X_train.columns.tolist(), "feature_columns.pkl")


['feature_columns.pkl']

# Prediction simulation

In [12]:
import joblib
import pandas as pd
import numpy as np
model = joblib.load("traffic_model.pkl")
feature_cols = joblib.load("feature_columns.pkl")
input_df = pd.DataFrame(columns=feature_cols)
input_df.loc[0] = [0] * len(feature_cols)
input_values = {
    'temp': 290.1,
    'rain_1h': 0.0,
    'snow_1h': 0.0,
    'clouds_all': 20,
    'hour': 8,
    'day': 30,
    'month': 6,
    'day_of_week': 6,
    'is_weekend': 1,
    'holiday_Independence Day': 1,
    'weather_main_Clear': 1,
}
for key, value in input_values.items():
    if key in input_df.columns:
        input_df.at[0, key] = input_df[key].dtype.type(value)
pred = model.predict(input_df)
print("✅ Predicted Traffic Volume:", int(pred[0]))


✅ Predicted Traffic Volume: 1952
