# Traffic-based Route Guidance Solution

## Data Preprocessing & Analyzing

In [None]:
import pandas as pd
import numpy as nm

1. Load the VicRoads Boroondara dataset (.csv)

2. Clean and preprocess:
    * Convert timestamps

    * Handle missing values

    * Normalize/scale traffic flow values

3. Reshape for time-series forecasting (e.g., sequences of past 1-2 hours to predict next 15-min slot)

### Import the Dataset for analyze

In [None]:
df = pd.read_csv('../data/raw/Scats Data October 2006.csv')
df

Unnamed: 0,SCATS Number,Location,CD_MELWAY,NB_LATITUDE,NB_LONGITUDE,HF VicRoads Internal,VR Internal Stat,VR Internal Loc,NB_TYPE_SURVEY,Date,...,V89,V90,V91,V92,V93,V94,V95,Unnamed: 106,Unnamed: 107,Unnamed: 108
0,970,WARRIGAL_RD N of HIGH STREET_RD,060 G10,-37.86703,145.09159,249,182,1,1,1/10/2006,...,66,81,50,59,47,29,34,,,
1,970,WARRIGAL_RD N of HIGH STREET_RD,060 G10,-37.86703,145.09159,249,182,1,1,2/10/2006,...,114,80,60,62,48,44,26,,,
2,970,WARRIGAL_RD N of HIGH STREET_RD,060 G10,-37.86703,145.09159,249,182,1,1,3/10/2006,...,86,93,90,73,57,29,40,,,
3,970,WARRIGAL_RD N of HIGH STREET_RD,060 G10,-37.86703,145.09159,249,182,1,1,4/10/2006,...,101,113,90,78,66,52,44,,,
4,970,WARRIGAL_RD N of HIGH STREET_RD,060 G10,-37.86703,145.09159,249,182,1,1,5/10/2006,...,113,99,91,61,55,49,36,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4187,4821,VICTORIA_ST W OF BURNLEY_ST,002HF02,-37.81296,145.00830,6673,1513,7,1,27/10/2006,...,103,122,124,117,99,108,88,,,
4188,4821,VICTORIA_ST W OF BURNLEY_ST,002HF02,-37.81296,145.00830,6673,1513,7,1,28/10/2006,...,105,105,112,82,97,106,107,,,
4189,4821,VICTORIA_ST W OF BURNLEY_ST,002HF02,-37.81296,145.00830,6673,1513,7,1,29/10/2006,...,76,66,64,77,60,49,45,,,
4190,4821,VICTORIA_ST W OF BURNLEY_ST,002HF02,-37.81296,145.00830,6673,1513,7,1,30/10/2006,...,80,74,48,67,62,50,62,,,


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the dataframe
df = pd.read_csv('https://github.com/yasirulokesha/IntroToAI-Assignment-2B/blob/main/TBRGS/data/raw/Scats%20Data%20October%202006.csv')

# Assuming your dataframe 'df' is already loaded and preprocessed as in your previous code

# Feature Engineering (Example: Create time features)
df['Timestamp'] = pd.to_datetime(df['TIMESTAMP'])
df['Hour'] = df['Timestamp'].dt.hour
df['DayOfWeek'] = df['Timestamp'].dt.dayofweek
df['Month'] = df['Timestamp'].dt.month


# Select relevant features for prediction (adjust as needed)
features = ['Volume', 'Hour', 'DayOfWeek', 'Month', 'Detector ID'] # Example features
target = 'Volume'  # Target variable (traffic volume)


# One-hot encode categorical features (if needed)
df = pd.get_dummies(df, columns=['Detector ID'], prefix='Detector')


# Normalize/Scale Numerical Features (important for neural networks)
scaler = MinMaxScaler()
numerical_features = ['Volume', 'Hour', 'DayOfWeek', 'Month'] # Features to scale
df[numerical_features] = scaler.fit_transform(df[numerical_features])


# Prepare the data for time series prediction (Example: sequences of past 2 hours to predict next 15 minutes)
def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data) - seq_length - 1):
        x = data[i:(i + seq_length)]
        y = data[i + seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)


seq_length = 2 # Example sequence length (adjust as needed)
X, y = create_sequences(df[features].values, seq_length)


# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Build and train an LSTM model (example)
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train[:, 0], epochs=50, batch_size=32, validation_split = 0.1, verbose = 1) # Adjust epochs and batch size


# Prediction Function
def predict_traffic(model, data, origin, destination, timestamp):
    # Preprocess the input (e.g., convert timestamp to features like Hour, DayOfWeek)
    # One-hot encode categorical features (e.g. Origin and Destination)
    # Scale input data
    # Reshape the data into a sequence of length 'seq_length'
    # Use the model to make a prediction
    prediction = model.predict(data)[0][0]  # Extract prediction
    prediction = scaler.inverse_transform([[prediction, 0, 0, 0]])[0][0] # Inverse transform prediction to original scale
    return prediction

# Example usage (replace with actual data)
# Input the origin, destination, date, and time
# Preprocess the input data
# Predict the traffic volume for that timestamp
# Output the prediction



ParserError: Error tokenizing data. C error: Expected 1 fields in line 42, saw 48
