In [7]:
import pandas as pd
import numpy as np
import pickle
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# Load dataset
data_file = "Tuljapur_Updated_Combined.xlsx"
df = pd.read_excel(data_file)

# Convert 'Date' to datetime format; entries that cannot be parsed become NaT
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

# Define numerical columns to use as features, and the column to predict
numeric_cols = ["Temp_High_C", "Temp_Low_C", "Humidity", "Wind_Speed_Miles", "Precipitation", "Functions"]
target_col = "Footfall_Crowd"

# Discard rows that cannot be used for training. A NaT date would turn into
# NaN in Year/Month/Day/DayOfWeek, and those columns are not part of the mean
# imputation below, so the NaN would reach the model untouched. A row with no
# target value carries nothing to learn from either.
rows_loaded = len(df)
df = df.dropna(subset=["Date", target_col]).copy()
rows_dropped = rows_loaded - len(df)
if rows_dropped:
    print(f"Dropped {rows_dropped} row(s) with an unparseable Date or missing {target_col}")

# Extract useful date features (dayofweek: Monday=0 ... Sunday=6)
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.month
df["Day"] = df["Date"].dt.day
df["DayOfWeek"] = df["Date"].dt.dayofweek

# Fill missing values with column means.
# NOTE(review): the means are taken over all retained rows, i.e. before the
# train/test split, so the test rows contribute to the imputation values.
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

# Define input (X) and target (y)
X = df[["Year", "Month", "Day", "DayOfWeek"] + numeric_cols]
y = df[target_col].astype(float)

# Split data into train and test sets (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest model
model = RandomForestRegressor(n_estimators=50, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out 20%
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"Model Evaluation: RMSE = {rmse:.3f}, R² = {r2:.3f}")

# Function to predict footfall for a given date
def predict_footfall(date_str):
    """Predict the footfall value for one calendar date.

    Args:
        date_str: Date in ``YYYY-MM-DD`` form.

    Returns:
        The model's prediction for that date, rounded to the nearest integer.

    Raises:
        ValueError: If ``date_str`` does not match ``%Y-%m-%d``.

    Only the calendar features come from the requested date; every column in
    ``numeric_cols`` is set to its mean over ``df``.
    """
    parsed = datetime.strptime(date_str, "%Y-%m-%d")

    # Calendar features first, in the same order as the training columns.
    row = dict(
        Year=parsed.year,
        Month=parsed.month,
        Day=parsed.day,
        DayOfWeek=parsed.weekday(),
    )

    # The remaining features are not known for the requested date, so each
    # one falls back to its column mean.
    row.update({name: df[name].mean() for name in numeric_cols})

    # Single-row frame in, single-element array out.
    estimate = model.predict(pd.DataFrame([row]))[0]
    return round(estimate)

# Example usage: predict a single date and print the rounded estimate.
# predict_footfall() sets every column in numeric_cols to its mean over df,
# so only the calendar features vary with the date passed in.
future_date = "2025-11-05"
predicted_footfall = predict_footfall(future_date)
print(f"Predicted footfall for {future_date}: {predicted_footfall}")


Model Evaluation: RMSE = 0.241, R² = 0.909
Predicted footfall for 2025-11-05: 1


In [3]:
# Recompute both metrics from the current y_test / y_pred so that the RMSE and
# R² printed together always describe the same predictions, instead of pairing
# a fresh R² with an RMSE left behind by an earlier cell execution.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"Model Evaluation: RMSE = {rmse:.3f}, R² = {r2:.3f}")

Model Evaluation: RMSE = 0.241, R² = 0.909


In [4]:
# r2 is the coefficient of determination from r2_score, not an accuracy: it is
# the share of target variance the model explains on the test split, and it
# can be negative when the model does worse than predicting the mean.
# The name `accuracy` is kept in case other cells read it.
accuracy = r2 * 100
print(f"R² (variance explained on test set): {accuracy:.2f}%")

Model Accuracy: 90.90%
