<a href="https://colab.research.google.com/github/vignu10/parker-demographics/blob/main/metropolis_demographics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Parked Car Demographics Dashboard

In [None]:
!pip install streamlit pyngrok --quiet


In [None]:
from google.colab import files
# Prompt for the parking-data CSV. The result is used as a mapping whose keys
# are the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty/cancelled upload fails with an opaque IndexError.
    raise ValueError("No file was uploaded; re-run this cell and select the parking data CSV.")
filename = next(iter(uploaded))  # Save uploaded file name (first uploaded file)

Saving extensive_diverse_parking_data.csv to extensive_diverse_parking_data (1).csv


In [None]:
# 🚀 Upload Car Price Dataset
print("Upload your full_car_price_dataset.csv file")
uploaded_price = files.upload()
if not uploaded_price:
    # Without this guard an empty/cancelled upload fails with an opaque IndexError.
    raise ValueError("No file was uploaded; re-run this cell and select the car price CSV.")
price_filename = next(iter(uploaded_price))  # name of the first uploaded file

Upload your full_car_price_dataset.csv file


Saving Full_Car_Price_Dataset.csv to Full_Car_Price_Dataset (1).csv


In [None]:
import pandas as pd
import numpy as np
import os
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the two uploaded CSVs: the parking records and the car price table.
parking_raw = pd.read_csv(filename)
car_prices = pd.read_csv(price_filename)

# Left-join the price table onto the parking records by make and model, so
# every parking row is kept even when no price entry matches.
# NOTE(review): if the price file lists a Make/Model pair more than once, each
# matching parking row is repeated per entry — confirm the price keys are unique.
df = pd.merge(parking_raw, car_prices, how="left", on=["Make", "Model"])

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

# --- Segment 1: Vehicle Tier ---
def label_vehicle_segment(p):
    """Map a vehicle price to a market tier label.

    Args:
        p: Vehicle price as a number; may be missing (NaN/None), e.g. when a
            row had no match in the price table.

    Returns:
        "Luxury" for prices of 40000 or more, "Mid-level" for prices of 20000
        or more, "Economy" for anything lower, and "Unknown" when the price
        is missing.
    """
    # NaN fails every comparison below, so without this guard a missing price
    # would silently be reported as "Economy".
    if pd.isna(p):
        return "Unknown"
    if p >= 40000:
        return "Luxury"
    elif p >= 20000:
        return "Mid-level"
    return "Economy"

# Derive the tier label for every row from its Price value.
vehicle_segments = df["Price"].apply(label_vehicle_segment)
df["Vehicle Segment"] = vehicle_segments

# --- Segment 2: Parking Pattern ---
def classify_parking_pattern(row):
    """Label one parking record by stay length and validation status.

    Args:
        row: Mapping-like record providing "Entry Time", "Exit Time" (both
            parseable by ``pd.to_datetime``) and "Validated Amount".

    Returns:
        One of "Short-Term Validated", "Short-Term Non-Validated",
        "Long-Term Validated" or "Long-Term Non-Validated". A stay of up to
        and including 3 hours is short-term; a validated amount above zero
        counts as validated.
    """
    arrived = pd.to_datetime(row["Entry Time"])
    departed = pd.to_datetime(row["Exit Time"])
    hours_parked = (departed - arrived).total_seconds() / 3600
    amount_validated = row["Validated Amount"]

    # Keyed by (is_short_term, is_validated).
    labels = {
        (True, True): "Short-Term Validated",
        (True, False): "Short-Term Non-Validated",
        (False, True): "Long-Term Validated",
        (False, False): "Long-Term Non-Validated",
    }
    return labels[(bool(hours_parked <= 3), bool(amount_validated > 0))]

# Classify every record row by row (axis=1 hands each row to the function).
parking_patterns = df.apply(classify_parking_pattern, axis=1)
df["Parking Pattern"] = parking_patterns

# --- Segment 3: Time of Parking ---
def classify_time_segment(row):
    """Bucket a parking record by the hour of its entry time.

    Args:
        row: Mapping-like record providing "Entry Time" in a form
            ``pd.to_datetime`` can parse.

    Returns:
        "Morning Rush" for hours 6-9, "Midday" for hours 10-16, "Evening" for
        hours 17-21, and "Late Night" for everything else (22-5).
    """
    hour = pd.to_datetime(row["Entry Time"]).hour
    if 6 <= hour < 10:
        return "Morning Rush"
    # The buckets are contiguous: hour 16 used to fall in a gap between
    # Midday (10-15) and Evening (17-21) and was reported as "Late Night".
    # It is treated as part of Midday here.
    if 10 <= hour < 17:
        return "Midday"
    if 17 <= hour < 22:
        return "Evening"
    return "Late Night"

# Bucket every record by entry hour, row by row.
time_segments = df.apply(classify_time_segment, axis=1)
df["Time Segment"] = time_segments

# --- ML-Based Prediction of Vehicle Tier (optional) ---
# Integer-encode each categorical feature with its own LabelEncoder; values are
# cast to str first so missing entries are encoded like any other category.
for source_col in ("Make", "Model", "State"):
    df[f"{source_col}_enc"] = LabelEncoder().fit_transform(df[source_col].astype(str))

X = df[["Make_enc", "Model_enc", "State_enc"]]
y = df["Vehicle Segment"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# random_state is fixed so the fitted tree (and the accuracy printed below) is
# the same on every run; the split above was already seeded.
clf = DecisionTreeClassifier(max_depth=5, random_state=42)
clf.fit(X_train, y_train)

print("Model accuracy:", clf.score(X_test, y_test))
# Predictions are produced for every row, including the rows used for training.
df["Predicted Segment"] = clf.predict(X)

# Save updated data
df.to_csv("enriched_data.csv", index=False)

Model accuracy: 0.8128195763330899


In [None]:
# Generate the Streamlit dashboard script. The script text contains emoji, so
# the file is written with an explicit UTF-8 encoding instead of relying on the
# platform default.
with open("app.py", "w", encoding="utf-8") as f:
    f.write('''
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

st.title("🚗 Parked Car Demographics Dashboard")

# Load enriched data
df = pd.read_csv("enriched_data.csv")

# 🚩 Sidebar Multi-select for locations
st.sidebar.header("🔍 Filter Options")
all_locations = sorted(df["Location"].dropna().unique())
selected_locations = st.sidebar.multiselect("📍 Select One or More Locations", all_locations, default=all_locations[:1])

# Filter data for selected locations.
# .copy() makes this an independent frame, so adding the "Entry Hour" column
# below does not trigger pandas' SettingWithCopyWarning.
filtered_df = df[df["Location"].isin(selected_locations)].copy()

if not selected_locations:
    st.warning("Please select at least one location to view the data.")
else:
    st.subheader("🚘 Combined Vehicle Segment Distribution")
    segment_counts = filtered_df["Predicted Segment"].value_counts()
    fig1, ax1 = plt.subplots()
    ax1.pie(segment_counts, labels=segment_counts.index, autopct='%1.1f%%', startangle=90)
    ax1.axis("equal")
    st.pyplot(fig1)

    # 💰 Histogram: Price Distribution (one overlaid series per location)
    st.subheader("💰 Vehicle Price Distribution")
    fig2, ax2 = plt.subplots()
    for loc in selected_locations:
        # Drop missing prices explicitly rather than relying on the plotting
        # backend's handling of NaN values.
        loc_data = filtered_df[filtered_df["Location"] == loc]["Price"].dropna()
        ax2.hist(loc_data, bins=20, alpha=0.6, label=loc)

    ax2.set_xlabel("Price ($)")
    ax2.set_ylabel("Vehicle Count")
    ax2.legend()
    st.pyplot(fig2)

    # ⏱️ Line Chart: Entry Time Hourly Volume
    st.subheader("⏱️ Traffic Volume by Hour")
    filtered_df["Entry Hour"] = pd.to_datetime(filtered_df["Entry Time"]).dt.hour
    # Rows = entry hour, columns = location, values = number of records.
    hourly = filtered_df.groupby(["Location", "Entry Hour"]).size().unstack(fill_value=0).T
    st.line_chart(hourly)

    # 💸 Bar Chart: Revenue (Total vs Validated)
    st.subheader("💸 Total & Validated Revenue by Location")
    rev = filtered_df.groupby("Location")[["Total Amount", "Validated Amount"]].sum()
    st.bar_chart(rev)
''')


In [None]:
from pyngrok import ngrok
import time
import os

# Shut down ngrok before starting over, so re-running this cell starts clean.
ngrok.kill()

# Launch the Streamlit app in the background (trailing '&') and wait a few
# seconds before opening the tunnel.
launch_command = 'streamlit run app.py &'
startup_wait_seconds = 3
os.system(launch_command)
time.sleep(startup_wait_seconds)

# Open an HTTP tunnel to local port 8501, where the app is expected to listen.
tunnel_options = {"addr": "8501", "proto": "http"}
public_url = ngrok.connect(**tunnel_options)
print(f"✅ Your Streamlit app is live at: {public_url}")

✅ Your Streamlit app is live at: NgrokTunnel: "https://382a-35-221-14-105.ngrok-free.app" -> "http://localhost:8501"


# Simulated Test Data

# LPR Data

In [None]:
import pandas as pd
import numpy as np
# Build a simulated LPR (licence-plate recognition) dataset and export it to CSV.

# Seeded generator so the simulated columns are identical on every run.
lpr_rng = np.random.default_rng(42)

lpr_df = pd.read_csv("/content/car_parking_data (1).csv")

# Simulate that locations 55141, 42338, and 99755 belong to zone 'Zone-A'
zone_a_ids = ['55141', '42338', '99755']
lpr_df["Location"] = lpr_df["Location"].astype(str)  # Ensure consistent string type
lpr_df.loc[lpr_df["Location"].isin(zone_a_ids), "Location"] = "Zone-A"

# Simulated validation: a random fraction (0 to 1) of each record's total amount.
lpr_df["Validated Amount"] = lpr_df["Total Amount"] * lpr_rng.uniform(0, 1, len(lpr_df))

# Simulated exit time: entry time plus a random stay of 1 to 8 hours.
lpr_df["Entry Time"] = pd.to_datetime(lpr_df["Entry Time"])
lpr_df["Exit Time"] = lpr_df["Entry Time"] + pd.to_timedelta(lpr_rng.uniform(1, 8, len(lpr_df)), unit="h")

# 'Year' and 'Price' are excluded from the exported file. A simulated Price
# column used to be generated and then immediately dropped here; that dead step
# is removed, and errors="ignore" keeps the drop working when either column is
# absent from the input.
lpr_df = lpr_df.drop(columns=['Year', 'Price'], errors="ignore")

new_path = "tested.csv"
lpr_df.to_csv(new_path, index=False)