<a href="https://colab.research.google.com/github/unbeatable951/Street-Vendor-Sales-Pattern-Visualizer/blob/main/Street_Vendor_Sales_Pattern_Visualizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import folium
from folium.plugins import MarkerCluster


# Load the Dataset

In [None]:
# Location of the raw vendor sales export (a path under /content, presumably
# the Colab VM's working directory -- adjust when running elsewhere).
raw_csv_path = "/content/street_vendor_data.csv"
df = pd.read_csv(raw_csv_path)


# Remove Duplicate Timestamps (per location)

In [None]:
# Parse the raw timestamp text into datetime values. The format is
# "date, 12-hour clock, AM/PM" (e.g. "2024-07-16 06:55 PM").
timestamp_format = "%Y-%m-%d %I:%M %p"
df['timestamp'] = pd.to_datetime(df['timestamp'], format=timestamp_format)


In [None]:
# A row is a duplicate when another row has the same timestamp at the same
# location; only the first occurrence of each (timestamp, location) pair is kept.
dedup_keys = ['timestamp', 'location']
df = df.drop_duplicates(subset=dedup_keys, keep='first')


In [None]:
# Parse once into a local and reuse it for both columns.
# NOTE(review): 'timestamp' was already converted in an earlier cell, so this
# re-parse looks redundant on a top-to-bottom run -- confirm before removing.
parsed_timestamps = pd.to_datetime(df['timestamp'], format="%Y-%m-%d %I:%M %p")
df['timestamp'] = parsed_timestamps

# Hour of day (0-23), used later to aggregate footfall per hour.
df['hour'] = parsed_timestamps.dt.hour


In [None]:
df.head()

Unnamed: 0,timestamp,location,item_sold,price,weather,footfall,payment_type,hour
0,2024-07-16 18:55:00,"Colaba, Mumbai",Samosa,15.0,Cloudy,27,UPI,18
1,2024-07-16 10:58:00,"Colaba, Mumbai",Dosa,45.0,Rainy,16,UPI,10
2,2024-07-16 16:18:00,"Park Street, Kolkata",Vada Pav,12.0,Partly Cloudy,24,UPI,16
3,2024-07-16 14:15:00,"FC Road, Pune",Momo,50.0,Cloudy,20,Card,14
4,2024-07-16 10:31:00,"Colaba, Mumbai",Samosa,15.0,Rainy,27,Cash,10


# Normalize item_sold Names

In [None]:
# Canonical display names, keyed by the lower-cased, whitespace-stripped raw value.
# "momomo" is presumably a misspelling seen in the raw data; it is folded into "Momo".
item_map = {
    "samosa": "Samosa",
    "vada pav": "Vada Pav",
    "momomo": "Momo",
    "momo": "Momo",
    "pani puri": "Pani Puri",
    "idli": "Idli",
    "chaat": "Chaat",
    "dosa": "Dosa"
}

# Normalise case and surrounding whitespace first so the lookup ignores both.
item_key = df['item_sold'].str.lower().str.strip()

# Series.map() returns NaN for any key missing from item_map, which would
# silently erase every item not listed above (and drop those rows from the
# later groupby charts). Fall back to the title-cased cleaned value so unlisted
# items are preserved; values that were missing to begin with stay NaN.
df['item_sold'] = item_key.map(item_map).fillna(item_key.str.title())


# Clean Currency Symbols in price

In [None]:
# Remove the rupee sign from price values (regex character class '[₹]'),
# then cast the column to float so it can be aggregated numerically.
price_without_symbol = df['price'].replace(to_replace='[₹]', value='', regex=True)
df['price'] = price_without_symbol.astype(float)


# Save the Cleaned Dataset

In [None]:
# Persist the cleaned frame in both CSV and Excel form, without the index column.
for write_frame, output_name in (
    (df.to_csv, "street_vendor_data_final_cleaned.csv"),
    (df.to_excel, "street_vendor_data_final_cleaned.xlsx"),
):
    write_frame(output_name, index=False)


# 1. Line Graph – Footfall by Hour

In [None]:
# Total footfall for each hour of the day; as_index=False keeps 'hour' as a
# regular column so it can be passed straight to Plotly.
footfall_by_hour = df.groupby('hour', as_index=False)['footfall'].sum()

# Chart settings gathered in one place: axis columns, point markers, and labels.
line_options = dict(
    x='hour',
    y='footfall',
    markers=True,
    title='Total Footfall by Hour of Day',
    labels={'hour': 'Hour of Day', 'footfall': 'Total Footfall'},
)

fig = px.line(footfall_by_hour, **line_options)
fig.show()


# 2. Stacked Bar Chart – Items Sold by Location

In [None]:
# Number of sale rows for every (location, item) pair, as a flat table with
# columns: location, item_sold, count.
item_counts = (
    df.groupby(['location', 'item_sold'])
    .size()
    .rename('count')
    .reset_index()
)

# One bar per location, split into coloured segments per item.
bar_options = dict(
    x='location',
    y='count',
    color='item_sold',
    title='Items Sold by Location (Stacked)',
    labels={'count': 'Number of Items Sold'},
    barmode='stack',
)

fig = px.bar(item_counts, **bar_options)
fig.show()


# 3. Map Plot – Total Footfall by Location (Folium)

In [None]:
# (latitude, longitude) pair for each vendor location that can be placed on the map.
location_coords = {
    "MG Road, Bengaluru": (12.9758, 77.6055),
    "Connaught Place, Delhi": (28.6315, 77.2167),
    "Colaba, Mumbai": (18.9156, 72.8146),
    "Park Street, Kolkata": (22.5535, 88.3528),
    "FC Road, Pune": (18.5167, 73.8415)
}

# Base map plus a cluster layer that will hold one marker per location.
m = folium.Map(location=[22.9734, 78.6569], zoom_start=5)
marker_cluster = MarkerCluster()
marker_cluster.add_to(m)

# Sum footfall once per location, then drop a marker for each location with known coordinates.
footfall_per_location = df.groupby('location')['footfall'].sum()

for location, total_footfall in footfall_per_location.items():
    latlon = location_coords.get(location)
    if not latlon:
        # Locations without an entry in location_coords are left off the map.
        continue
    popup_text = f"<b>{location}</b><br>Total Footfall: {total_footfall}"
    folium.Marker(location=latlon, popup=popup_text).add_to(marker_cluster)

# Bare expression so the notebook renders the map as the cell output.
m


In [None]:
import pickle
from google.colab import files  # Colab-only helper used for the browser download

# Single name shared by the write step and the download step.
pickle_name = 'cleaned_data.pkl'

# Serialise the cleaned DataFrame to a pickle file on the Colab VM.
with open(pickle_name, mode='wb') as pickle_file:
    pickle.dump(df, pickle_file)

# Send the pickle file from the VM to the local computer.
files.download(pickle_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>