In [2]:
import pandas as pd
import streamlit as st
import altair as alt
import numpy as np

df = pd.read_csv("copenhagen_traffic_raw.csv")
df['timestamp'] = pd.to_datetime(df['timestamp'])




In [3]:
# --- Area Simulation ---
areas = ['City Center', 'Østerbro', 'Nørrebro', 'Vesterbro', 'Amager', 'Frederiksberg']

# Optional: Weighted probabilities (City Center is busier)
area_weights = [0.3, 0.15, 0.15, 0.15, 0.15, 0.1]

df['area'] = np.random.choice(areas, size=len(df), p=area_weights)


In [9]:
import numpy as np

# Example values for weather
weather_values = ['sunny', 'cloudy', 'rainy']

# Randomly assign weather to each row
df['weather'] = np.random.choice(weather_values, size=len(df))


In [10]:
df.columns


Index(['timestamp', 'vehicle_count', 'bike_count', 'bus_count', 'hour',
       'dayofweek', 'area', 'weather'],
      dtype='object')

In [11]:
datetime_col = 'timestamp'

# Final cleanup
df = df[[datetime_col, 'area', 'vehicle_count', 'bike_count', 'bus_count', 'weather']]


In [13]:
# Final cleanup
df = df[[datetime_col, 'area', 'vehicle_count', 'bike_count', 'bus_count', 'weather']]

# Save to CSV
df.to_csv("copenhagen_traffic_raw.csv", index=False)

print("✅ Simulation complete — area, traffic, and weather data saved to 'copenhagen_traffic_raw.csv'")


✅ Simulation complete — area, traffic, and weather data saved to 'copenhagen_traffic_raw.csv'


In [None]:
df.head()

Unnamed: 0,timestamp,area,vehicle_count,bike_count,bus_count,weather
0,2025-06-02 00:00:00,Amager,18,17,11,rain
1,2025-06-02 01:00:00,City Center,23,0,14,snow
2,2025-06-02 02:00:00,City Center,24,11,7,rain
3,2025-06-02 03:00:00,Vesterbro,27,10,12,snow
4,2025-06-02 04:00:00,City Center,10,0,13,snow


In [14]:
df.to_csv("copenhagen_traffic_raw.csv", index=False)
print("✅ Data saved successfully.")


✅ Data saved successfully.


 #Updated Traffic Simulation Code with Area Weights
#Replace your vehicle, bike, and bus simulation functions with the following updated versions:

In [15]:
# --- Area traffic multipliers ---
area_multipliers = {
    'City Center':    {'vehicle': 1.3, 'bike': 1.0, 'bus': 1.3},
    'Østerbro':       {'vehicle': 1.0, 'bike': 1.3, 'bus': 0.7},
    'Nørrebro':       {'vehicle': 1.0, 'bike': 1.5, 'bus': 1.0},
    'Vesterbro':      {'vehicle': 1.2, 'bike': 1.0, 'bus': 1.0},
    'Amager':         {'vehicle': 1.0, 'bike': 0.7, 'bus': 1.3},
    'Frederiksberg':  {'vehicle': 1.0, 'bike': 1.3, 'bus': 0.8}
}

# --- Vehicle Count Simulation (area-aware) ---
def simulate_vehicles(row):
    hour = row['hour']
    day = row['dayofweek']
    weather = row['weather']
    area = row['area']
    
    # Base pattern by hour
    if 6 <= hour <= 9 or 15 <= hour <= 18:
        base = 200
    elif 10 <= hour <= 14:
        base = 120
    elif 19 <= hour <= 22:
        base = 80
    else:
        base = 30

    # Weekend adjustment
    if day >= 5:
        base *= 0.7

    # Weather adjustment
    if weather == 'rain':
        base *= 0.8
    elif weather == 'snow':
        base *= 0.6
    elif weather == 'fog':
        base *= 0.9

    # Area adjustment
    base *= area_multipliers[area]['vehicle']

    return max(0, int(base + np.random.normal(0, 10)))

# --- Bike Count Simulation (area-aware) ---
def simulate_bikes(row):
    hour = row['hour']
    day = row['dayofweek']
    weather = row['weather']
    area = row['area']
    
    if 6 <= hour <= 9 or 16 <= hour <= 18:
        base = 100
    elif 10 <= hour <= 15:
        base = 60
    else:
        base = 15

    if day >= 5:
        base *= 0.6

    if weather == 'rain':
        base *= 0.5
    elif weather == 'snow':
        base *= 0.3
    elif weather == 'fog':
        base *= 0.7

    base *= area_multipliers[area]['bike']

    return max(0, int(base + np.random.normal(0, 8)))

# --- Bus Count Simulation (area-aware) ---
def simulate_buses(row):
    hour = row['hour']
    day = row['dayofweek']
    area = row['area']

    if 6 <= hour <= 9 or 15 <= hour <= 18:
        base = 50
    elif 10 <= hour <= 22:
        base = 30
    else:
        base = 10

    if day >= 5:
        base *= 0.8

    base *= area_multipliers[area]['bus']

    return max(0, int(base + np.random.normal(0, 3)))


In [16]:
# Ensure timestamp is in datetime format
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Extract hour and day of week
df['hour'] = df['timestamp'].dt.hour
df['dayofweek'] = df['timestamp'].dt.dayofweek  # Monday = 0, Sunday = 6


In [17]:
df['vehicle_count'] = df.apply(simulate_vehicles, axis=1)
df['bike_count'] = df.apply(simulate_bikes, axis=1)
df['bus_count'] = df.apply(simulate_buses, axis=1)


In [18]:
df.head()

Unnamed: 0,timestamp,area,vehicle_count,bike_count,bus_count,weather,hour,dayofweek
0,2025-06-02 00:00:00,City Center,37,12,15,rainy,0,0
1,2025-06-02 01:00:00,Vesterbro,43,27,9,rainy,1,0
2,2025-06-02 02:00:00,Østerbro,32,24,4,sunny,2,0
3,2025-06-02 03:00:00,Østerbro,16,12,4,cloudy,3,0
4,2025-06-02 04:00:00,Frederiksberg,35,18,12,sunny,4,0


In [19]:
# ✅ Final cleanup (drop temp columns if you want to hide them)
# Optionally keep 'hour' and 'dayofweek' for analysis
df.to_csv("copenhagen_traffic_raw.csv", index=False)
print("✅ Dataset saved to copenhagen_traffic_raw.csv")


✅ Dataset saved to copenhagen_traffic_raw.csv


In [22]:
weather_counts = df.value_counts().reset_index()

weather_counts

Unnamed: 0,timestamp,area,vehicle_count,bike_count,bus_count,weather,hour,dayofweek,count
0,2025-06-02 00:00:00,City Center,37,12,15,rainy,0,0,1
1,2025-06-06 19:00:00,City Center,100,4,44,rainy,19,4,1
2,2025-06-06 11:00:00,Vesterbro,153,54,31,rainy,11,4,1
3,2025-06-06 12:00:00,Nørrebro,120,94,29,rainy,12,4,1
4,2025-06-06 13:00:00,Vesterbro,138,67,27,cloudy,13,4,1
...,...,...,...,...,...,...,...,...,...
163,2025-06-04 09:00:00,Østerbro,193,137,35,sunny,9,2,1
164,2025-06-04 10:00:00,City Center,143,45,39,rainy,10,2,1
165,2025-06-04 11:00:00,Frederiksberg,114,60,25,rainy,11,2,1
166,2025-06-04 12:00:00,Frederiksberg,106,75,25,sunny,12,2,1


In [25]:
weather_counts = df['weather'].value_counts().reset_index()
weather_counts.columns = ['weather', 'count']
weather_counts



Unnamed: 0,weather,count
0,sunny,66
1,rainy,62
2,cloudy,40


In [30]:
df.set_index('weather', inplace=True)


In [31]:
# ✅ Final cleanup (drop temp columns if you want to hide them)
# Optionally keep 'hour' and 'dayofweek' for analysis
df.to_csv("copenhagen_traffic_raw.csv", index=False)
print("✅ Dataset saved to copenhagen_traffic_raw.csv")

✅ Dataset saved to copenhagen_traffic_raw.csv


In [34]:
import pandas as pd
import random

# Load your dataset
df = pd.read_csv("copenhagen_traffic_raw.csv")

# Define area options
areas = ["City Center", "Amager", "Nørrebro", "Vesterbro", "Østerbro"]

# Assign random area to each row
df["area"] = [random.choice(areas) for _ in range(len(df))]

# Move 'area' next to 'timestamp' for readability
columns = ['timestamp', 'area'] + [col for col in df.columns if col not in ['timestamp', 'area']]
df = df[columns]

# Save updated dataset
df.to_csv("copenhagen_traffic_raw.csv", index=False)

print("✅ 'area' column added and dataset updated.")


✅ 'area' column added and dataset updated.


In [35]:
df.head(2)

Unnamed: 0,timestamp,area,vehicle_count,bike_count,bus_count,weather
0,2025-06-02 00:00:00,City Center,18,17,11,rain
1,2025-06-02 01:00:00,City Center,23,0,14,snow


In [33]:
df.columns


Index(['timestamp', 'vehicle_count', 'bike_count', 'bus_count', 'hour',
       'dayofweek'],
      dtype='object')