# In [None]:
import pandas as pd
import random
import numpy as np

# Set seed for reproducibility.
# Records below are drawn from the stdlib `random` module; NumPy's global RNG
# is seeded as well, although nothing in this section draws from it.
random.seed(42)
np.random.seed(42)

# Constants
num_records = 10400  # number of rows written to the CSV

# Categorical pools; each record picks one value from each list.
battery_types = ["Li-ion", "Solid-State", "NiMH"]
fast_charge_ports = ["CCS", "CHAdeMO", "Tesla Supercharger", "Type 2"]
drivetrains = ["FWD", "RWD", "AWD"]
segments = ["Compact", "Sedan", "SUV", "Luxury", "Truck"]
car_body_types = ["Hatchback", "Sedan", "SUV", "Pickup", "Coupe"]
brands = ["Tesla", "BMW", "Hyundai", "Tata", "Ford", "Nissan", "BYD", "Volkswagen", "Kia", "Lucid"]
ev_types = ["BEV", "PHEV", "HEV"]
countries = ["USA", "Germany", "China", "India", "UK", "France", "Japan", "South Korea", "Canada", "Australia"]

# Country -> candidate cities. Every entry of `countries` must have a key here,
# because the city is drawn from cities_by_country[country].
cities_by_country = {
    "USA": ["New York", "Los Angeles", "Chicago", "Washington"],
    "Germany": ["Berlin", "Munich", "Frankfurt"],
    "China": ["Beijing", "Shanghai", "Shenzhen"],
    "India": ["Delhi", "Mumbai", "Bangalore", "Chennai"],
    "UK": ["London", "Manchester", "Birmingham"],
    "France": ["Paris", "Lyon", "Marseille"],
    "Japan": ["Tokyo", "Osaka", "Yokohama"],
    "South Korea": ["Seoul", "Busan", "Incheon"],
    "Canada": ["Toronto", "Vancouver", "Montreal"],
    "Australia": ["Sydney", "Melbourne", "Brisbane"]
}

# City -> state/region. Every city listed in `cities_by_country` must have a
# key here, because the state is looked up as states_by_city[city].
states_by_city = {
    # "Washington" is listed as a city, so its region is the District of
    # Columbia. The earlier value "Seattle" was a city name, not a state, and
    # did not match the two-letter codes used for the other US cities.
    "New York": "NY", "Los Angeles": "CA", "Chicago": "IL", "Washington": "DC",
    "Berlin": "Berlin", "Munich": "Bavaria", "Frankfurt": "Hesse",
    "Beijing": "Beijing", "Shanghai": "Shanghai", "Shenzhen": "Guangdong",
    "Delhi": "Delhi", "Mumbai": "Maharashtra", "Bangalore": "Karnataka", "Chennai": "Tamil Nadu",
    "London": "England", "Manchester": "England", "Birmingham": "England",
    "Paris": "Île-de-France", "Lyon": "Auvergne-Rhône-Alpes", "Marseille": "Provence-Alpes-Côte d'Azur",
    "Tokyo": "Tokyo", "Osaka": "Osaka", "Yokohama": "Kanagawa",
    "Seoul": "Seoul", "Busan": "Busan", "Incheon": "Incheon",
    "Toronto": "Ontario", "Vancouver": "British Columbia", "Montreal": "Quebec",
    "Sydney": "New South Wales", "Melbourne": "Victoria", "Brisbane": "Queensland"
}

# Generate synthetic data
def _draw_vehicle():
    """Draw one random vehicle record as a dict keyed by CSV column name.

    The location is drawn first (country, then one of that country's cities),
    followed by the remaining fields in column order. Keeping this order fixed
    keeps the sequence of draws from the seeded ``random`` module, and
    therefore the generated dataset, reproducible.
    """
    nation = random.choice(countries)
    town = random.choice(cities_by_country[nation])

    # One draw per remaining column, in the same order the columns appear.
    # randint bounds are inclusive; uniform draws are rounded to one decimal.
    make = random.choice(brands)
    top_speed = random.randint(120, 300)
    capacity = round(random.uniform(30, 200), 1)
    chemistry = random.choice(battery_types)
    efficiency = round(random.uniform(130, 220), 1)
    driving_range = random.randint(200, 800)
    dc_power = random.randint(50, 350)
    port = random.choice(fast_charge_ports)
    seat_count = random.choice([2, 4, 5, 6, 7])
    drive = random.choice(drivetrains)
    market_segment = random.choice(segments)
    body = random.choice(car_body_types)
    ev_kind = random.choice(ev_types)
    safety = round(random.uniform(2.0, 5.0), 1)
    price = random.randint(70000, 200000)

    # Key order here is the column order of the resulting table.
    return {
        "brand": make,
        "country": nation,
        "state": states_by_city[town],  # deterministic lookup, no RNG draw
        "city": town,
        "top_speed_kmh": top_speed,
        "battery_capacity_kWh": capacity,
        "battery_type": chemistry,
        "efficiency_wh_per_km": efficiency,
        "range_km": driving_range,
        "fast_charging_power_kw_dc": dc_power,
        "fast_charge_port": port,
        "seats": seat_count,
        "drivetrain": drive,
        "segment": market_segment,
        "car_body_type": body,
        "electric_vehicle_type": ev_kind,
        "safety_rating": safety,
        "cost": price,
    }


data = [_draw_vehicle() for _ in range(num_records)]

# Save to CSV
# Each dict in `data` becomes one row; the dict keys become the columns.
df = pd.DataFrame(data=data)
# index=False leaves the DataFrame's row index out of the written file.
df.to_csv(path_or_buf="Electric Vehicle.csv", index=False)

# Confirmation message for the notebook output.
print("✅ Dataset created: Electric Vehicle.csv")