In [None]:
import pandas as pd
import numpy as np

def generate_energy_dataset(start="2021-01-01", end="2021-12-31", freq="H", filename="energy.csv"):
    """
    Generate a synthetic energy usage dataset and save it as a CSV file.

    The dataset has one row per timestamp with the columns ``date``,
    ``temperature``, ``humidity``, ``appliances`` and ``usage``. The random
    draws use a fixed seed (42), so identical arguments always produce
    identical data.

    Parameters:
        start (str): start date (YYYY-MM-DD)
        end (str): end date (YYYY-MM-DD). Passed straight to
            ``pd.date_range``, so a bare date means midnight of that day;
            with an hourly frequency the final day contributes one row.
        freq (str): frequency of data ('H' for hourly, 'D' for daily).
            The uppercase hourly alias ('H', '2H', ...) is accepted and
            converted to the lowercase form before calling pandas.
        filename (str): output CSV file name

    Returns:
        pandas.DataFrame: the generated data (the same rows that were
        written to ``filename``).

    Side effects:
        Writes ``filename`` (without the index column) and prints a
        one-line summary.
    """
    # Recent pandas releases spell the hourly alias in lowercase and warn
    # about (or reject) the uppercase form, so normalise plain "H"/"<n>H".
    # Other aliases such as "BH" are left untouched.
    if isinstance(freq, str) and freq.endswith("H"):
        multiple = freq[:-1]
        if multiple == "" or multiple.isdigit():
            freq = multiple + "h"

    date_range = pd.date_range(start=start, end=end, freq=freq)
    n = len(date_range)

    # A private legacy generator seeded with 42 yields the same stream as
    # np.random.seed(42) would, without reseeding NumPy's global generator
    # behind the caller's back.
    rng = np.random.RandomState(42)

    # Generate synthetic features (draw order matters for reproducibility)
    temperature = rng.normal(20, 5, n)      # around 20°C
    humidity = rng.normal(50, 10, n)        # around 50%
    appliances = rng.randint(1, 10, n)      # 1–9 appliances running
    noise = rng.normal(0, 2, n)

    # Energy usage grows with the number of appliances and with how far the
    # temperature falls below 25, plus random noise
    usage = appliances * 2.5 + (25 - temperature) * 1.2 + noise

    # Create DataFrame
    df = pd.DataFrame({
        "date": date_range,
        "temperature": temperature,
        "humidity": humidity,
        "appliances": appliances,
        "usage": usage
    })

    # Save CSV
    df.to_csv(filename, index=False)
    print(f"Dataset generated: {filename} ({len(df)} rows)")
    return df

# Demo run: build the hourly 2021 dataset, write it to energy.csv,
# then show the first five rows.
df = generate_energy_dataset(
    start="2021-01-01",
    end="2021-12-31",
    freq="H",
    filename="energy.csv",
)

print(df.head(5))
