# In [1]:
import pandas as pd
import numpy as np

def generate_csv(output_path='fcc-forum-pageviews.csv', seed=None):
    """Write a synthetic daily page-view series to a CSV file.

    The series covers every day from 2016-05-09 to 2019-12-03 inclusive.
    Each value is a linear trend (2,000 rising to 60,000) plus Gaussian
    noise (standard deviation 4,000); 20 randomly chosen days are then
    tripled to act as outliers, negatives are clipped to zero, and the
    result is truncated to integers.

    Args:
        output_path: Destination of the CSV file. Defaults to
            'fcc-forum-pageviews.csv' in the current working directory.
        seed: Optional seed for reproducible output. When None, numbers are
            drawn from NumPy's global random state, so any
            ``np.random.seed`` call made by the caller is still honoured.

    Returns:
        None. The data is written to ``output_path`` with the columns
        ``date`` and ``value``, and a confirmation line is printed.
    """
    # 1. Define the date range (same as the project requirement)
    start_date = '2016-05-09'
    end_date = '2019-12-03'
    dates = pd.date_range(start=start_date, end=end_date, freq='D')
    n = len(dates)

    # An explicit seed gets its own generator so the global random state is
    # left untouched; without one we keep using the module-level functions.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # 2. Generate data (linear trend + noise + outliers)
    # Base trend: start around 2,000 views, grow to 60,000 views
    trend = np.linspace(2000, 60000, n)

    # Random noise with a standard deviation of 4000
    noise = rng.normal(loc=0, scale=4000, size=n)

    # Outliers (to test the cleaning logic): pick 20 distinct days
    outlier_indices = rng.choice(n, 20, replace=False)

    values = trend + noise

    # Triple the traffic on the outlier days
    values[outlier_indices] = values[outlier_indices] * 3

    # Ensure no negative page views, then truncate to whole views
    values = np.maximum(values, 0).astype(int)

    # 3. Create DataFrame
    df = pd.DataFrame({
        'date': dates,
        'value': values,
    })

    # 4. Save to CSV
    # index=False keeps the DataFrame's row index out of the file, so the
    # CSV contains only the 'date' and 'value' columns.
    df.to_csv(output_path, index=False)
    print(f"Successfully generated '{output_path}' with {n} rows.")

# Generate the CSV only when this file is run as a script, not when imported.
if __name__ == "__main__":
    generate_csv()

# Output:
# Successfully generated 'fcc-forum-pageviews.csv' with 1304 rows.
