In [1]:
import pandas as pd
import numpy as np

# Configuration
num_entries = 10000
start_year = 1880
random_seed = 42  # fixed so a fresh-kernel re-run regenerates the identical CSV
output_filename = 'epa-sea-level.csv'

# A single seeded generator feeds every random draw below, so the draw order
# (CSIRO noise first, then NOAA noise) fully determines the output.
rng = np.random.default_rng(random_seed)

# 1. Generate Years
# np.arange excludes the stop value, so this yields start_year through
# start_year + num_entries - 1 (1880..11879): 10k unique X-axis points.
years = np.arange(start_year, start_year + num_entries)
elapsed_years = years - start_year  # years since the start of the series

# 2. Simulate Sea Level Data (Inches)
# Logic: Base linear rise + slight acceleration (quadratic) + random noise.
# The quadratic term makes the fitted slope grow with time, so a regression
# restricted to "Since 2000" comes out steeper than the "All time" one.
linear_trend = 0.06 * elapsed_years
acceleration = 0.000005 * elapsed_years**2
noise = rng.normal(0, 0.4, num_entries)  # Random year-to-year variation

sea_levels = linear_trend + acceleration + noise

# 3. Create DataFrame
# Besides 'Year' and 'CSIRO Adjusted Sea Level', the extra columns named after
# those in the real EPA dataset are included so code that inspects them still
# finds them. Error bounds are a fixed +/-0.2 band around the simulated level;
# the NOAA column is the simulated level plus its own small independent noise.
df = pd.DataFrame({
    'Year': years,
    'CSIRO Adjusted Sea Level': sea_levels,
    'Lower Error Bound': sea_levels - 0.2,
    'Upper Error Bound': sea_levels + 0.2,
    'NOAA Adjusted Sea Level': sea_levels + rng.normal(0, 0.1, num_entries)
})

# 4. Save to CSV (index dropped so the file holds only the five data columns)
df.to_csv(output_filename, index=False)

print(f"Successfully created '{output_filename}' with {len(df)} entries.")
print(f"Head of data:\n{df.head()}")

Successfully created 'epa-sea-level.csv' with 10000 entries.
Head of data:
   Year  CSIRO Adjusted Sea Level  Lower Error Bound  Upper Error Bound  \
0  1880                 -0.196003          -0.396003           0.003997   
1  1881                  0.223857           0.023857           0.423857   
2  1882                  0.291349           0.091349           0.491349   
3  1883                  0.778310           0.578310           0.978310   
4  1884                 -0.226569          -0.426569          -0.026569   

   NOAA Adjusted Sea Level  
0                -0.167225  
1                 0.366856  
2                 0.159428  
3                 0.669158  
4                -0.138758  
