# Synthetic Data Generation for Hybrid Energy System Fault Analysis

This notebook generates synthetic data for a hybrid energy system in Kenya, including:
- Solar PV System (500 kW)
- Diesel Generator (2 MVA)
- Battery Storage (1 MWh)
- Grid Connection

The data includes normal operations and various fault scenarios.

In [None]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Set random seed for reproducibility
np.random.seed(42)

# Plotting style
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so prefer the new name and fall
# back to the legacy one only on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette('husl')

## 1. Generate Time Series Base

In [None]:
def create_time_index(start_date='2023-01-01', periods_years=2):
    """
    Create an hourly DataFrame with temporal and seasonal features.

    Parameters
    ----------
    start_date : str or datetime-like, default '2023-01-01'
        First timestamp of the hourly series.
    periods_years : int or float, default 2
        Length of the series in years. Every year is counted as 365 days
        (8760 hours); leap days do not extend the series.

    Returns
    -------
    pandas.DataFrame
        Indexed by hourly timestamps, with the columns ``hour``,
        ``day_of_year``, ``hour_sin``, ``hour_cos``, ``day_sin``,
        ``day_cos`` and ``season`` (one of 'long_rains', 'short_rains',
        'dry').
    """
    # Fixed 365-day years; int() keeps `periods` integral for fractional years
    hours = int(periods_years * 365 * 24)

    # A Timedelta step is used instead of the 'H' alias, which pandas 2.2
    # deprecated in favour of 'h'; the Timedelta form works on both sides.
    dates = pd.date_range(start=start_date, periods=hours, freq=pd.Timedelta(hours=1))

    # Create DataFrame
    df = pd.DataFrame(index=dates)

    # Add temporal features
    df['hour'] = df.index.hour
    df['day_of_year'] = df.index.dayofyear

    # Add cyclical encoding so that hour 23 -> 0 and late December -> January
    # are adjacent in feature space.
    # NOTE: the yearly period is 365, so day 366 of a leap year lands slightly
    # past one full cycle.
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day_of_year'] / 365)
    df['day_cos'] = np.cos(2 * np.pi * df['day_of_year'] / 365)

    # Add Kenyan seasons (vectorized over the month number):
    # March-May -> long rains, October-December -> short rains, else dry.
    month = np.asarray(df.index.month)
    is_long_rains = (month >= 3) & (month <= 5)
    is_short_rains = (month >= 10) & (month <= 12)
    df['season'] = np.select(
        [is_long_rains, is_short_rains],
        ['long_rains', 'short_rains'],
        default='dry',
    )

    return df

# Create base dataframe
# Uses the function defaults (start_date='2023-01-01', periods_years=2).
df = create_time_index()
# Report the number of hourly rows that were generated.
print(f"Generated time series with {len(df)} hours")
# Trailing expression: previews the first rows as the notebook cell output.
df.head()