In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os
import pickle
from tqdm import tqdm
import shap
from datetime import datetime, timedelta

# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Create dedicated directory for LSTM model outputs
model_dir = 'lstm_model'
os.makedirs(f'{model_dir}/plots', exist_ok=True)
os.makedirs(f'{model_dir}/data', exist_ok=True)
os.makedirs(f'{model_dir}/checkpoints', exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

print("="*80)
print("IMPROVED SOLAR PV LSTM MODEL - MULTI-LOCATION TRAINING, SINGLE LOCATION PREDICTION (PyTorch)")
print("="*80)

# 1. Load the preprocessed data
print("\n1. Loading preprocessed data...")
train_data = pd.read_csv('processed_data/train_all_predict_one/train_data.csv')
test_data = pd.read_csv('processed_data/train_all_predict_one/test_data.csv')

# Try to load validation data - if it's empty or doesn't exist, we'll create it from training data
try:
    val_data = pd.read_csv('processed_data/train_all_predict_one/val_data.csv')
    if val_data.empty:
        print("Validation data is empty. Will create validation set from training data.")
        create_val_from_train = True
    else:
        print(f"Loaded validation data with {len(val_data)} samples")
        create_val_from_train = False
except Exception as e:
    print(f"Could not load validation data: {e}. Will create validation set from training data.")
    val_data = pd.DataFrame()  # Empty DataFrame
    create_val_from_train = True

# Convert timestamps back to datetime
for df in [train_data, val_data, test_data]:
    if not df.empty:
        if 'LocalTime' in df.columns:
            df['LocalTime'] = pd.to_datetime(df['LocalTime'])
        if 'date' in df.columns:
            df['date'] = pd.to_datetime(df['date'])
        else:
            # Create date column if it doesn't exist
            if 'LocalTime' in df.columns:
                df['date'] = df['LocalTime'].dt.date

# Print dataset info
print(f"Training data shape: {train_data.shape}")
print(f"Validation data shape: {val_data.shape}")
print(f"Test data shape: {test_data.shape}")

# 2. Define target location ID
target_location_id = test_data['location_id'].iloc[0]  # Get from test data
print(f"\nTarget location for prediction: {target_location_id}")

# 3. Feature selection for the LSTM model
print("\n2. Preparing features for LSTM model...")

# Define features to use
weather_features = [
    'Temperature', 'Dewpoint', 'Pressure', 'WindSpeed', 
    'WindDirection', 'GHI', 'DNI', 'DHI', 'Cloud_Coverage', 'Cloud_Type'
]

# Check if all weather features exist in the data
existing_weather_features = [f for f in weather_features if f in train_data.columns]
print(f"Using weather features: {existing_weather_features}")

time_features = [
    'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 
    'dayofyear_sin', 'dayofyear_cos', 'is_daylight'
]
# Keep only time features that exist in the data
time_features = [f for f in time_features if f in train_data.columns]

# Get lag features that exist in the data
lag_features = [col for col in train_data.columns if 'lag' in col]
print(f"Using {len(lag_features)} lag features")

# Combine all features
features = existing_weather_features + time_features + lag_features

# Define target variable
target = 'Power(MW)'
print(f"Target variable: {target}")

# Print list of all features used
print("Full feature list:")
for i, feature in enumerate(features):
    print(f"{i+1}. {feature}")

# 4. Prepare data for LSTM
print("\n3. Preparing data for sequence-based LSTM model...")

# Define sequence length (number of time steps to look back)
sequence_length = 8  # 4 hours (with 30 min intervals)

# If we need to create validation data from training data
if create_val_from_train:
    # Split training data into train and validation
    # We'll use a simple 90/10 split
    train_indices = int(len(train_data) * 0.9)
    # Shuffle indices to get a random split
    indices = np.random.permutation(len(train_data))
    train_idx, val_idx = indices[:train_indices], indices[train_indices:]
    val_data = train_data.iloc[val_idx].copy()
    train_data = train_data.iloc[train_idx].copy()
    print(f"Created validation set with {len(val_data)} samples from training data")
    print(f"New training data size: {len(train_data)}")

# Create feature scalers (fitted on training data only)
feature_scaler = RobustScaler()  # Better for outlier handling
target_scaler = MinMaxScaler(feature_range=(-0.1, 1.1))  # Expanded range for margin

# Fit scalers on training data
feature_scaler.fit(train_data[features])
target_scaler.fit(train_data[[target]])

# Save scalers for later use
with open(f'{model_dir}/data/feature_scaler.pkl', 'wb') as f:
    pickle.dump(feature_scaler, f)
with open(f'{model_dir}/data/target_scaler.pkl', 'wb') as f:
    pickle.dump(target_scaler, f)

# Scale all datasets
train_data_scaled = train_data.copy()
val_data_scaled = val_data.copy()
test_data_scaled = test_data.copy()

train_data_scaled[features] = feature_scaler.transform(train_data[features])
val_data_scaled[features] = feature_scaler.transform(val_data[features])
test_data_scaled[features] = feature_scaler.transform(test_data[features])

train_data_scaled[target] = target_scaler.transform(train_data[[target]]).reshape(-1)
val_data_scaled[target] = target_scaler.transform(val_data[[target]]).reshape(-1)
test_data_scaled[target] = target_scaler.transform(test_data[[target]]).reshape(-1)

# Add a flag for nighttime hours (for forced zero prediction)
train_data_scaled['is_night'] = (~train_data['is_daylight']).astype(int)
val_data_scaled['is_night'] = (~val_data['is_daylight']).astype(int)
test_data_scaled['is_night'] = (~test_data['is_daylight']).astype(int)

# Custom PyTorch Dataset for sequences with day pattern preservation
class ImprovedSolarPowerDataset(Dataset):
    def __init__(self, dataframe, seq_length, features, target, location_id=None):
        self.seq_length = seq_length
        self.features = features
        self.target = target
        
        # Filter by location if needed
        if location_id:
            dataframe = dataframe[dataframe['location_id'] == location_id]
        
        # Sort by location and time
        dataframe = dataframe.sort_values(['location_id', 'LocalTime'])
        
        # Group by location_id
        self.sequences = []
        self.targets = []
        self.night_masks = []  # Track if target time is nighttime
        self.timestamps = []   # Store timestamps for later reference
        
        for loc_id, group in dataframe.groupby('location_id'):
            # Try to further group by day to maintain daily patterns
            try:
                if 'date' in group.columns:
                    day_groups = group.groupby(group['date'])
                else:
                    day_groups = [(None, group)]
                    
                for day, day_group in day_groups:
                    feature_data = day_group[features].values
                    target_data = day_group[target].values
                    night_data = day_group['is_night'].values  # Get nighttime flags
                    
                    # Only create sequences if we have enough data points for this day
                    if len(feature_data) > seq_length:
                        for i in range(len(feature_data) - seq_length):
                            self.sequences.append(feature_data[i:i+seq_length])
                            self.targets.append(target_data[i+seq_length])
                            self.night_masks.append(night_data[i+seq_length])
                            if 'LocalTime' in day_group.columns:
                                self.timestamps.append(day_group['LocalTime'].iloc[i+seq_length])
                            else:
                                self.timestamps.append(None)
            except Exception as e:
                # Fallback to original method if day grouping fails
                feature_data = group[features].values
                target_data = group[target].values
                night_data = group['is_night'].values  # Get nighttime flags
                
                # Create sequences
                for i in range(len(group) - seq_length):
                    self.sequences.append(feature_data[i:i+seq_length])
                    self.targets.append(target_data[i+seq_length])
                    self.night_masks.append(night_data[i+seq_length])
                    if 'LocalTime' in group.columns:
                        self.timestamps.append(group['LocalTime'].iloc[i+seq_length])
                    else:
                        self.timestamps.append(None)
    
    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        return (
            torch.FloatTensor(self.sequences[idx]), 
            torch.FloatTensor([self.targets[idx]]),
            torch.FloatTensor([self.night_masks[idx]])
        )
    
    def get_timestamp(self, idx):
        return self.timestamps[idx]

# Create PyTorch datasets
train_dataset = ImprovedSolarPowerDataset(train_data_scaled, sequence_length, features, target)
val_dataset = ImprovedSolarPowerDataset(val_data_scaled, sequence_length, features, target)
print(f"Training dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

# Create test dataset specifically for target location
test_dataset = ImprovedSolarPowerDataset(
    test_data_scaled, sequence_length, features, target,
    location_id=target_location_id
)
print(f"Test dataset size: {len(test_dataset)}")

# Create data loaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# 5. Define the improved LSTM Model with time awareness
class TimeAwareLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size_1=128, hidden_size_2=64, dropout_rate=0.2):
        super(TimeAwareLSTMModel, self).__init__()
        
        # Split input features into time-related and other features
        time_feature_count = len(time_features)
        self.time_feature_indices = [features.index(tf) for tf in time_features]
        self.other_feature_indices = [i for i in range(input_size) if i not in self.time_feature_indices]
        
        # First LSTM layer specifically for time features
        self.time_lstm = nn.LSTM(time_feature_count, hidden_size_1//2, batch_first=True)
        
        # LSTM layer for non-time features
        self.other_lstm = nn.LSTM(input_size - time_feature_count, hidden_size_1//2, batch_first=True)
        
        # Combined LSTM layer
        self.combined_lstm = nn.LSTM(hidden_size_1, hidden_size_2, batch_first=True)
        
        # Normalization and dropout
        self.bn1 = nn.BatchNorm1d(hidden_size_1)
        self.bn2 = nn.BatchNorm1d(hidden_size_2)
        self.bn3 = nn.BatchNorm1d(32)
        
        # Clear dropout definitions with consistent rate
        self.dropout_rate = dropout_rate
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.dropout3 = nn.Dropout(dropout_rate / 2)  # Lighter dropout before output
        
        # Additional dense layers
        self.fc1 = nn.Linear(hidden_size_2, 32)
        self.relu1 = nn.LeakyReLU(0.1)
        
        # Output layer
        self.fc2 = nn.Linear(32, 1)
        
        # Day/Night awareness layer
        self.day_night_gate = nn.Linear(hidden_size_2, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x, apply_night_mask=True):
        batch_size, seq_len, _ = x.shape
        
        # Split input into time and other features
        time_features = x[:, :, self.time_feature_indices]
        other_features = x[:, :, self.other_feature_indices]
        
        # Process time features
        time_lstm_out, _ = self.time_lstm(time_features)
        
        # Process other features
        other_lstm_out, _ = self.other_lstm(other_features)
        
        # Concatenate outputs
        combined_features = torch.cat((time_lstm_out, other_lstm_out), dim=2)
        
        # Apply batch normalization
        reshaped_combined = combined_features.contiguous().view(batch_size * seq_len, -1)
        normalized_combined = self.bn1(reshaped_combined)
        normalized_combined = normalized_combined.view(batch_size, seq_len, -1)
        normalized_combined = self.dropout1(normalized_combined)
        
        # Apply combined LSTM
        combined_lstm_out, _ = self.combined_lstm(normalized_combined)
        
        # Extract last time step
        last_output = combined_lstm_out[:, -1, :]
        
        # Apply batch norm and dropout
        last_output = self.bn2(last_output)
        last_output = self.dropout2(last_output)
        
        # Detect day/night (for demonstration, will be overridden by actual night mask)
        day_night_pred = self.sigmoid(self.day_night_gate(last_output))
        
        # Process through dense layer
        x = self.fc1(last_output)
        x = self.relu1(x)
        x = self.bn3(x)
        x = self.dropout3(x)
        
        # Final prediction
        output = self.fc2(x)
        
        return output, day_night_pred

# 6. Initialize model, loss function, and optimizer
input_size = len(features)
model = TimeAwareLSTMModel(
    input_size=input_size,
    hidden_size_1=128,
    hidden_size_2=64,
    dropout_rate=0.2
).to(device)
print("\n4. Building and training time-aware LSTM model with PyTorch...")
print(model)

# Display model parameter summary
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total trainable parameters: {count_parameters(model):,}")
print(f"Model architecture:")
print(f"- Input size: {input_size} features")
print(f"- Time features: {len(time_features)}")
print(f"- Weather/other features: {input_size - len(time_features)}")
print(f"- LSTM layers: 3 (time, weather, combined)")
print(f"- Hidden sizes: 128 → 64 → 32 → 1")
print(f"- Dropout rate: {model.dropout_rate}")

# Custom loss function that includes night awareness
class NightAwareLoss(nn.Module):
    def __init__(self, base_criterion=nn.HuberLoss(delta=0.5)):
        super(NightAwareLoss, self).__init__()
        self.base_criterion = base_criterion
        
    def forward(self, predictions, targets, night_mask):
        # Basic loss calculation
        base_loss = self.base_criterion(predictions, targets)
        
        # Add penalty for predicting non-zero values during nighttime
        night_samples = night_mask.bool()
        if torch.any(night_samples):
            night_predictions = predictions[night_samples]
            night_penalty = torch.mean(torch.abs(night_predictions))
            return base_loss + 5.0 * night_penalty  # Higher weight for night penalty
        
        return base_loss

# Use a custom loss function
criterion = NightAwareLoss()

# Add weight decay to prevent overfitting
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# Improved scheduler for better learning rate adjustment
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=3, verbose=True, min_lr=1e-6
)

# 7. Improved training function with reduced patience (5 instead of 20)
def train_time_aware_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=10, patience=10):
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    counter = 0
    
    # For saving best model
    best_model_state = None
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0.0
        for inputs, targets, night_mask in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
            inputs, targets, night_mask = inputs.to(device), targets.to(device), night_mask.to(device)
            
            optimizer.zero_grad()
            outputs, day_night_pred = model(inputs)
            loss = criterion(outputs, targets, night_mask)
            loss.backward()
            
            # Add gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
        
        train_loss = train_loss / len(train_loader.dataset)
        train_losses.append(train_loss)
        
        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets, night_mask in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
                inputs, targets, night_mask = inputs.to(device), targets.to(device), night_mask.to(device)
                outputs, day_night_pred = model(inputs)
                
                # Apply night time zero-forcing
                night_indices = night_mask.bool().squeeze()
                if torch.any(night_indices):
                    outputs[night_indices] = 0.0
                
                loss = criterion(outputs, targets, night_mask)
                val_loss += loss.item() * inputs.size(0)
        
        val_loss = val_loss / len(val_loader.dataset)
        val_losses.append(val_loss)
        
        # Update learning rate
        scheduler.step(val_loss)
        
        # Print progress
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")
        
        # Check if this is the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict()
            counter = 0
            print(f"New best model with validation loss: {best_val_loss:.6f}")
        else:
            counter += 1
            print(f"EarlyStopping counter: {counter} out of {patience}")
            
        # Early stopping
        if counter >= patience:
            print("Early stopping triggered")
            break
    
    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        
    return model, train_losses, val_losses

# 8. Train the model
model, train_losses, val_losses = train_time_aware_model(
    model, train_loader, val_loader, criterion, optimizer, scheduler,
    num_epochs=10,  # Reduced to 10
    patience=10
)

# Save the model
torch.save(model.state_dict(), f'{model_dir}/checkpoints/time_aware_lstm_model.pth')
print(f"\nModel saved to '{model_dir}/checkpoints/time_aware_lstm_model.pth'")

# 9. Plot training history
plt.figure(figsize=(12, 6))
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.title('Time-Aware LSTM Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig(f'{model_dir}/plots/training_history.png', dpi=300)
plt.close()

# 10. Evaluate the model
print("\n5. Evaluating model performance...")

# Predict on test set
model.eval()
all_targets = []
all_predictions = []
all_night_masks = []
all_timestamps = []

with torch.no_grad():
    for batch_idx, (inputs, targets, night_mask) in enumerate(test_loader):
        inputs = inputs.to(device)
        outputs, day_night_pred = model(inputs)
        
        # Force zero predictions during nighttime
        night_indices = night_mask.bool().squeeze()
        if len(night_indices.shape) == 0:  # Handle single sample batches
            night_indices = night_indices.unsqueeze(0)
        
        outputs_np = outputs.cpu().numpy()
        if torch.any(night_indices):
            outputs_np[night_indices.cpu().numpy()] = 0.0
        
        # Move to CPU for numpy conversion
        targets_np = targets.numpy()
        night_mask_np = night_mask.numpy()
        
        all_targets.append(targets_np)
        all_predictions.append(outputs_np)
        all_night_masks.append(night_mask_np)
        
        # Get corresponding timestamps for this batch
        batch_size = inputs.size(0)
        for i in range(batch_size):
            idx = batch_idx * batch_size + i
            if idx < len(test_dataset):
                all_timestamps.append(test_dataset.get_timestamp(idx))

# Concatenate batches
y_test_scaled = np.concatenate(all_targets)
y_pred_scaled = np.concatenate(all_predictions)
night_masks = np.concatenate(all_night_masks)

# Inverse transform to get actual values
y_test_actual = target_scaler.inverse_transform(y_test_scaled).flatten()
y_pred_actual = target_scaler.inverse_transform(y_pred_scaled).flatten()

# Set nighttime predictions explicitly to zero
y_pred_actual[night_masks.flatten() == 1] = 0.0

# Calculate metrics
mse = mean_squared_error(y_test_actual, y_pred_actual)
mae = mean_absolute_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_actual, y_pred_actual)

print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² Score: {r2:.4f}")
print(f"Mean Power Output: {np.mean(y_test_actual):.4f} MW")
print(f"RMSE as % of Mean Output: {(rmse/np.mean(y_test_actual))*100:.2f}%")

# Calculate daytime-only metrics
day_mask = night_masks.flatten() == 0
if np.any(day_mask):
    day_rmse = np.sqrt(mean_squared_error(y_test_actual[day_mask], y_pred_actual[day_mask]))
    day_mae = mean_absolute_error(y_test_actual[day_mask], y_pred_actual[day_mask])
    day_r2 = r2_score(y_test_actual[day_mask], y_pred_actual[day_mask])
    print("\nDaytime-only metrics:")
    print(f"Daytime RMSE: {day_rmse:.4f}")
    print(f"Daytime MAE: {day_mae:.4f}")
    print(f"Daytime R²: {day_r2:.4f}")

# 11. Create results DataFrame with timestamps
test_results_df = pd.DataFrame({
    'LocalTime': all_timestamps,
    'ActualPower': y_test_actual,
    'PredictedPower': y_pred_actual,
    'IsNight': night_masks.flatten()
})

# 12. SHAP Analysis for Feature Importance
print("\n6. Performing SHAP analysis for feature importance...")

try:
    import shap
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import torch
    from sklearn.preprocessing import StandardScaler
    
    # Create a flattened version of the data for SHAP that properly handles 3D LSTM input
    class FlattenedModelWrapper:
        def __init__(self, model, device, seq_length, num_features):
            self.model = model
            self.device = device
            self.seq_length = seq_length
            self.num_features = num_features
            self.model.eval()
        
        def __call__(self, X):
            with torch.no_grad():
                # X comes in as 2D: (samples, flattened_features)
                # Need to reshape to 3D: (samples, seq_length, features)
                batch_size = X.shape[0]
                
                # Reshape from (samples, seq_length*features) to (samples, seq_length, features)
                X_reshaped = X.reshape(batch_size, self.seq_length, self.num_features)
                
                # Convert to PyTorch tensor
                X_tensor = torch.FloatTensor(X_reshaped).to(self.device)
                
                # Forward pass
                outputs, _ = self.model(X_tensor)
                
                # Return predictions as numpy array
                return outputs.cpu().numpy()
    
    # Print some diagnostic information
    print(f"LSTM input shape: [batch_size, {sequence_length}, {len(features)}]")
    
    # Select a manageable number of samples for SHAP analysis
    sample_size = min(50, len(test_dataset))
    sample_indices = np.random.choice(len(test_dataset), sample_size, replace=False)
    
    # Prepare the data in the right format
    sample_data = np.array([test_dataset[i][0].cpu().numpy() for i in sample_indices])
    
    # Create flattened data for SHAP by reshaping from 3D to 2D
    # From (samples, seq_length, features) to (samples, seq_length*features)
    flattened_sample_data = sample_data.reshape(sample_size, -1)
    
    # Check the shape
    print(f"Original data shape: {sample_data.shape}")
    print(f"Flattened data shape for SHAP: {flattened_sample_data.shape}")
    
    # Create a smaller background dataset using actual data distributions
    background_size = min(20, len(val_dataset))
    background_indices = np.random.choice(len(val_dataset), background_size, replace=False)
    background_data = np.array([val_dataset[i][0].cpu().numpy() for i in background_indices])
    flattened_background_data = background_data.reshape(background_size, -1)
    
    print(f"Using {background_size} background samples and {sample_size} explanation samples")
    
    # Create model wrapper that handles reshaping
    model_wrapper = FlattenedModelWrapper(
        model, 
        device,
        seq_length=sequence_length,
        num_features=len(features)
    )
    
    # Use KernelExplainer with flattened data
    print("Creating KernelExplainer...")
    explainer = shap.KernelExplainer(
        model_wrapper, 
        flattened_background_data[:10],  # Use fewer background samples to reduce memory usage
        link="identity"
    )
    
    # Calculate SHAP values on a subset of samples to save memory
    print("Calculating SHAP values (this may take some time)...")
    
    # Using fewer samples and iterations for memory efficiency
    shap_values = explainer.shap_values(
        flattened_sample_data[:10],  # Use only 10 samples to explain
        nsamples=100,  # Reduce sample count for faster processing
        l1_reg="aic"   # Add regularization to make calculation more efficient
    )
    
    # Reshape SHAP values back to match features
    # First, check the shape we received
    print(f"Raw SHAP values shape: {np.array(shap_values).shape}")
    
    # Reshape the SHAP values to match feature names
    # If SHAP values are 2D (samples, flattened_features), reshape to 3D
    if len(np.array(shap_values).shape) == 2:
        reshaped_shap_values = np.array(shap_values).reshape(
            -1, sequence_length, len(features)
        )
        # Average across sequence length to get per-feature importance
        feature_importance = np.abs(reshaped_shap_values).mean(axis=(0, 1))
    else:
        # Handle case where SHAP returns a different structure
        print(f"Unexpected SHAP values shape. Using alternative calculation method.")
        # Use a more direct calculation as fallback
        feature_importance = np.abs(np.array(shap_values)).mean(axis=0)
        
        # If the feature_importance is still flattened, try to reshape
        if len(feature_importance) == sequence_length * len(features):
            feature_importance = feature_importance.reshape(sequence_length, len(features)).mean(axis=0)
    
    # Create importance dataframe
    importance_df = pd.DataFrame({
        'Feature': features,
        'Importance': feature_importance
    }).sort_values('Importance', ascending=False)
    
    # Plot feature importance
    plt.figure(figsize=(12, 8))
    plt.barh(importance_df['Feature'][:15], importance_df['Importance'][:15])
    plt.title('Feature Importance Based on SHAP Analysis')
    plt.xlabel('Average Impact on Model Output')
    plt.tight_layout()
    plt.savefig(f'{model_dir}/plots/shap_feature_importance.png', dpi=300)
    plt.close()
    
    # Print feature importance
    print("\nTop 15 most important features:")
    for i, (feature, importance) in enumerate(zip(importance_df['Feature'][:15], importance_df['Importance'][:15])):
        print(f"{i+1}. {feature}: {importance:.6f}")
    
    # Save feature importance to CSV
    importance_df.to_csv(f'{model_dir}/data/feature_importance.csv', index=False)
    
    print("SHAP analysis completed successfully!")
    
except Exception as e:
    import traceback
    print(f"Error performing SHAP analysis: {e}")
    print(traceback.format_exc())
    print("Continuing without SHAP analysis...")

# 13. Create daily aggregation of results
print("\n7. Creating daily aggregations and visualizations...")

# Ensure we have timestamps for all predictions
if len(test_results_df) > 0 and 'LocalTime' in test_results_df.columns:
    # Add date column
    test_results_df['date'] = pd.to_datetime(test_results_df['LocalTime']).dt.date
    
    # Create daily aggregations
    daily_results = test_results_df.groupby('date').agg({
        'ActualPower': 'sum',
        'PredictedPower': 'sum'
    }).reset_index()
    
    # Save full high-resolution results (30-minute intervals)
    high_res_output_path = f'{model_dir}/data/high_resolution_predictions.csv'
    test_results_df.to_csv(high_res_output_path, index=False)
    print(f"Saved high-resolution predictions to {high_res_output_path}")
    
    # Save daily aggregated results
    daily_output_path = f'{model_dir}/data/daily_predictions.csv'
    daily_results.to_csv(daily_output_path, index=False)
    print(f"Saved daily predictions to {daily_output_path}")
    
    # ENHANCED VISUALIZATION: Actual values only with extended plot
    plt.figure(figsize=(24, 10))
    
    # Convert date to datetime for proper sorting
    daily_results['date'] = pd.to_datetime(daily_results['date'])
    daily_results = daily_results.sort_values('date')
    
    # Set positions for the bars
    indices = np.arange(len(daily_results))
    
    # Create the bar chart for actual values only
    plt.bar(indices, daily_results['ActualPower'], color='#1f77b4', label='ActualValue(MW)')
    
    # Add title and labels
    plt.title('Actual Solar Power Generated', fontsize=16)
    plt.ylabel('Total Daily Power Generated (MW)', fontsize=14)
    plt.legend(fontsize=12)
    
    # Format x-axis dates with maximum resolution
    date_strs = [d.strftime('%Y-%m-%d') for d in daily_results['date']]
    plt.xticks(indices, date_strs, rotation=45, ha='right', fontsize=10)
    
    # Ensure there's adequate spacing on both sides
    plt.xlim(indices.min() - 1, indices.max() + 1)
    
    # Add grid for better readability
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    
    # Use tight layout with custom padding
    plt.tight_layout(pad=2.0)
    
    # Save high-resolution image
    plt.savefig(f'{model_dir}/plots/actual_power_daily.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # ENHANCED VISUALIZATION: actual vs predicted with extended plot
    plt.figure(figsize=(24, 10))
    
    # Set the width of the bars
    bar_width = 0.35
    
    # Create the bar chart
    plt.bar(indices - bar_width/2, daily_results['ActualPower'], width=bar_width, 
            color='#1f77b4', label='ActualValue(MW)')
    plt.bar(indices + bar_width/2, daily_results['PredictedPower'], width=bar_width, 
            color='#ff7f0e', label='PredValue(MW)')
    
    # Add title and labels
    plt.title('Actual vs. Predicted Solar Power Generated', fontsize=16)
    plt.ylabel('Total Daily Power Generated (MW)', fontsize=14)
    plt.legend(fontsize=12)
    
    # Format x-axis dates with higher resolution
    plt.xticks(indices, date_strs, rotation=45, ha='right', fontsize=10)
    
    # Ensure there's adequate spacing on both sides
    plt.xlim(indices.min() - 1, indices.max() + 1)
    
    # Add grid for better readability
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    
    # Use tight layout with custom padding
    plt.tight_layout(pad=2.0)
    
    # Save high-resolution image
    plt.savefig(f'{model_dir}/plots/daily_prediction_bars.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # 16. Error visualization with higher resolution
    plt.figure(figsize=(20, 8))
    
    # Calculate error
    daily_results['Error'] = daily_results['PredictedPower'] - daily_results['ActualPower']
    daily_results['Error_Percent'] = (daily_results['Error'] / (daily_results['ActualPower'] + 1e-8)) * 100  # Avoid division by zero
    
    # Create error bar chart
    plt.bar(indices, daily_results['Error_Percent'], color='#2ca02c', alpha=0.7)
    plt.axhline(y=0, color='red', linestyle='-', alpha=0.7)
    
    # Add title and labels
    plt.title('Time-Aware LSTM Prediction Error by Day (Percentage)', fontsize=14)
    plt.ylabel('Error (%)', fontsize=12)
    plt.xticks(indices, date_strs, rotation=45, ha='right')
    
    # Improve layout
    plt.tight_layout()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(f'{model_dir}/plots/daily_prediction_error_bars.png', dpi=300)
    plt.close()
    
    # 17. Create a time-series plot to see daily patterns
    # Select a few sample days for detailed visualization
    unique_days = test_results_df['date'].nunique()
    num_days_to_show = min(5, unique_days)
    sample_days = sorted(test_results_df['date'].unique())[:num_days_to_show]
    
    plt.figure(figsize=(20, 15))
    for i, sample_day in enumerate(sample_days):
        plt.subplot(num_days_to_show, 1, i+1)
        day_data = test_results_df[test_results_df['date'] == sample_day].copy()
        day_data.sort_values('LocalTime', inplace=True)
        
        # Plot with improved styling
        plt.plot(day_data['LocalTime'], day_data['ActualPower'], 'b-', 
                 label='Actual', alpha=0.7, linewidth=2, marker='o', markersize=4)
        plt.plot(day_data['LocalTime'], day_data['PredictedPower'], 'r-', 
                 label='Predicted', alpha=0.7, linewidth=2, marker='x', markersize=4)
        
        # Highlight night periods with clearer visualization
        night_periods = day_data[day_data['IsNight'] == 1]
        if not night_periods.empty:
            for _, period in night_periods.iterrows():
                plt.axvspan(period['LocalTime'], period['LocalTime'] + pd.Timedelta(minutes=30), 
                           alpha=0.2, color='gray')
        
        # Add label for night periods
        if i == 0:
            plt.axvspan(0, 0, alpha=0.2, color='gray', label='Night period')
        
        plt.title(f"Day Pattern: {sample_day}", fontsize=14)
        plt.ylabel("Power (MW)", fontsize=12)
        plt.grid(True, alpha=0.3)
        plt.legend(fontsize=12, loc='upper left')
        
        # Format x-axis to show hours with better resolution
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=2))
        plt.xticks(fontsize=10)
    
    plt.tight_layout()
    plt.savefig(f'{model_dir}/plots/daily_pattern_samples.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # 18. Create 365-day prediction CSV (as requested)
    try:
        # Fill in any missing days to create a 365-day dataset
        start_date = daily_results['date'].min()
        end_date = start_date + pd.DateOffset(days=364)
        
        # Create complete date range
        all_dates = pd.date_range(start=start_date, end=end_date)
        complete_dates_df = pd.DataFrame({'date': all_dates})
        
        # Merge with existing results
        full_year_df = pd.merge(
            complete_dates_df, 
            daily_results[['date', 'ActualPower', 'PredictedPower']], 
            on='date', 
            how='left'
        )
        
        # Fill missing values with more intelligent estimates based on nearby days and seasonality
        # This is better than simple zero-filling
        full_year_df['ActualPower'] = full_year_df['ActualPower'].interpolate(method='linear').fillna(0)
        full_year_df['PredictedPower'] = full_year_df['PredictedPower'].interpolate(method='linear').fillna(0)
        
        # Add day of year and month columns for easier analysis
        full_year_df['day_of_year'] = full_year_df['date'].dt.dayofyear
        full_year_df['month'] = full_year_df['date'].dt.month
        
        # Save 365-day dataset
        full_year_path = f'{model_dir}/data/full_year_predictions.csv'
        full_year_df.to_csv(full_year_path, index=False)
        print(f"Created 365-day prediction dataset and saved to {full_year_path}")
        
    except Exception as e:
        print(f"Error creating 365-day dataset: {e}")
else:
    print("Cannot create daily visualizations: missing timestamp data")

# Save evaluation metrics to a file
with open(f'{model_dir}/evaluation_metrics.txt', 'w') as f:
    f.write(f"LSTM Model Evaluation Metrics\n")
    f.write(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
    f.write(f"Root Mean Squared Error (RMSE): {rmse:.4f}\n")
    f.write(f"Mean Absolute Error (MAE): {mae:.4f}\n")
    f.write(f"R² Score: {r2:.4f}\n")
    f.write(f"Mean Power Output: {np.mean(y_test_actual):.4f} MW\n")
    f.write(f"RMSE as % of Mean Output: {(rmse/np.mean(y_test_actual))*100:.2f}%\n\n")
    
    if np.any(day_mask):
        f.write("Daytime-only metrics:\n")
        f.write(f"Daytime RMSE: {day_rmse:.4f}\n")
        f.write(f"Daytime MAE: {day_mae:.4f}\n")
        f.write(f"Daytime R²: {day_r2:.4f}\n")

print("\n" + "="*80)
print("TIME-AWARE LSTM MODEL TRAINING AND EVALUATION COMPLETE")
print(f"All model files and outputs saved to '{model_dir}' directory:")
print(f"  - Model: {model_dir}/checkpoints/time_aware_lstm_model.pth")
print(f"  - Data: {model_dir}/data/")
print(f"  - Plots: {model_dir}/plots/")
print("="*80)

Using device: cuda
IMPROVED SOLAR PV LSTM MODEL - MULTI-LOCATION TRAINING, SINGLE LOCATION PREDICTION (PyTorch)

1. Loading preprocessed data...
Validation data is empty. Will create validation set from training data.
Training data shape: (347520, 55)
Validation data shape: (0, 55)
Test data shape: (2880, 55)

Target location for prediction: loc_41p55_-74p35

2. Preparing features for LSTM model...
Using weather features: ['Temperature', 'Pressure', 'GHI', 'DHI', 'Cloud_Type']
Using 11 lag features
Target variable: Power(MW)
Full feature list:
1. Temperature
2. Pressure
3. GHI
4. DHI
5. Cloud_Type
6. hour_sin
7. hour_cos
8. month_sin
9. month_cos
10. dayofyear_sin
11. dayofyear_cos
12. is_daylight
13. Cloud_Fill_Flag
14. DNI_Fill_Flag
15. power_lag_1
16. power_lag_2
17. power_lag_3
18. Temperature_lag_1
19. Temperature_lag_2
20. GHI_lag_1
21. GHI_lag_2
22. Cloud_Type_lag_1
23. Cloud_Type_lag_2

3. Preparing data for sequence-based LSTM model...
Created validation set with 34752 samples

Epoch 1/10 - Training: 100%|██████████| 3982/3982 [00:37<00:00, 106.36it/s]
Epoch 1/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 143.48it/s]


Epoch 1/10 - Train Loss: 0.068216, Val Loss: 0.006918
New best model with validation loss: 0.006918


Epoch 2/10 - Training: 100%|██████████| 3982/3982 [00:37<00:00, 107.07it/s]
Epoch 2/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 313.12it/s]


Epoch 2/10 - Train Loss: 0.029466, Val Loss: 0.007037
EarlyStopping counter: 1 out of 10


Epoch 3/10 - Training: 100%|██████████| 3982/3982 [00:40<00:00, 98.19it/s] 
Epoch 3/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 118.29it/s]


Epoch 3/10 - Train Loss: 0.026747, Val Loss: 0.006898
New best model with validation loss: 0.006898


Epoch 4/10 - Training: 100%|██████████| 3982/3982 [00:44<00:00, 88.49it/s] 
Epoch 4/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 194.92it/s]


Epoch 4/10 - Train Loss: 0.025095, Val Loss: 0.006669
New best model with validation loss: 0.006669


Epoch 5/10 - Training: 100%|██████████| 3982/3982 [01:02<00:00, 63.60it/s]
Epoch 5/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 136.46it/s]


Epoch 5/10 - Train Loss: 0.023916, Val Loss: 0.006782
EarlyStopping counter: 1 out of 10


Epoch 6/10 - Training: 100%|██████████| 3982/3982 [00:57<00:00, 69.37it/s] 
Epoch 6/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 134.39it/s]


Epoch 6/10 - Train Loss: 0.023650, Val Loss: 0.006881
EarlyStopping counter: 2 out of 10


Epoch 7/10 - Training: 100%|██████████| 3982/3982 [00:46<00:00, 86.48it/s] 
Epoch 7/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 358.00it/s]


Epoch 7/10 - Train Loss: 0.023012, Val Loss: 0.006688
EarlyStopping counter: 3 out of 10


Epoch 8/10 - Training: 100%|██████████| 3982/3982 [00:31<00:00, 126.52it/s]
Epoch 8/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 326.40it/s]


Epoch 8/10 - Train Loss: 0.023322, Val Loss: 0.006960
EarlyStopping counter: 4 out of 10


Epoch 9/10 - Training: 100%|██████████| 3982/3982 [00:33<00:00, 120.53it/s]
Epoch 9/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 275.92it/s]


Epoch 9/10 - Train Loss: 0.019862, Val Loss: 0.006771
EarlyStopping counter: 5 out of 10


Epoch 10/10 - Training: 100%|██████████| 3982/3982 [00:36<00:00, 107.70it/s]
Epoch 10/10 - Validation: 100%|██████████| 9/9 [00:00<00:00, 188.95it/s]


Epoch 10/10 - Train Loss: 0.019294, Val Loss: 0.006880
EarlyStopping counter: 6 out of 10

Model saved to 'lstm_model/checkpoints/time_aware_lstm_model.pth'

5. Evaluating model performance...
Root Mean Squared Error (RMSE): 2.8253
Mean Absolute Error (MAE): 1.9357
R² Score: 0.8535
Mean Power Output: 5.5163 MW
RMSE as % of Mean Output: 51.22%

Daytime-only metrics:
Daytime RMSE: 3.5038
Daytime MAE: 2.9732
Daytime R²: 0.7909

6. Performing SHAP analysis for feature importance...
LSTM input shape: [batch_size, 8, 23]
Original data shape: (50, 8, 23)
Flattened data shape for SHAP: (50, 184)
Using 20 background samples and 50 explanation samples
Creating KernelExplainer...
Calculating SHAP values (this may take some time)...


  0%|          | 0/10 [00:00<?, ?it/s]

Raw SHAP values shape: (10, 184, 1)
Unexpected SHAP values shape. Using alternative calculation method.

Top 15 most important features:
1. power_lag_1: 0.004387
2. hour_cos: 0.002521
3. hour_sin: 0.001588
4. Pressure: 0.000844
5. Temperature: 0.000590
6. DHI: 0.000506
7. dayofyear_cos: 0.000467
8. power_lag_2: 0.000444
9. dayofyear_sin: 0.000426
10. Cloud_Type_lag_2: 0.000397
11. Cloud_Type: 0.000322
12. GHI_lag_1: 0.000320
13. month_cos: 0.000316
14. DNI_Fill_Flag: 0.000288
15. power_lag_3: 0.000229
SHAP analysis completed successfully!

7. Creating daily aggregations and visualizations...
Saved high-resolution predictions to lstm_model/data/high_resolution_predictions.csv
Saved daily predictions to lstm_model/data/daily_predictions.csv


Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
Locator attempting to generate 173647 ticks ([-657.75, ..., 13812.75]), which exceeds Locator.MAXTICKS (1000).
L

Created 365-day prediction dataset and saved to lstm_model/data/full_year_predictions.csv

TIME-AWARE LSTM MODEL TRAINING AND EVALUATION COMPLETE
All model files and outputs saved to 'lstm_model' directory:
  - Model: lstm_model/checkpoints/time_aware_lstm_model.pth
  - Data: lstm_model/data/
  - Plots: lstm_model/plots/
