# =============================================================================
# BASELINE MODEL TRAINING NOTEBOOK
# =============================================================================
## Purpose:
    - Load and preprocess the raw weather dataset.
    - Define the Gated Recurrent Unit (GRU) model architecture.
    - Train the baseline GRU model using specified hyperparameters and save its state.
    - Save the preprocessed and split data tensors for subsequent use.
# =============================================================================

# === Clone Repository & Install Dependencies ===

In [1]:
!rm -rf Sustainable_AI_Agent_Project
!git clone https://github.com/trongjhuongwr/Sustainable_AI_Agent_Project.git
%cd Sustainable_AI_Agent_Project

Cloning into 'Sustainable_AI_Agent_Project'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 35 (delta 7), reused 30 (delta 5), pack-reused 0 (from 0)[K
Receiving objects: 100% (35/35), 60.76 KiB | 15.19 MiB/s, done.
Resolving deltas: 100% (7/7), done.
/kaggle/working/Sustainable_AI_Agent_Project


In [2]:
!pip install -q --extra-index-url https://download.pytorch.org/whl/cu121 -r /kaggle/working/Sustainable_AI_Agent_Project/requirements.txt

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.5/780.5 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m85.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m108.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

# 1. Import Libraries and Configuration

In [3]:
import os
import warnings
import logging
import copy
import random

# Suppress warnings for cleaner output
os.environ["GYM_DISABLE_WARNINGS"] = "true"
warnings.filterwarnings("ignore", module="gymnasium")
warnings.filterwarnings("ignore", category=UserWarning)
logging.getLogger("gymnasium").setLevel(logging.ERROR)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
from torch.optim.lr_scheduler import CosineAnnealingLR
from builtins import print as builtin_print

print("Libraries imported successfully.")

Libraries imported successfully.


# 2. Configuration Class

In [4]:
# Defines hyperparameters, file paths, and other parameters used throughout the baseline training process.
class Config:
    # --- Data Paths ---
    DATA_PATH = '/kaggle/input/weather-prediction/seattle-weather.csv' # Path to the raw dataset CSV file
    PROCESSED_DATA_SAVE_PATH = '/kaggle/working/processed_data.pt' # Output path for saving processed data tensors
    BASELINE_MODEL_SAVE_PATH = '/kaggle/working/baseline_model.pth' # Output path for saving the trained baseline model state dictionary

    # --- Data Preprocessing Parameters ---
    SEQUENCE_LENGTH = 30 # Number of past days used to predict the next day
    TEST_SIZE = 0.2 # Proportion of data reserved for the final test set
    VAL_SIZE_FROM_TEMP = 0.1 # Proportion of the remaining data (after test split) used for validation
    SEED = 42 # Random seed for reproducibility

    # --- Model Architecture Parameters ---
    INPUT_DIM = 4      # Number of input features: precipitation, temp_max, temp_min, wind
    HIDDEN_DIM = 256   # Dimensionality of the GRU hidden state
    N_LAYERS = 2       # Number of stacked GRU layers
    OUTPUT_DIM = 1     # Output dimension (binary classification: rain probability)
    DROPOUT = 0.2      # Dropout rate applied between GRU layers

    # --- Training Hyperparameters ---
    BATCH_SIZE = 64
    EPOCHS = 500       # Number of training epochs
    LEARNING_RATE = 0.0001
    WEIGHT_DECAY = 1e-4 # Weight decay for AdamW optimizer
    SCHEDULER_T_MAX = 50  # T_max for CosineAnnealingLR scheduler (cycle length)
    SCHEDULER_ETA_MIN = 1e-6 # Minimum learning rate for scheduler

    # --- Computation Device ---
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set seeds for reproducibility
random.seed(Config.SEED)
np.random.seed(Config.SEED)
torch.manual_seed(Config.SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(Config.SEED)
    torch.cuda.manual_seed_all(Config.SEED)
    # Optional: Enable deterministic algorithms for full reproducibility, may impact performance
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

print(f"Configuration loaded. Using device: {Config.DEVICE}")
print(f"Seed set to: {Config.SEED}")
print(f"Processed data will be saved to: {Config.PROCESSED_DATA_SAVE_PATH}")
print(f"Baseline model will be saved to: {Config.BASELINE_MODEL_SAVE_PATH}")

Configuration loaded. Using device: cuda
Seed set to: 42
Processed data will be saved to: /kaggle/working/processed_data.pt
Baseline model will be saved to: /kaggle/working/baseline_model.pth


# 3. Data Loading and Preprocessing

In [5]:
# Loads the dataset, performs feature engineering, scales features, creates time sequences,
# splits data into training, validation, and test sets, and converts them to PyTorch tensors.

def create_sequences(input_data, target_data, sequence_length):
    """
    Generates sequences suitable for time-series forecasting with RNNs.
    Args:
        input_data (np.ndarray): Array of input features.
        target_data (np.ndarray): Array of target values.
        sequence_length (int): The length of each input sequence.
    Returns:
        tuple: (np.ndarray, np.ndarray) containing input sequences and corresponding targets.
    """
    xs, ys = [], []
    for i in range(len(input_data) - sequence_length):
        x = input_data[i:(i + sequence_length)]
        y = target_data[i + sequence_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

# Load the dataset
try:
    df = pd.read_csv(Config.DATA_PATH)
    builtin_print(f"Dataset loaded successfully from {Config.DATA_PATH}. Shape: {df.shape}")
except FileNotFoundError:
    builtin_print(f"Error: Dataset file not found at {Config.DATA_PATH}. Please ensure the dataset is correctly added.")
    raise

# Feature Engineering: Convert categorical weather to binary target
df['weather_numeric'] = df['weather'].apply(lambda x: 1 if x in ['rain', 'drizzle'] else 0)
df = df.drop(columns=['date', 'weather']) # Drop original date and weather columns

# Scaling: Normalize input features to [0, 1] range
scaler = MinMaxScaler()
features_to_scale = ['precipitation', 'temp_max', 'temp_min', 'wind'] # Explicitly list features
scaled_features_np = scaler.fit_transform(df[features_to_scale])
target_np = df['weather_numeric'].values
builtin_print("Input features scaled using MinMaxScaler.")

# Sequence Creation: Generate input sequences and corresponding targets
X_np, y_np = create_sequences(scaled_features_np, target_np, Config.SEQUENCE_LENGTH)
builtin_print(f"Sequences created with length {Config.SEQUENCE_LENGTH}. Shape X: {X_np.shape}, Shape y: {y_np.shape}")

# Data Splitting: Stratified split into train, validation, and test sets
# First split: Separate the test set (20%)
X_temp, X_test_np, y_temp, y_test_np = train_test_split(
    X_np, y_np,
    test_size=Config.TEST_SIZE,
    random_state=Config.SEED,
    stratify=y_np # Ensure proportional target distribution
)
# Second split: Split the remaining data into train (90% of remainder) and validation (10% of remainder)
X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(
    X_temp, y_temp,
    test_size=Config.VAL_SIZE_FROM_TEMP,
    random_state=Config.SEED,
    stratify=y_temp # Ensure proportional target distribution
)
builtin_print(f"Data split completed: Train={len(X_train_np)}, Validation={len(X_val_np)}, Test={len(X_test_np)}")

# Convert NumPy arrays to PyTorch Tensors
X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32).unsqueeze(1) # Add channel dim for BCELoss
X_val_tensor = torch.tensor(X_val_np, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val_np, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32).unsqueeze(1)
builtin_print("Data successfully converted to PyTorch tensors.")

# Save the processed data tensors for use by other notebooks
processed_data = {
    'X_train': X_train_tensor, 'y_train': y_train_tensor,
    'X_val': X_val_tensor, 'y_val': y_val_tensor,
    'X_test': X_test_tensor, 'y_test': y_test_tensor,
}
try:
    torch.save(processed_data, Config.PROCESSED_DATA_SAVE_PATH)
    builtin_print(f"Processed data tensors saved to {Config.PROCESSED_DATA_SAVE_PATH}")
except Exception as e:
    builtin_print(f"Error saving processed data: {e}")

Dataset loaded successfully from /kaggle/input/weather-prediction/seattle-weather.csv. Shape: (1461, 6)
Input features scaled using MinMaxScaler.
Sequences created with length 30. Shape X: (1431, 30, 4), Shape y: (1431,)
Data split completed: Train=1029, Validation=115, Test=287
Data successfully converted to PyTorch tensors.
Processed data tensors saved to /kaggle/working/processed_data.pt


# 4. GRU Model Definition

In [6]:
# Utility function to count model parameters
def count_parameters(model):
    """Counts the number of trainable parameters in a PyTorch model."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Utility function count_parameters defined.")

# Defines the architecture of the Gated Recurrent Unit network used for weather prediction.
class WeatherGRU(nn.Module):
    """
    GRU model for binary weather prediction based on past sequence data.
    Args:
        input_dim (int): Number of input features.
        hidden_dim (int): Dimension of the GRU hidden state.
        n_layers (int): Number of stacked GRU layers.
        output_dim (int): Number of output units (1 for binary classification).
        dropout (float): Dropout probability applied between GRU layers.
    """
    def __init__(self, input_dim, hidden_dim, n_layers, output_dim, dropout):
        super(WeatherGRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # GRU Layer: batch_first=True expects input shape (batch, seq_len, features)
        # Dropout is applied only between layers if n_layers > 1
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers,
                          batch_first=True, dropout=dropout if n_layers > 1 else 0)

        # Fully Connected Layer: Maps the last hidden state to the output dimension
        self.fc = nn.Linear(hidden_dim, output_dim)

        # Sigmoid Activation: Outputs a probability for binary classification
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Forward pass through the GRU network.
        Args:
            x (torch.Tensor): Input tensor of shape (batch, sequence_length, input_dim).
        Returns:
            torch.Tensor: Output tensor of shape (batch, output_dim) representing probabilities.
        """
        # GRU layer processes the sequence
        # We don't explicitly need the final hidden state `h_n` here
        gru_out, _ = self.gru(x)

        # Select the output from the last time step of the sequence
        # gru_out shape: (batch, seq_len, hidden_dim) -> gru_out[:, -1, :] shape: (batch, hidden_dim)
        last_time_step_output = gru_out[:, -1, :]

        # Pass through the fully connected layer
        out = self.fc(last_time_step_output)

        # Apply sigmoid activation
        return self.sigmoid(out)

print("WeatherGRU model class defined.")

Utility function count_parameters defined.
WeatherGRU model class defined.


# 5. Baseline Model Training Function

In [7]:
# Encapsulates the training loop, including loss calculation, optimization,
# learning rate scheduling, validation, and saving the best model state based on validation loss.

def train_baseline_model(model, train_loader, val_loader, config):
    """
    Trains the baseline GRU model.
    Args:
        model (nn.Module): The WeatherGRU model instance.
        train_loader (DataLoader): DataLoader for the training set.
        val_loader (DataLoader): DataLoader for the validation set.
        config (Config): Configuration object containing hyperparameters.
    Returns:
        nn.Module: The trained model loaded with the best state observed during validation.
    """
    criterion = nn.BCELoss() # Binary Cross-Entropy Loss for binary classification
    # AdamW optimizer with specified learning rate and weight decay
    optimizer = torch.optim.AdamW(model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY)
    # Cosine annealing learning rate scheduler
    scheduler = CosineAnnealingLR(optimizer, T_max=config.SCHEDULER_T_MAX, eta_min=config.SCHEDULER_ETA_MIN)

    model.to(config.DEVICE) # Move model to the configured device (GPU or CPU)
    best_val_loss = float('inf')
    best_model_state = None # To store the state_dict of the best model

    print("\n--- Starting Baseline Model Training ---")
    for epoch in range(config.EPOCHS):
        model.train() # Set model to training mode
        total_train_loss = 0.0

        # Progress bar for training batches
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{config.EPOCHS} [Train]", leave=False)
        for inputs, labels in train_bar:
            inputs, labels = inputs.to(config.DEVICE), labels.to(config.DEVICE)

            optimizer.zero_grad() # Clear previous gradients
            outputs = model(inputs) # Forward pass
            loss = criterion(outputs, labels) # Calculate loss
            loss.backward() # Backpropagation
            optimizer.step() # Update weights

            total_train_loss += loss.item()
            train_bar.set_postfix(loss=f"{loss.item():.4f}") # Update progress bar description

        avg_train_loss = total_train_loss / len(train_loader)

        # --- Validation Phase ---
        model.eval() # Set model to evaluation mode
        total_val_loss = 0.0
        with torch.no_grad(): # Disable gradient calculations for validation
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(config.DEVICE), labels.to(config.DEVICE)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_val_loss += loss.item()

        avg_val_loss = total_val_loss / len(val_loader)
        current_lr = optimizer.param_groups[0]['lr'] # Get current learning rate

        builtin_print(f"Epoch {epoch+1}/{config.EPOCHS}: Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}, LR={current_lr:.6f}")

        # --- Learning Rate Scheduler Step ---
        scheduler.step()

        # --- Save Best Model State ---
        # Keep track of the model state that yields the lowest validation loss
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # Use deepcopy to ensure the state isn't affected by further training
            best_model_state = copy.deepcopy(model.state_dict())
            builtin_print(f"  New best validation loss: {best_val_loss:.4f}. Saving model state.")

    # --- Load Best Model State ---
    # After training completes, load the best state found during validation
    if best_model_state:
        model.load_state_dict(best_model_state)
        builtin_print(f"\n--- Best model state loaded (Validation Loss: {best_val_loss:.4f}) ---")
    else:
        builtin_print("\n--- Warning: No best model state was saved. Check validation loss behavior. ---")

    print("--- Baseline Model Training Finished ---")
    return model

print("Baseline model training function defined.")

Baseline model training function defined.


# 6. Execute Baseline Training

In [8]:
# Initializes the DataLoaders, instantiates the WeatherGRU model, trains it using the
# `train_baseline_model` function, and saves the final trained model state dictionary.

# Create DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
builtin_print("Train and Validation DataLoaders created.")

# Initialize the WeatherGRU model
baseline_model = WeatherGRU(
    input_dim=Config.INPUT_DIM,
    hidden_dim=Config.HIDDEN_DIM,
    n_layers=Config.N_LAYERS,
    output_dim=Config.OUTPUT_DIM,
    dropout=Config.DROPOUT
)
builtin_print(f"Baseline WeatherGRU model initialized with {count_parameters(baseline_model):,} parameters.")

# Train the model
baseline_model_trained = train_baseline_model(
    model=baseline_model,
    train_loader=train_loader,
    val_loader=val_loader,
    config=Config # Pass the whole config object
)

# Save the state dictionary of the trained baseline model
try:
    # It's generally recommended to save only the state_dict
    torch.save(baseline_model_trained.state_dict(), Config.BASELINE_MODEL_SAVE_PATH)
    builtin_print(f"\nBaseline model state dictionary saved successfully to {Config.BASELINE_MODEL_SAVE_PATH}")
except Exception as e:
    builtin_print(f"\nError saving baseline model state dictionary: {e}")

Train and Validation DataLoaders created.
Baseline WeatherGRU model initialized with 596,225 parameters.

--- Starting Baseline Model Training ---


Epoch 1/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 1/500: Train Loss=0.6892, Val Loss=0.6840, LR=0.000100
  New best validation loss: 0.6840. Saving model state.


Epoch 2/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 2/500: Train Loss=0.6889, Val Loss=0.6774, LR=0.000100
  New best validation loss: 0.6774. Saving model state.


Epoch 3/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 3/500: Train Loss=0.6806, Val Loss=0.6705, LR=0.000100
  New best validation loss: 0.6705. Saving model state.


Epoch 4/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 4/500: Train Loss=0.6761, Val Loss=0.6532, LR=0.000099
  New best validation loss: 0.6532. Saving model state.


Epoch 5/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 5/500: Train Loss=0.6717, Val Loss=0.6311, LR=0.000098
  New best validation loss: 0.6311. Saving model state.


Epoch 6/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 6/500: Train Loss=0.6528, Val Loss=0.6143, LR=0.000098
  New best validation loss: 0.6143. Saving model state.


Epoch 7/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 7/500: Train Loss=0.6564, Val Loss=0.6023, LR=0.000097
  New best validation loss: 0.6023. Saving model state.


Epoch 8/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 8/500: Train Loss=0.6424, Val Loss=0.6137, LR=0.000095


Epoch 9/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 9/500: Train Loss=0.6447, Val Loss=0.6044, LR=0.000094


Epoch 10/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 10/500: Train Loss=0.6448, Val Loss=0.6142, LR=0.000092


Epoch 11/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 11/500: Train Loss=0.6554, Val Loss=0.6088, LR=0.000091


Epoch 12/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 12/500: Train Loss=0.6413, Val Loss=0.6110, LR=0.000089


Epoch 13/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 13/500: Train Loss=0.6448, Val Loss=0.6042, LR=0.000087


Epoch 14/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 14/500: Train Loss=0.6634, Val Loss=0.6105, LR=0.000084


Epoch 15/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 15/500: Train Loss=0.6445, Val Loss=0.6157, LR=0.000082


Epoch 16/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 16/500: Train Loss=0.6403, Val Loss=0.6079, LR=0.000080


Epoch 17/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 17/500: Train Loss=0.6458, Val Loss=0.6074, LR=0.000077


Epoch 18/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 18/500: Train Loss=0.6312, Val Loss=0.6068, LR=0.000074


Epoch 19/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 19/500: Train Loss=0.6342, Val Loss=0.6023, LR=0.000072


Epoch 20/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 20/500: Train Loss=0.6369, Val Loss=0.6033, LR=0.000069


Epoch 21/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 21/500: Train Loss=0.6452, Val Loss=0.6028, LR=0.000066


Epoch 22/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 22/500: Train Loss=0.6321, Val Loss=0.6042, LR=0.000063


Epoch 23/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 23/500: Train Loss=0.6424, Val Loss=0.6052, LR=0.000060


Epoch 24/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 24/500: Train Loss=0.6413, Val Loss=0.6066, LR=0.000057


Epoch 25/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 25/500: Train Loss=0.6425, Val Loss=0.6047, LR=0.000054


Epoch 26/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 26/500: Train Loss=0.6338, Val Loss=0.6053, LR=0.000050


Epoch 27/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 27/500: Train Loss=0.6474, Val Loss=0.6027, LR=0.000047


Epoch 28/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 28/500: Train Loss=0.6504, Val Loss=0.6056, LR=0.000044


Epoch 29/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 29/500: Train Loss=0.6428, Val Loss=0.6059, LR=0.000041


Epoch 30/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 30/500: Train Loss=0.6267, Val Loss=0.6051, LR=0.000038


Epoch 31/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 31/500: Train Loss=0.6322, Val Loss=0.6021, LR=0.000035
  New best validation loss: 0.6021. Saving model state.


Epoch 32/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 32/500: Train Loss=0.6403, Val Loss=0.6020, LR=0.000032
  New best validation loss: 0.6020. Saving model state.


Epoch 33/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 33/500: Train Loss=0.6501, Val Loss=0.6030, LR=0.000029


Epoch 34/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 34/500: Train Loss=0.6378, Val Loss=0.6070, LR=0.000027


Epoch 35/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 35/500: Train Loss=0.6386, Val Loss=0.6057, LR=0.000024


Epoch 36/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 36/500: Train Loss=0.6303, Val Loss=0.6056, LR=0.000021


Epoch 37/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 37/500: Train Loss=0.6480, Val Loss=0.6042, LR=0.000019


Epoch 38/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 38/500: Train Loss=0.6370, Val Loss=0.6063, LR=0.000017


Epoch 39/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 39/500: Train Loss=0.6418, Val Loss=0.6037, LR=0.000014


Epoch 40/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 40/500: Train Loss=0.6314, Val Loss=0.6028, LR=0.000012


Epoch 41/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 41/500: Train Loss=0.6353, Val Loss=0.6030, LR=0.000010


Epoch 42/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 42/500: Train Loss=0.6396, Val Loss=0.6038, LR=0.000009


Epoch 43/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 43/500: Train Loss=0.6243, Val Loss=0.6036, LR=0.000007


Epoch 44/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 44/500: Train Loss=0.6408, Val Loss=0.6031, LR=0.000006


Epoch 45/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 45/500: Train Loss=0.6425, Val Loss=0.6027, LR=0.000004


Epoch 46/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 46/500: Train Loss=0.6321, Val Loss=0.6029, LR=0.000003


Epoch 47/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 47/500: Train Loss=0.6390, Val Loss=0.6028, LR=0.000003


Epoch 48/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 48/500: Train Loss=0.6288, Val Loss=0.6028, LR=0.000002


Epoch 49/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 49/500: Train Loss=0.6345, Val Loss=0.6028, LR=0.000001


Epoch 50/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 50/500: Train Loss=0.6428, Val Loss=0.6028, LR=0.000001


Epoch 51/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 51/500: Train Loss=0.6385, Val Loss=0.6028, LR=0.000001


Epoch 52/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 52/500: Train Loss=0.6456, Val Loss=0.6027, LR=0.000001


Epoch 53/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 53/500: Train Loss=0.6362, Val Loss=0.6027, LR=0.000001


Epoch 54/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 54/500: Train Loss=0.6278, Val Loss=0.6027, LR=0.000002


Epoch 55/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 55/500: Train Loss=0.6330, Val Loss=0.6026, LR=0.000003


Epoch 56/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 56/500: Train Loss=0.6405, Val Loss=0.6025, LR=0.000003


Epoch 57/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 57/500: Train Loss=0.6365, Val Loss=0.6026, LR=0.000004


Epoch 58/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 58/500: Train Loss=0.6289, Val Loss=0.6029, LR=0.000006


Epoch 59/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 59/500: Train Loss=0.6525, Val Loss=0.6026, LR=0.000007


Epoch 60/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 60/500: Train Loss=0.6504, Val Loss=0.6043, LR=0.000009


Epoch 61/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 61/500: Train Loss=0.6340, Val Loss=0.6049, LR=0.000010


Epoch 62/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 62/500: Train Loss=0.6354, Val Loss=0.6036, LR=0.000012


Epoch 63/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 63/500: Train Loss=0.6412, Val Loss=0.6028, LR=0.000014


Epoch 64/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 64/500: Train Loss=0.6446, Val Loss=0.6036, LR=0.000017


Epoch 65/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 65/500: Train Loss=0.6425, Val Loss=0.6043, LR=0.000019


Epoch 66/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 66/500: Train Loss=0.6286, Val Loss=0.6039, LR=0.000021


Epoch 67/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 67/500: Train Loss=0.6326, Val Loss=0.6025, LR=0.000024


Epoch 68/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 68/500: Train Loss=0.6300, Val Loss=0.6029, LR=0.000027


Epoch 69/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 69/500: Train Loss=0.6415, Val Loss=0.6027, LR=0.000029


Epoch 70/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 70/500: Train Loss=0.6353, Val Loss=0.6017, LR=0.000032
  New best validation loss: 0.6017. Saving model state.


Epoch 71/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 71/500: Train Loss=0.6260, Val Loss=0.6044, LR=0.000035


Epoch 72/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 72/500: Train Loss=0.6480, Val Loss=0.6012, LR=0.000038
  New best validation loss: 0.6012. Saving model state.


Epoch 73/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 73/500: Train Loss=0.6445, Val Loss=0.6048, LR=0.000041


Epoch 74/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 74/500: Train Loss=0.6299, Val Loss=0.6057, LR=0.000044


Epoch 75/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 75/500: Train Loss=0.6351, Val Loss=0.6017, LR=0.000047


Epoch 76/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 76/500: Train Loss=0.6371, Val Loss=0.6022, LR=0.000050


Epoch 77/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 77/500: Train Loss=0.6419, Val Loss=0.6042, LR=0.000054


Epoch 78/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 78/500: Train Loss=0.6418, Val Loss=0.6041, LR=0.000057


Epoch 79/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 79/500: Train Loss=0.6464, Val Loss=0.6038, LR=0.000060


Epoch 80/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 80/500: Train Loss=0.6302, Val Loss=0.6075, LR=0.000063


Epoch 81/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 81/500: Train Loss=0.6317, Val Loss=0.6013, LR=0.000066


Epoch 82/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 82/500: Train Loss=0.6477, Val Loss=0.6069, LR=0.000069


Epoch 83/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 83/500: Train Loss=0.6374, Val Loss=0.6079, LR=0.000072


Epoch 84/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 84/500: Train Loss=0.6483, Val Loss=0.6039, LR=0.000074


Epoch 85/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 85/500: Train Loss=0.6381, Val Loss=0.6086, LR=0.000077


Epoch 86/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 86/500: Train Loss=0.6310, Val Loss=0.6030, LR=0.000080


Epoch 87/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 87/500: Train Loss=0.6422, Val Loss=0.6007, LR=0.000082
  New best validation loss: 0.6007. Saving model state.


Epoch 88/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 88/500: Train Loss=0.6368, Val Loss=0.6066, LR=0.000084


Epoch 89/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 89/500: Train Loss=0.6281, Val Loss=0.6059, LR=0.000087


Epoch 90/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 90/500: Train Loss=0.6586, Val Loss=0.6035, LR=0.000089


Epoch 91/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 91/500: Train Loss=0.6273, Val Loss=0.6088, LR=0.000091


Epoch 92/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 92/500: Train Loss=0.6232, Val Loss=0.6046, LR=0.000092


Epoch 93/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 93/500: Train Loss=0.6262, Val Loss=0.6023, LR=0.000094


Epoch 94/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 94/500: Train Loss=0.6413, Val Loss=0.6043, LR=0.000095


Epoch 95/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 95/500: Train Loss=0.6264, Val Loss=0.6067, LR=0.000097


Epoch 96/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 96/500: Train Loss=0.6206, Val Loss=0.6025, LR=0.000098


Epoch 97/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 97/500: Train Loss=0.6401, Val Loss=0.6037, LR=0.000098


Epoch 98/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 98/500: Train Loss=0.6291, Val Loss=0.6050, LR=0.000099


Epoch 99/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 99/500: Train Loss=0.6380, Val Loss=0.6081, LR=0.000100


Epoch 100/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 100/500: Train Loss=0.6478, Val Loss=0.6042, LR=0.000100


Epoch 101/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 101/500: Train Loss=0.6347, Val Loss=0.6076, LR=0.000100


Epoch 102/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 102/500: Train Loss=0.6350, Val Loss=0.6052, LR=0.000100


Epoch 103/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 103/500: Train Loss=0.6365, Val Loss=0.6016, LR=0.000100


Epoch 104/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 104/500: Train Loss=0.6230, Val Loss=0.6046, LR=0.000099


Epoch 105/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 105/500: Train Loss=0.6289, Val Loss=0.6038, LR=0.000098


Epoch 106/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 106/500: Train Loss=0.6372, Val Loss=0.5998, LR=0.000098
  New best validation loss: 0.5998. Saving model state.


Epoch 107/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 107/500: Train Loss=0.6313, Val Loss=0.6090, LR=0.000097


Epoch 108/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 108/500: Train Loss=0.6357, Val Loss=0.6071, LR=0.000095


Epoch 109/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 109/500: Train Loss=0.6275, Val Loss=0.6004, LR=0.000094


Epoch 110/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 110/500: Train Loss=0.6324, Val Loss=0.6009, LR=0.000092


Epoch 111/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 111/500: Train Loss=0.6177, Val Loss=0.5996, LR=0.000091
  New best validation loss: 0.5996. Saving model state.


Epoch 112/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 112/500: Train Loss=0.6180, Val Loss=0.6003, LR=0.000089


Epoch 113/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 113/500: Train Loss=0.6183, Val Loss=0.5999, LR=0.000087


Epoch 114/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 114/500: Train Loss=0.6246, Val Loss=0.5974, LR=0.000084
  New best validation loss: 0.5974. Saving model state.


Epoch 115/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 115/500: Train Loss=0.6239, Val Loss=0.5981, LR=0.000082


Epoch 116/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 116/500: Train Loss=0.6152, Val Loss=0.6022, LR=0.000080


Epoch 117/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 117/500: Train Loss=0.6180, Val Loss=0.5969, LR=0.000077
  New best validation loss: 0.5969. Saving model state.


Epoch 118/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 118/500: Train Loss=0.6201, Val Loss=0.5973, LR=0.000074


Epoch 119/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 119/500: Train Loss=0.6158, Val Loss=0.5996, LR=0.000072


Epoch 120/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 120/500: Train Loss=0.6401, Val Loss=0.5955, LR=0.000069
  New best validation loss: 0.5955. Saving model state.


Epoch 121/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 121/500: Train Loss=0.6192, Val Loss=0.6031, LR=0.000066


Epoch 122/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 122/500: Train Loss=0.6185, Val Loss=0.5984, LR=0.000063


Epoch 123/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 123/500: Train Loss=0.6251, Val Loss=0.5976, LR=0.000060


Epoch 124/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 124/500: Train Loss=0.6191, Val Loss=0.5965, LR=0.000057


Epoch 125/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 125/500: Train Loss=0.6207, Val Loss=0.5967, LR=0.000054


Epoch 126/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 126/500: Train Loss=0.6365, Val Loss=0.5986, LR=0.000051


Epoch 127/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 127/500: Train Loss=0.6154, Val Loss=0.5959, LR=0.000047


Epoch 128/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 128/500: Train Loss=0.6197, Val Loss=0.5962, LR=0.000044


Epoch 129/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 129/500: Train Loss=0.6176, Val Loss=0.6005, LR=0.000041


Epoch 130/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 130/500: Train Loss=0.6081, Val Loss=0.5946, LR=0.000038
  New best validation loss: 0.5946. Saving model state.


Epoch 131/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 131/500: Train Loss=0.6139, Val Loss=0.5957, LR=0.000035


Epoch 132/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 132/500: Train Loss=0.6220, Val Loss=0.5940, LR=0.000032
  New best validation loss: 0.5940. Saving model state.


Epoch 133/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 133/500: Train Loss=0.6113, Val Loss=0.5965, LR=0.000029


Epoch 134/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 134/500: Train Loss=0.6151, Val Loss=0.5952, LR=0.000027


Epoch 135/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 135/500: Train Loss=0.6160, Val Loss=0.5951, LR=0.000024


Epoch 136/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 136/500: Train Loss=0.6307, Val Loss=0.5972, LR=0.000021


Epoch 137/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 137/500: Train Loss=0.6175, Val Loss=0.5963, LR=0.000019


Epoch 138/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 138/500: Train Loss=0.6244, Val Loss=0.5969, LR=0.000017


Epoch 139/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 139/500: Train Loss=0.6176, Val Loss=0.5951, LR=0.000014


Epoch 140/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 140/500: Train Loss=0.6184, Val Loss=0.5947, LR=0.000012


Epoch 141/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 141/500: Train Loss=0.6151, Val Loss=0.5949, LR=0.000010


Epoch 142/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 142/500: Train Loss=0.6254, Val Loss=0.5948, LR=0.000009


Epoch 143/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 143/500: Train Loss=0.6011, Val Loss=0.5948, LR=0.000007


Epoch 144/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 144/500: Train Loss=0.6207, Val Loss=0.5951, LR=0.000006


Epoch 145/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 145/500: Train Loss=0.6064, Val Loss=0.5958, LR=0.000004


Epoch 146/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 146/500: Train Loss=0.6177, Val Loss=0.5956, LR=0.000003


Epoch 147/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 147/500: Train Loss=0.6108, Val Loss=0.5958, LR=0.000003


Epoch 148/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 148/500: Train Loss=0.6182, Val Loss=0.5954, LR=0.000002


Epoch 149/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 149/500: Train Loss=0.6161, Val Loss=0.5951, LR=0.000001


Epoch 150/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 150/500: Train Loss=0.6168, Val Loss=0.5949, LR=0.000001


Epoch 151/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 151/500: Train Loss=0.6090, Val Loss=0.5948, LR=0.000001


Epoch 152/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 152/500: Train Loss=0.6115, Val Loss=0.5947, LR=0.000001


Epoch 153/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 153/500: Train Loss=0.6130, Val Loss=0.5947, LR=0.000001


Epoch 154/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 154/500: Train Loss=0.6050, Val Loss=0.5945, LR=0.000002


Epoch 155/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 155/500: Train Loss=0.6182, Val Loss=0.5945, LR=0.000003


Epoch 156/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 156/500: Train Loss=0.6116, Val Loss=0.5942, LR=0.000003


Epoch 157/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 157/500: Train Loss=0.6246, Val Loss=0.5942, LR=0.000004


Epoch 158/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 158/500: Train Loss=0.6093, Val Loss=0.5939, LR=0.000006
  New best validation loss: 0.5939. Saving model state.


Epoch 159/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 159/500: Train Loss=0.6115, Val Loss=0.5946, LR=0.000007


Epoch 160/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 160/500: Train Loss=0.6068, Val Loss=0.5955, LR=0.000009


Epoch 161/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 161/500: Train Loss=0.6111, Val Loss=0.5945, LR=0.000010


Epoch 162/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 162/500: Train Loss=0.6114, Val Loss=0.5957, LR=0.000012


Epoch 163/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 163/500: Train Loss=0.6233, Val Loss=0.5952, LR=0.000014


Epoch 164/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 164/500: Train Loss=0.6123, Val Loss=0.5968, LR=0.000017


Epoch 165/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 165/500: Train Loss=0.6219, Val Loss=0.5948, LR=0.000019


Epoch 166/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 166/500: Train Loss=0.6149, Val Loss=0.5953, LR=0.000021


Epoch 167/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 167/500: Train Loss=0.6203, Val Loss=0.5923, LR=0.000024
  New best validation loss: 0.5923. Saving model state.


Epoch 168/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 168/500: Train Loss=0.6241, Val Loss=0.5963, LR=0.000027


Epoch 169/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 169/500: Train Loss=0.6066, Val Loss=0.5967, LR=0.000029


Epoch 170/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 170/500: Train Loss=0.6053, Val Loss=0.5921, LR=0.000032
  New best validation loss: 0.5921. Saving model state.


Epoch 171/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 171/500: Train Loss=0.6122, Val Loss=0.5927, LR=0.000035


Epoch 172/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 172/500: Train Loss=0.6245, Val Loss=0.5971, LR=0.000038


Epoch 173/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 173/500: Train Loss=0.6243, Val Loss=0.5943, LR=0.000041


Epoch 174/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 174/500: Train Loss=0.6321, Val Loss=0.5944, LR=0.000044


Epoch 175/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 175/500: Train Loss=0.6165, Val Loss=0.5973, LR=0.000047


Epoch 176/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 176/500: Train Loss=0.6181, Val Loss=0.5924, LR=0.000050


Epoch 177/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 177/500: Train Loss=0.6123, Val Loss=0.5927, LR=0.000054


Epoch 178/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 178/500: Train Loss=0.6065, Val Loss=0.5934, LR=0.000057


Epoch 179/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 179/500: Train Loss=0.6414, Val Loss=0.5905, LR=0.000060
  New best validation loss: 0.5905. Saving model state.


Epoch 180/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 180/500: Train Loss=0.6135, Val Loss=0.5992, LR=0.000063


Epoch 181/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 181/500: Train Loss=0.6153, Val Loss=0.5947, LR=0.000066


Epoch 182/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 182/500: Train Loss=0.6072, Val Loss=0.5917, LR=0.000069


Epoch 183/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 183/500: Train Loss=0.6150, Val Loss=0.5893, LR=0.000072
  New best validation loss: 0.5893. Saving model state.


Epoch 184/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 184/500: Train Loss=0.6014, Val Loss=0.5902, LR=0.000074


Epoch 185/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 185/500: Train Loss=0.6041, Val Loss=0.5867, LR=0.000077
  New best validation loss: 0.5867. Saving model state.


Epoch 186/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 186/500: Train Loss=0.6295, Val Loss=0.5904, LR=0.000080


Epoch 187/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 187/500: Train Loss=0.6197, Val Loss=0.5938, LR=0.000082


Epoch 188/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 188/500: Train Loss=0.6159, Val Loss=0.5898, LR=0.000084


Epoch 189/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 189/500: Train Loss=0.6123, Val Loss=0.5887, LR=0.000087


Epoch 190/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 190/500: Train Loss=0.6158, Val Loss=0.5882, LR=0.000089


Epoch 191/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 191/500: Train Loss=0.6000, Val Loss=0.5859, LR=0.000091
  New best validation loss: 0.5859. Saving model state.


Epoch 192/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 192/500: Train Loss=0.6015, Val Loss=0.5902, LR=0.000092


Epoch 193/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 193/500: Train Loss=0.6103, Val Loss=0.5789, LR=0.000094
  New best validation loss: 0.5789. Saving model state.


Epoch 194/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 194/500: Train Loss=0.6166, Val Loss=0.5882, LR=0.000095


Epoch 195/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 195/500: Train Loss=0.6139, Val Loss=0.5852, LR=0.000097


Epoch 196/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 196/500: Train Loss=0.6170, Val Loss=0.5865, LR=0.000098


Epoch 197/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 197/500: Train Loss=0.6065, Val Loss=0.5856, LR=0.000098


Epoch 198/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 198/500: Train Loss=0.5977, Val Loss=0.5826, LR=0.000099


Epoch 199/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 199/500: Train Loss=0.6334, Val Loss=0.5851, LR=0.000100


Epoch 200/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 200/500: Train Loss=0.5960, Val Loss=0.5854, LR=0.000100


Epoch 201/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 201/500: Train Loss=0.6208, Val Loss=0.5839, LR=0.000100


Epoch 202/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 202/500: Train Loss=0.6041, Val Loss=0.5852, LR=0.000100


Epoch 203/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 203/500: Train Loss=0.6002, Val Loss=0.6011, LR=0.000100


Epoch 204/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 204/500: Train Loss=0.6100, Val Loss=0.5811, LR=0.000099


Epoch 205/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 205/500: Train Loss=0.6201, Val Loss=0.5812, LR=0.000098


Epoch 206/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 206/500: Train Loss=0.6139, Val Loss=0.5938, LR=0.000098


Epoch 207/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 207/500: Train Loss=0.6062, Val Loss=0.5831, LR=0.000097


Epoch 208/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 208/500: Train Loss=0.6224, Val Loss=0.5878, LR=0.000095


Epoch 209/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 209/500: Train Loss=0.5977, Val Loss=0.5838, LR=0.000094


Epoch 210/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 210/500: Train Loss=0.5968, Val Loss=0.5806, LR=0.000092


Epoch 211/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 211/500: Train Loss=0.6089, Val Loss=0.5797, LR=0.000091


Epoch 212/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 212/500: Train Loss=0.5959, Val Loss=0.5864, LR=0.000089


Epoch 213/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 213/500: Train Loss=0.6026, Val Loss=0.5826, LR=0.000087


Epoch 214/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 214/500: Train Loss=0.6058, Val Loss=0.5783, LR=0.000084
  New best validation loss: 0.5783. Saving model state.


Epoch 215/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 215/500: Train Loss=0.6205, Val Loss=0.5848, LR=0.000082


Epoch 216/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 216/500: Train Loss=0.5987, Val Loss=0.5828, LR=0.000080


Epoch 217/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 217/500: Train Loss=0.6142, Val Loss=0.5849, LR=0.000077


Epoch 218/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 218/500: Train Loss=0.5907, Val Loss=0.5827, LR=0.000074


Epoch 219/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 219/500: Train Loss=0.5922, Val Loss=0.5786, LR=0.000072


Epoch 220/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 220/500: Train Loss=0.6063, Val Loss=0.5869, LR=0.000069


Epoch 221/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 221/500: Train Loss=0.6354, Val Loss=0.5847, LR=0.000066


Epoch 222/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 222/500: Train Loss=0.6001, Val Loss=0.5881, LR=0.000063


Epoch 223/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 223/500: Train Loss=0.5916, Val Loss=0.5798, LR=0.000060


Epoch 224/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 224/500: Train Loss=0.5947, Val Loss=0.5810, LR=0.000057


Epoch 225/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 225/500: Train Loss=0.6005, Val Loss=0.5785, LR=0.000054


Epoch 226/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 226/500: Train Loss=0.5925, Val Loss=0.5806, LR=0.000051


Epoch 227/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 227/500: Train Loss=0.6000, Val Loss=0.5813, LR=0.000047


Epoch 228/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 228/500: Train Loss=0.6017, Val Loss=0.5837, LR=0.000044


Epoch 229/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 229/500: Train Loss=0.5974, Val Loss=0.5789, LR=0.000041


Epoch 230/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 230/500: Train Loss=0.5910, Val Loss=0.5787, LR=0.000038


Epoch 231/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 231/500: Train Loss=0.5926, Val Loss=0.5834, LR=0.000035


Epoch 232/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 232/500: Train Loss=0.6081, Val Loss=0.5798, LR=0.000032


Epoch 233/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 233/500: Train Loss=0.5935, Val Loss=0.5792, LR=0.000029


Epoch 234/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 234/500: Train Loss=0.6020, Val Loss=0.5781, LR=0.000027
  New best validation loss: 0.5781. Saving model state.


Epoch 235/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 235/500: Train Loss=0.5972, Val Loss=0.5823, LR=0.000024


Epoch 236/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 236/500: Train Loss=0.5893, Val Loss=0.5801, LR=0.000021


Epoch 237/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 237/500: Train Loss=0.6030, Val Loss=0.5804, LR=0.000019


Epoch 238/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 238/500: Train Loss=0.5934, Val Loss=0.5811, LR=0.000017


Epoch 239/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 239/500: Train Loss=0.5992, Val Loss=0.5789, LR=0.000014


Epoch 240/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 240/500: Train Loss=0.5981, Val Loss=0.5800, LR=0.000012


Epoch 241/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 241/500: Train Loss=0.5887, Val Loss=0.5828, LR=0.000010


Epoch 242/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 242/500: Train Loss=0.6335, Val Loss=0.5814, LR=0.000009


Epoch 243/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 243/500: Train Loss=0.5906, Val Loss=0.5804, LR=0.000007


Epoch 244/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 244/500: Train Loss=0.5959, Val Loss=0.5805, LR=0.000006


Epoch 245/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 245/500: Train Loss=0.5905, Val Loss=0.5817, LR=0.000004


Epoch 246/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 246/500: Train Loss=0.6032, Val Loss=0.5815, LR=0.000003


Epoch 247/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 247/500: Train Loss=0.5956, Val Loss=0.5815, LR=0.000003


Epoch 248/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 248/500: Train Loss=0.5964, Val Loss=0.5815, LR=0.000002


Epoch 249/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 249/500: Train Loss=0.5903, Val Loss=0.5813, LR=0.000001


Epoch 250/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 250/500: Train Loss=0.5841, Val Loss=0.5814, LR=0.000001


Epoch 251/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 251/500: Train Loss=0.5924, Val Loss=0.5812, LR=0.000001


Epoch 252/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 252/500: Train Loss=0.5858, Val Loss=0.5811, LR=0.000001


Epoch 253/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 253/500: Train Loss=0.5872, Val Loss=0.5811, LR=0.000001


Epoch 254/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 254/500: Train Loss=0.5918, Val Loss=0.5808, LR=0.000002


Epoch 255/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 255/500: Train Loss=0.5848, Val Loss=0.5811, LR=0.000003


Epoch 256/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 256/500: Train Loss=0.6048, Val Loss=0.5811, LR=0.000003


Epoch 257/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 257/500: Train Loss=0.6001, Val Loss=0.5807, LR=0.000004


Epoch 258/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 258/500: Train Loss=0.5871, Val Loss=0.5806, LR=0.000006


Epoch 259/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 259/500: Train Loss=0.5980, Val Loss=0.5803, LR=0.000007


Epoch 260/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 260/500: Train Loss=0.5881, Val Loss=0.5809, LR=0.000009


Epoch 261/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 261/500: Train Loss=0.5937, Val Loss=0.5819, LR=0.000010


Epoch 262/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 262/500: Train Loss=0.6017, Val Loss=0.5819, LR=0.000012


Epoch 263/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 263/500: Train Loss=0.5838, Val Loss=0.5787, LR=0.000014


Epoch 264/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 264/500: Train Loss=0.6066, Val Loss=0.5799, LR=0.000017


Epoch 265/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 265/500: Train Loss=0.6026, Val Loss=0.5817, LR=0.000019


Epoch 266/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 266/500: Train Loss=0.5888, Val Loss=0.5827, LR=0.000021


Epoch 267/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 267/500: Train Loss=0.5892, Val Loss=0.5812, LR=0.000024


Epoch 268/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 268/500: Train Loss=0.5953, Val Loss=0.5795, LR=0.000027


Epoch 269/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 269/500: Train Loss=0.6013, Val Loss=0.5796, LR=0.000029


Epoch 270/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 270/500: Train Loss=0.5944, Val Loss=0.5823, LR=0.000032


Epoch 271/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 271/500: Train Loss=0.5958, Val Loss=0.5813, LR=0.000035


Epoch 272/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 272/500: Train Loss=0.5948, Val Loss=0.5816, LR=0.000038


Epoch 273/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 273/500: Train Loss=0.5834, Val Loss=0.5782, LR=0.000041


Epoch 274/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 274/500: Train Loss=0.5955, Val Loss=0.5789, LR=0.000044


Epoch 275/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 275/500: Train Loss=0.5838, Val Loss=0.5822, LR=0.000047


Epoch 276/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 276/500: Train Loss=0.5938, Val Loss=0.5795, LR=0.000051


Epoch 277/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 277/500: Train Loss=0.5839, Val Loss=0.5814, LR=0.000054


Epoch 278/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 278/500: Train Loss=0.5934, Val Loss=0.5783, LR=0.000057


Epoch 279/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 279/500: Train Loss=0.5987, Val Loss=0.5810, LR=0.000060


Epoch 280/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 280/500: Train Loss=0.5913, Val Loss=0.5818, LR=0.000063


Epoch 281/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 281/500: Train Loss=0.5979, Val Loss=0.5824, LR=0.000066


Epoch 282/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 282/500: Train Loss=0.5866, Val Loss=0.5802, LR=0.000069


Epoch 283/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 283/500: Train Loss=0.5985, Val Loss=0.5870, LR=0.000072


Epoch 284/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 284/500: Train Loss=0.6043, Val Loss=0.5800, LR=0.000074


Epoch 285/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 285/500: Train Loss=0.5934, Val Loss=0.5828, LR=0.000077


Epoch 286/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 286/500: Train Loss=0.5966, Val Loss=0.5814, LR=0.000080


Epoch 287/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 287/500: Train Loss=0.6042, Val Loss=0.5862, LR=0.000082


Epoch 288/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 288/500: Train Loss=0.5981, Val Loss=0.5775, LR=0.000084
  New best validation loss: 0.5775. Saving model state.


Epoch 289/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 289/500: Train Loss=0.5861, Val Loss=0.5875, LR=0.000087


Epoch 290/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 290/500: Train Loss=0.5951, Val Loss=0.5790, LR=0.000089


Epoch 291/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 291/500: Train Loss=0.5911, Val Loss=0.5846, LR=0.000091


Epoch 292/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 292/500: Train Loss=0.6058, Val Loss=0.5796, LR=0.000092


Epoch 293/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 293/500: Train Loss=0.5973, Val Loss=0.5863, LR=0.000094


Epoch 294/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 294/500: Train Loss=0.5914, Val Loss=0.5802, LR=0.000095


Epoch 295/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 295/500: Train Loss=0.6236, Val Loss=0.5833, LR=0.000097


Epoch 296/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 296/500: Train Loss=0.5950, Val Loss=0.5977, LR=0.000098


Epoch 297/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 297/500: Train Loss=0.5995, Val Loss=0.5830, LR=0.000098


Epoch 298/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 298/500: Train Loss=0.5987, Val Loss=0.5864, LR=0.000099


Epoch 299/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 299/500: Train Loss=0.6344, Val Loss=0.5850, LR=0.000100


Epoch 300/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 300/500: Train Loss=0.5987, Val Loss=0.5851, LR=0.000100


Epoch 301/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 301/500: Train Loss=0.6038, Val Loss=0.5827, LR=0.000100


Epoch 302/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 302/500: Train Loss=0.5814, Val Loss=0.5830, LR=0.000100


Epoch 303/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 303/500: Train Loss=0.6008, Val Loss=0.5877, LR=0.000100


Epoch 304/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 304/500: Train Loss=0.5991, Val Loss=0.5828, LR=0.000099


Epoch 305/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 305/500: Train Loss=0.5824, Val Loss=0.5874, LR=0.000098


Epoch 306/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 306/500: Train Loss=0.5906, Val Loss=0.5823, LR=0.000098


Epoch 307/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 307/500: Train Loss=0.5926, Val Loss=0.5839, LR=0.000097


Epoch 308/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 308/500: Train Loss=0.5811, Val Loss=0.5822, LR=0.000095


Epoch 309/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 309/500: Train Loss=0.5941, Val Loss=0.5869, LR=0.000094


Epoch 310/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 310/500: Train Loss=0.6003, Val Loss=0.5816, LR=0.000092


Epoch 311/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 311/500: Train Loss=0.6045, Val Loss=0.5882, LR=0.000091


Epoch 312/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 312/500: Train Loss=0.5907, Val Loss=0.5845, LR=0.000089


Epoch 313/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 313/500: Train Loss=0.5918, Val Loss=0.5843, LR=0.000087


Epoch 314/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 314/500: Train Loss=0.5970, Val Loss=0.5851, LR=0.000084


Epoch 315/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 315/500: Train Loss=0.5890, Val Loss=0.5829, LR=0.000082


Epoch 316/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 316/500: Train Loss=0.5904, Val Loss=0.5861, LR=0.000080


Epoch 317/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 317/500: Train Loss=0.5832, Val Loss=0.5815, LR=0.000077


Epoch 318/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 318/500: Train Loss=0.6223, Val Loss=0.5808, LR=0.000074


Epoch 319/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 319/500: Train Loss=0.5995, Val Loss=0.5840, LR=0.000072


Epoch 320/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 320/500: Train Loss=0.5987, Val Loss=0.5921, LR=0.000069


Epoch 321/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 321/500: Train Loss=0.5930, Val Loss=0.5843, LR=0.000066


Epoch 322/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 322/500: Train Loss=0.5839, Val Loss=0.5809, LR=0.000063


Epoch 323/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 323/500: Train Loss=0.5860, Val Loss=0.5834, LR=0.000060


Epoch 324/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 324/500: Train Loss=0.5795, Val Loss=0.5853, LR=0.000057


Epoch 325/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 325/500: Train Loss=0.5970, Val Loss=0.5809, LR=0.000054


Epoch 326/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 326/500: Train Loss=0.5783, Val Loss=0.5826, LR=0.000051


Epoch 327/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 327/500: Train Loss=0.5920, Val Loss=0.5831, LR=0.000047


Epoch 328/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 328/500: Train Loss=0.5872, Val Loss=0.5836, LR=0.000044


Epoch 329/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 329/500: Train Loss=0.5958, Val Loss=0.5827, LR=0.000041


Epoch 330/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 330/500: Train Loss=0.5798, Val Loss=0.5815, LR=0.000038


Epoch 331/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 331/500: Train Loss=0.5973, Val Loss=0.5826, LR=0.000035


Epoch 332/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 332/500: Train Loss=0.5889, Val Loss=0.5841, LR=0.000032


Epoch 333/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 333/500: Train Loss=0.5829, Val Loss=0.5837, LR=0.000029


Epoch 334/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 334/500: Train Loss=0.5701, Val Loss=0.5850, LR=0.000027


Epoch 335/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 335/500: Train Loss=0.5794, Val Loss=0.5833, LR=0.000024


Epoch 336/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 336/500: Train Loss=0.6023, Val Loss=0.5845, LR=0.000021


Epoch 337/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 337/500: Train Loss=0.5839, Val Loss=0.5829, LR=0.000019


Epoch 338/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 338/500: Train Loss=0.5969, Val Loss=0.5830, LR=0.000017


Epoch 339/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 339/500: Train Loss=0.5707, Val Loss=0.5836, LR=0.000014


Epoch 340/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 340/500: Train Loss=0.5804, Val Loss=0.5836, LR=0.000012


Epoch 341/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 341/500: Train Loss=0.5882, Val Loss=0.5831, LR=0.000010


Epoch 342/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 342/500: Train Loss=0.5755, Val Loss=0.5836, LR=0.000009


Epoch 343/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 343/500: Train Loss=0.5707, Val Loss=0.5837, LR=0.000007


Epoch 344/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 344/500: Train Loss=0.5745, Val Loss=0.5833, LR=0.000006


Epoch 345/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 345/500: Train Loss=0.5931, Val Loss=0.5831, LR=0.000004


Epoch 346/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 346/500: Train Loss=0.5816, Val Loss=0.5829, LR=0.000003


Epoch 347/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 347/500: Train Loss=0.5809, Val Loss=0.5829, LR=0.000003


Epoch 348/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 348/500: Train Loss=0.5725, Val Loss=0.5829, LR=0.000002


Epoch 349/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 349/500: Train Loss=0.5943, Val Loss=0.5829, LR=0.000001


Epoch 350/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 350/500: Train Loss=0.5958, Val Loss=0.5832, LR=0.000001


Epoch 351/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 351/500: Train Loss=0.6019, Val Loss=0.5832, LR=0.000001


Epoch 352/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 352/500: Train Loss=0.5960, Val Loss=0.5831, LR=0.000001


Epoch 353/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 353/500: Train Loss=0.5918, Val Loss=0.5833, LR=0.000001


Epoch 354/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 354/500: Train Loss=0.5920, Val Loss=0.5835, LR=0.000002


Epoch 355/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 355/500: Train Loss=0.5854, Val Loss=0.5838, LR=0.000003


Epoch 356/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 356/500: Train Loss=0.5980, Val Loss=0.5838, LR=0.000003


Epoch 357/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 357/500: Train Loss=0.5923, Val Loss=0.5840, LR=0.000004


Epoch 358/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 358/500: Train Loss=0.5964, Val Loss=0.5841, LR=0.000006


Epoch 359/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 359/500: Train Loss=0.5853, Val Loss=0.5841, LR=0.000007


Epoch 360/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 360/500: Train Loss=0.5897, Val Loss=0.5843, LR=0.000009


Epoch 361/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 361/500: Train Loss=0.5931, Val Loss=0.5842, LR=0.000010


Epoch 362/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 362/500: Train Loss=0.5887, Val Loss=0.5856, LR=0.000012


Epoch 363/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 363/500: Train Loss=0.5756, Val Loss=0.5845, LR=0.000014


Epoch 364/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 364/500: Train Loss=0.5916, Val Loss=0.5833, LR=0.000017


Epoch 365/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 365/500: Train Loss=0.5890, Val Loss=0.5845, LR=0.000019


Epoch 366/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 366/500: Train Loss=0.5770, Val Loss=0.5857, LR=0.000021


Epoch 367/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 367/500: Train Loss=0.5747, Val Loss=0.5836, LR=0.000024


Epoch 368/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 368/500: Train Loss=0.5833, Val Loss=0.5836, LR=0.000027


Epoch 369/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 369/500: Train Loss=0.5919, Val Loss=0.5859, LR=0.000029


Epoch 370/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 370/500: Train Loss=0.6008, Val Loss=0.5832, LR=0.000032


Epoch 371/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 371/500: Train Loss=0.5921, Val Loss=0.5883, LR=0.000035


Epoch 372/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 372/500: Train Loss=0.5945, Val Loss=0.5863, LR=0.000038


Epoch 373/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 373/500: Train Loss=0.6037, Val Loss=0.5862, LR=0.000041


Epoch 374/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 374/500: Train Loss=0.5977, Val Loss=0.5841, LR=0.000044


Epoch 375/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 375/500: Train Loss=0.5648, Val Loss=0.5869, LR=0.000047


Epoch 376/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 376/500: Train Loss=0.5876, Val Loss=0.5844, LR=0.000051


Epoch 377/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 377/500: Train Loss=0.5825, Val Loss=0.5847, LR=0.000054


Epoch 378/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 378/500: Train Loss=0.5835, Val Loss=0.5829, LR=0.000057


Epoch 379/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 379/500: Train Loss=0.5802, Val Loss=0.5834, LR=0.000060


Epoch 380/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 380/500: Train Loss=0.5875, Val Loss=0.5879, LR=0.000063


Epoch 381/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 381/500: Train Loss=0.5786, Val Loss=0.5859, LR=0.000066


Epoch 382/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 382/500: Train Loss=0.5821, Val Loss=0.5847, LR=0.000069


Epoch 383/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 383/500: Train Loss=0.5917, Val Loss=0.5838, LR=0.000072


Epoch 384/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 384/500: Train Loss=0.5770, Val Loss=0.5853, LR=0.000074


Epoch 385/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 385/500: Train Loss=0.5799, Val Loss=0.5866, LR=0.000077


Epoch 386/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 386/500: Train Loss=0.5758, Val Loss=0.5848, LR=0.000080


Epoch 387/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 387/500: Train Loss=0.5724, Val Loss=0.5842, LR=0.000082


Epoch 388/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 388/500: Train Loss=0.5918, Val Loss=0.5930, LR=0.000084


Epoch 389/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 389/500: Train Loss=0.5790, Val Loss=0.5858, LR=0.000087


Epoch 390/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 390/500: Train Loss=0.6094, Val Loss=0.5863, LR=0.000089


Epoch 391/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 391/500: Train Loss=0.5773, Val Loss=0.5876, LR=0.000091


Epoch 392/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 392/500: Train Loss=0.5931, Val Loss=0.5898, LR=0.000092


Epoch 393/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 393/500: Train Loss=0.5967, Val Loss=0.5868, LR=0.000094


Epoch 394/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 394/500: Train Loss=0.5806, Val Loss=0.5918, LR=0.000095


Epoch 395/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 395/500: Train Loss=0.5724, Val Loss=0.5858, LR=0.000097


Epoch 396/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 396/500: Train Loss=0.5919, Val Loss=0.5876, LR=0.000098


Epoch 397/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 397/500: Train Loss=0.5952, Val Loss=0.5926, LR=0.000098


Epoch 398/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 398/500: Train Loss=0.6039, Val Loss=0.5939, LR=0.000099


Epoch 399/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 399/500: Train Loss=0.5695, Val Loss=0.5859, LR=0.000100


Epoch 400/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 400/500: Train Loss=0.5730, Val Loss=0.5866, LR=0.000100


Epoch 401/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 401/500: Train Loss=0.5772, Val Loss=0.5856, LR=0.000100


Epoch 402/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 402/500: Train Loss=0.5762, Val Loss=0.5894, LR=0.000100


Epoch 403/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 403/500: Train Loss=0.5876, Val Loss=0.5847, LR=0.000100


Epoch 404/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 404/500: Train Loss=0.5959, Val Loss=0.5911, LR=0.000099


Epoch 405/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 405/500: Train Loss=0.5905, Val Loss=0.5906, LR=0.000098


Epoch 406/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 406/500: Train Loss=0.6033, Val Loss=0.5912, LR=0.000098


Epoch 407/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 407/500: Train Loss=0.5871, Val Loss=0.5888, LR=0.000097


Epoch 408/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 408/500: Train Loss=0.5875, Val Loss=0.5869, LR=0.000095


Epoch 409/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 409/500: Train Loss=0.5776, Val Loss=0.5852, LR=0.000094


Epoch 410/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 410/500: Train Loss=0.5675, Val Loss=0.5875, LR=0.000092


Epoch 411/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 411/500: Train Loss=0.5835, Val Loss=0.5868, LR=0.000091


Epoch 412/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 412/500: Train Loss=0.5762, Val Loss=0.5877, LR=0.000089


Epoch 413/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 413/500: Train Loss=0.5850, Val Loss=0.5878, LR=0.000087


Epoch 414/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 414/500: Train Loss=0.5860, Val Loss=0.5876, LR=0.000084


Epoch 415/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 415/500: Train Loss=0.5922, Val Loss=0.5924, LR=0.000082


Epoch 416/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 416/500: Train Loss=0.5845, Val Loss=0.5885, LR=0.000080


Epoch 417/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 417/500: Train Loss=0.5737, Val Loss=0.5878, LR=0.000077


Epoch 418/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 418/500: Train Loss=0.5816, Val Loss=0.5867, LR=0.000074


Epoch 419/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 419/500: Train Loss=0.5935, Val Loss=0.5904, LR=0.000072


Epoch 420/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 420/500: Train Loss=0.5753, Val Loss=0.5885, LR=0.000069


Epoch 421/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 421/500: Train Loss=0.5763, Val Loss=0.5873, LR=0.000066


Epoch 422/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 422/500: Train Loss=0.5964, Val Loss=0.5875, LR=0.000063


Epoch 423/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 423/500: Train Loss=0.5868, Val Loss=0.5885, LR=0.000060


Epoch 424/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 424/500: Train Loss=0.5751, Val Loss=0.5917, LR=0.000057


Epoch 425/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 425/500: Train Loss=0.5785, Val Loss=0.5896, LR=0.000054


Epoch 426/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 426/500: Train Loss=0.5670, Val Loss=0.5878, LR=0.000051


Epoch 427/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 427/500: Train Loss=0.5976, Val Loss=0.5863, LR=0.000047


Epoch 428/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 428/500: Train Loss=0.5745, Val Loss=0.5892, LR=0.000044


Epoch 429/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 429/500: Train Loss=0.5766, Val Loss=0.5888, LR=0.000041


Epoch 430/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 430/500: Train Loss=0.5687, Val Loss=0.5882, LR=0.000038


Epoch 431/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 431/500: Train Loss=0.5871, Val Loss=0.5883, LR=0.000035


Epoch 432/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 432/500: Train Loss=0.5721, Val Loss=0.5918, LR=0.000032


Epoch 433/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 433/500: Train Loss=0.5632, Val Loss=0.5885, LR=0.000029


Epoch 434/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 434/500: Train Loss=0.5961, Val Loss=0.5890, LR=0.000027


Epoch 435/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 435/500: Train Loss=0.5769, Val Loss=0.5900, LR=0.000024


Epoch 436/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 436/500: Train Loss=0.5721, Val Loss=0.5894, LR=0.000021


Epoch 437/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 437/500: Train Loss=0.5809, Val Loss=0.5886, LR=0.000019


Epoch 438/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 438/500: Train Loss=0.5720, Val Loss=0.5888, LR=0.000017


Epoch 439/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 439/500: Train Loss=0.5638, Val Loss=0.5894, LR=0.000014


Epoch 440/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 440/500: Train Loss=0.5666, Val Loss=0.5881, LR=0.000012


Epoch 441/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 441/500: Train Loss=0.5874, Val Loss=0.5876, LR=0.000010


Epoch 442/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 442/500: Train Loss=0.5698, Val Loss=0.5881, LR=0.000009


Epoch 443/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 443/500: Train Loss=0.5791, Val Loss=0.5884, LR=0.000007


Epoch 444/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 444/500: Train Loss=0.5810, Val Loss=0.5884, LR=0.000006


Epoch 445/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 445/500: Train Loss=0.5859, Val Loss=0.5885, LR=0.000004


Epoch 446/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 446/500: Train Loss=0.5674, Val Loss=0.5886, LR=0.000003


Epoch 447/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 447/500: Train Loss=0.5804, Val Loss=0.5884, LR=0.000003


Epoch 448/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 448/500: Train Loss=0.5656, Val Loss=0.5883, LR=0.000002


Epoch 449/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 449/500: Train Loss=0.5813, Val Loss=0.5883, LR=0.000001


Epoch 450/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 450/500: Train Loss=0.5834, Val Loss=0.5884, LR=0.000001


Epoch 451/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 451/500: Train Loss=0.5764, Val Loss=0.5883, LR=0.000001


Epoch 452/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 452/500: Train Loss=0.5658, Val Loss=0.5882, LR=0.000001


Epoch 453/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 453/500: Train Loss=0.5942, Val Loss=0.5881, LR=0.000001


Epoch 454/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 454/500: Train Loss=0.5824, Val Loss=0.5880, LR=0.000002


Epoch 455/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 455/500: Train Loss=0.5773, Val Loss=0.5880, LR=0.000003


Epoch 456/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 456/500: Train Loss=0.5826, Val Loss=0.5881, LR=0.000003


Epoch 457/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 457/500: Train Loss=0.5770, Val Loss=0.5885, LR=0.000004


Epoch 458/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 458/500: Train Loss=0.5734, Val Loss=0.5885, LR=0.000006


Epoch 459/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 459/500: Train Loss=0.5737, Val Loss=0.5880, LR=0.000007


Epoch 460/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 460/500: Train Loss=0.5615, Val Loss=0.5877, LR=0.000009


Epoch 461/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 461/500: Train Loss=0.5816, Val Loss=0.5872, LR=0.000010


Epoch 462/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 462/500: Train Loss=0.5878, Val Loss=0.5881, LR=0.000012


Epoch 463/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 463/500: Train Loss=0.5970, Val Loss=0.5884, LR=0.000014


Epoch 464/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 464/500: Train Loss=0.5760, Val Loss=0.5893, LR=0.000017


Epoch 465/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 465/500: Train Loss=0.5754, Val Loss=0.5886, LR=0.000019


Epoch 466/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 466/500: Train Loss=0.5876, Val Loss=0.5887, LR=0.000021


Epoch 467/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 467/500: Train Loss=0.5719, Val Loss=0.5908, LR=0.000024


Epoch 468/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 468/500: Train Loss=0.5829, Val Loss=0.5885, LR=0.000027


Epoch 469/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 469/500: Train Loss=0.5897, Val Loss=0.5893, LR=0.000029


Epoch 470/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 470/500: Train Loss=0.5818, Val Loss=0.5903, LR=0.000032


Epoch 471/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 471/500: Train Loss=0.5884, Val Loss=0.5887, LR=0.000035


Epoch 472/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 472/500: Train Loss=0.5876, Val Loss=0.5900, LR=0.000038


Epoch 473/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 473/500: Train Loss=0.5723, Val Loss=0.5889, LR=0.000041


Epoch 474/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 474/500: Train Loss=0.5765, Val Loss=0.5867, LR=0.000044


Epoch 475/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 475/500: Train Loss=0.5966, Val Loss=0.5857, LR=0.000047


Epoch 476/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 476/500: Train Loss=0.6066, Val Loss=0.5865, LR=0.000050


Epoch 477/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 477/500: Train Loss=0.5712, Val Loss=0.5865, LR=0.000054


Epoch 478/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 478/500: Train Loss=0.6143, Val Loss=0.5864, LR=0.000057


Epoch 479/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 479/500: Train Loss=0.5641, Val Loss=0.5892, LR=0.000060


Epoch 480/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 480/500: Train Loss=0.5776, Val Loss=0.5887, LR=0.000063


Epoch 481/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 481/500: Train Loss=0.5733, Val Loss=0.5879, LR=0.000066


Epoch 482/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 482/500: Train Loss=0.5714, Val Loss=0.5868, LR=0.000069


Epoch 483/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 483/500: Train Loss=0.5680, Val Loss=0.5883, LR=0.000072


Epoch 484/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 484/500: Train Loss=0.5790, Val Loss=0.5885, LR=0.000074


Epoch 485/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 485/500: Train Loss=0.5871, Val Loss=0.5870, LR=0.000077


Epoch 486/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 486/500: Train Loss=0.5913, Val Loss=0.5879, LR=0.000080


Epoch 487/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 487/500: Train Loss=0.5597, Val Loss=0.5883, LR=0.000082


Epoch 488/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 488/500: Train Loss=0.5742, Val Loss=0.5898, LR=0.000084


Epoch 489/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 489/500: Train Loss=0.5732, Val Loss=0.5867, LR=0.000087


Epoch 490/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 490/500: Train Loss=0.5962, Val Loss=0.5874, LR=0.000089


Epoch 491/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 491/500: Train Loss=0.5914, Val Loss=0.5873, LR=0.000091


Epoch 492/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 492/500: Train Loss=0.5727, Val Loss=0.5858, LR=0.000092


Epoch 493/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 493/500: Train Loss=0.5940, Val Loss=0.5906, LR=0.000094


Epoch 494/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 494/500: Train Loss=0.5947, Val Loss=0.5941, LR=0.000095


Epoch 495/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 495/500: Train Loss=0.5694, Val Loss=0.5873, LR=0.000097


Epoch 496/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 496/500: Train Loss=0.5815, Val Loss=0.5846, LR=0.000098


Epoch 497/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 497/500: Train Loss=0.5856, Val Loss=0.5887, LR=0.000098


Epoch 498/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 498/500: Train Loss=0.5646, Val Loss=0.5865, LR=0.000099


Epoch 499/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 499/500: Train Loss=0.5727, Val Loss=0.5917, LR=0.000100


Epoch 500/500 [Train]:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 500/500: Train Loss=0.5994, Val Loss=0.5913, LR=0.000100

--- Best model state loaded (Validation Loss: 0.5775) ---
--- Baseline Model Training Finished ---

Baseline model state dictionary saved successfully to /kaggle/working/baseline_model.pth
