In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import truncnorm
import math
from tqdm.auto import tqdm
import sys
import os
import scipy
from scipy.stats import linregress
from joblib import Parallel, delayed
import time
import h5py

from LQ_RL import *

# Config

In [None]:
# Experiment settings: unpacked as keyword arguments into the simulator and read
# again by later cells (e.g. 'initial_n' when building the episode axis).
# NOTE(review): the meaning of each entry is defined by LQ_RL_Simulator — confirm there.
config = dict(
    initial_n=0,
    N=200000,
    initial_phi_1=-0.5,
    initial_phi_2=5,
    A=1,
    B=1,
    C=1,
    D=1,
    Q=1,
    H=1,
    x_0=1,
    T=1,
    dt=1 / 100,
    lr_rate=0.75,
    phi_1_rate=0.25,
    phi_2_rate=0.25,
    batch_rate=None,
    initial_lr1=0.05,
    initial_gamma=1,
    initial_batch=1,
    initial_seed=None,
)

In [None]:
# Build the simulator, forwarding every entry of `config` as a keyword argument.
# NOTE(review): LQ_RL_Simulator presumably comes from the `LQ_RL` star-import — confirm.
simulator = LQ_RL_Simulator(**config)

# Load Data

In [None]:
# Directory prefix for the stored results. It is joined to file names by plain string
# concatenation, so the trailing slash is required.
file_path = '../Data_Model_Free/'

In [None]:
# Collect every top-level group of the results file into one dict keyed by the
# group's name converted to int (the seed).
data = dict()

with h5py.File(file_path + 'Algo_full_results.h5', mode='r') as f:
    for seed in f.keys():
        group = f[seed]
        # Copy each member of the group into an in-memory NumPy array so the
        # values remain usable after the file is closed.
        data[int(seed)] = dict((k, np.array(group[k])) for k in group.keys())

# Check All Results

## Convergence Rate

In [None]:
def plot_log_mse(data, star, title, log_iter_start, log_iter_end, lower_percentile, upper_percentile, initial_n):
    """Plot log mean-squared error against log episode index, with a linear fit.

    Parameters
    ----------
    data : array-like
        Trajectories stacked along axis 0; all statistics are taken across axis 0.
    star : scalar or array-like
        Reference value subtracted from ``data`` before squaring.
    title : str
        Text inserted into the y-axis label (``'Log MSE of {title}'``).
    log_iter_start, log_iter_end : float
        Inclusive bounds, in log-episode units, of the window used for the
        linear regression.
    lower_percentile, upper_percentile : float
        Percentiles of the squared error (across axis 0) that delimit the
        shaded band.
    initial_n : int
        Episode offset: the first column is plotted at ``log(initial_n + 1)``.

    Returns
    -------
    The figure created by ``plt.subplots``.
    """
    squared_error = (np.array(data) - star) ** 2
    mean_sq_err = np.mean(squared_error, axis=0)
    # One call returns both percentile curves, stacked along the first axis.
    band_low, band_high = np.percentile(
        squared_error, [lower_percentile, upper_percentile], axis=0
    )

    log_curve = np.log(mean_sq_err)
    n_points = len(mean_sq_err)
    log_episodes = np.log(np.arange(initial_n + 1, initial_n + n_points + 1))

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(log_episodes, log_curve, label='Log MSE')
    ax.fill_between(
        log_episodes,
        np.log(band_low),
        np.log(band_high),
        color='gray',
        alpha=0.3,
        label=f'{lower_percentile}th-{upper_percentile}th Percentile',
    )

    # Fit only inside the requested window ...
    in_window = np.logical_and(log_episodes >= log_iter_start, log_episodes <= log_iter_end)
    fit = linregress(log_episodes[in_window], log_curve[in_window])
    slope, intercept = fit.slope, fit.intercept

    # ... but draw the fitted line over the whole x-range.
    sign = "+" if intercept >= 0 else "-"
    ax.plot(
        log_episodes,
        slope * log_episodes + intercept,
        'r',
        label=f'Linear Regression\ny = {slope:.2f}x {sign} {abs(intercept):.2f}',
    )

    # Axis cosmetics: large fonts, grid on, empty title.
    ax.set_xlabel('Log Episodes', fontsize=18)
    ax.set_ylabel(f'Log MSE of {title}', fontsize=18)
    ax.tick_params(axis='both', labelsize=18)
    ax.legend(fontsize=16)
    ax.set_title("")
    ax.grid(True)

    return fig

In [None]:
# Stack the 'phi_1_list' entry of every loaded result into a single array, one row per
# entry of `data` (assumes all lists have the same length — TODO confirm).
phi_1_values = np.array([result['phi_1_list'] for result in data.values()])

In [None]:
# Convergence plot for phi_1 against the reference value -2; the regression window is
# episodes 5000 through 200000 and the shaded band spans the 25th-75th percentiles.
# The label is a raw string so the LaTeX backslash in \phi is not parsed as a
# (invalid) Python escape sequence.
fig1 = plot_log_mse(phi_1_values, -2, r'$\phi_1$', np.log(5000), np.log(200000), 25, 75, config.get('initial_n'))

In [None]:
# Write the phi_1 convergence figure as a PNG; the relative path resolves against the
# current working directory.
fig1.savefig('phi1_log_mse_model-free.png')

## Regret

In [None]:
def plot_log_regret(simulator, phi_1_values, phi_2_values, phi_1_star, phi_2_star, title, log_iter_start, log_iter_end, lower_percentile, upper_percentile, initial_n, use_median=False):
    """Plot log cumulative regret against log episode index, with a linear fit.

    For every run, the per-episode regret is
    ``simulator.j_hat(phi_1_star, phi_2_star) - simulator.j_hat(phi_1, phi_2)``
    and is accumulated with ``np.cumsum``. The curves are then aggregated
    across runs and plotted on log-log axes.

    Parameters
    ----------
    simulator : object
        Must provide ``j_hat(phi_1, phi_2)``; it is called once with the
        reference parameters and once per run with that run's arrays.
        NOTE(review): assumes ``j_hat`` is deterministic and accepts arrays —
        confirm against LQ_RL.
    phi_1_values, phi_2_values : iterable of array-like
        Per-run parameter trajectories, paired up with ``zip``.
    phi_1_star, phi_2_star : scalar
        Reference parameters the regret is measured against.
    title : str
        Currently unused; kept so existing callers keep working.
    log_iter_start, log_iter_end : float
        Inclusive bounds, in log-episode units, of the regression window.
    lower_percentile, upper_percentile : float
        Percentiles of the cumulative regret (across runs) that delimit the
        shaded band.
    initial_n : int
        Episode offset: the first column is plotted at ``log(initial_n + 1)``.
    use_median : bool, default False
        Aggregate runs with the median instead of the mean.

    Returns
    -------
    tuple
        ``(fig, all_regrets)`` where ``all_regrets`` is a list holding one
        cumulative-regret array per run.
    """
    # The benchmark value is the same for every run, so evaluate it once.
    optimal_value = simulator.j_hat(phi_1_star, phi_2_star)

    all_regrets = []
    for phi_1_mean, phi_2_mean in zip(phi_1_values, phi_2_values):
        actual_value = simulator.j_hat(np.array(phi_1_mean), np.array(phi_2_mean))
        all_regrets.append(np.cumsum(optimal_value - actual_value))

    avg_regret = np.median(all_regrets, axis=0) if use_median else np.mean(all_regrets, axis=0)
    lower_bound = np.percentile(all_regrets, lower_percentile, axis=0)
    upper_bound = np.percentile(all_regrets, upper_percentile, axis=0)

    log_avg_regret = np.log(avg_regret)
    log_iterations = np.log(np.arange(initial_n + 1, initial_n + len(log_avg_regret) + 1))

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(log_iterations, log_avg_regret, label='Log Regret')
    ax.fill_between(log_iterations, np.log(lower_bound), np.log(upper_bound), color='gray', alpha=0.3, label=f'{lower_percentile}th-{upper_percentile}th Percentile')

    # Fit only inside the requested window.
    valid_indices = (log_iterations >= log_iter_start) & (log_iterations <= log_iter_end)
    slope, intercept, _, _, _ = linregress(log_iterations[valid_indices], log_avg_regret[valid_indices])

    # Draw the fitted line across the whole x-range.
    full_regression_line = slope * log_iterations + intercept

    # Render the intercept with an explicit sign so a negative value reads
    # "- 0.69" rather than "+ -0.69" (same convention as plot_log_mse).
    sign = "+" if intercept >= 0 else "-"
    ax.plot(log_iterations, full_regression_line, 'r', label=f'Linear Regression\ny = {slope:.2f}x {sign} {abs(intercept):.2f}')

    # Axis cosmetics: large fonts, grid on, empty title.
    ax.set_xlabel('Log Episodes', fontsize=18)
    ax.set_ylabel('Log Expected Regret', fontsize=18)
    ax.tick_params(axis='both', labelsize=18)
    ax.legend(fontsize=16)
    ax.set_title("")
    ax.grid(True)

    return fig, all_regrets

In [None]:
# Stack the 'phi_2_list' entry of every loaded result into a single array, one row per
# entry of `data` (assumes all lists have the same length — TODO confirm).
phi_2_values = np.array([result['phi_2_list'] for result in data.values()])

In [None]:
# Cumulative-regret plot: -2 and 0 are passed as phi_1_star and phi_2_star, the regression
# window is episodes 5000 through 200000, the shaded band spans the 25th-75th percentiles,
# and runs are aggregated with the mean (use_median=False).
fig2, all_regrets = plot_log_regret(simulator, phi_1_values, phi_2_values, -2, 0, '',
                np.log(5000), np.log(200000), 25, 75, config.get('initial_n'), use_median=False)

In [None]:
# Write the regret figure as a PNG; the relative path resolves against the current
# working directory.
fig2.savefig('log_regret_model-free.png')