# Setup

In [2]:
import os
import sys
import numpy as np
import pandas as pd


# Check if the original directory is already saved in a variable
if 'original_directory' not in globals():
    # Save the original working directory the first time
    original_directory = os.getcwd()

# Change back to the original directory whenever the cell is executed
os.chdir(original_directory)

# Go to mother directory
os.chdir("../")

# Verify the current working directory
print("Working directory set to:", os.getcwd())

sys.path.append(os.path.abspath(os.getcwd()))

Working directory set to: c:\Users\Biebert\OneDrive - Universität St.Gallen\Dokumente\OneDrive Dokumente\02_Bildung\01_BVWL Bachelor\01_Kurse\07_Thesis\Code\Portfolio_Optimization_DDPG


In [121]:
from data_scripts import generate_data as gd
from data_scripts import plotting as pl
import datetime

tickers = ['MSFT', 'TGT', 'QCOM', 'MU', 'CAT']

# Load simulation data for the selected stocks
stock_data_dict = gd.load_simulation_data(tickers)

# Get the combined training simulation data for the selected stocks
combined_train_data = gd.get_combined_simulation(stock_data_dict, simulation_index=0, set_type='train')


# Assuming combined_train_data is a NumPy array or a DataFrame
num_days_train, num_ep_train = data = stock_data_dict[tickers[0]]['train'].shape
num_days_test, num_ep_test = data = stock_data_dict[tickers[0]]['test'].shape

def generate_time():
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{timestamp}"

In [122]:
#raise Exception("Skip this cell")

import random
import numpy as np
from trading_envs.trading_env import TradingEnv
from models.ddpg_agent import Agent
from tqdm import tqdm

save_num = 8100

# Initialize variables
train_simulations = num_ep_train
test_simulations = num_ep_test
train_days = num_days_train
test_days = num_days_test

num_episodes = 3001
max_steps_per_episode = train_days 

reward_function = 'portfolio_value'

# Initialize the environment and the agent
train_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=0, set_type='train')
env = TradingEnv(stock_data=train_simulation_data, reward_function=reward_function)
agent = Agent(alpha=0.0001, beta=0.001, input_dims=[env.observation_space.shape[0]], 
                tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, 
                n_actions=env.action_space.shape[0])

agent.load_models(suffix='8804_20241029_015052_portfolio_value')

# Initialize DataFrames
episode_scores_df = pd.DataFrame(columns=['Episode', 'Score'])
wealth_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Simulation', 'Wealth'])
actions_columns = ['Episode', 'TimeStep'] + [f'Action_{i}' for i in range(env.action_space.shape[0])]
actions_df = pd.DataFrame(columns=actions_columns)
states_columns = ['Episode', 'TimeStep'] + [f'State_{i}' for i in range(env.observation_space.shape[0])]
states_df = pd.DataFrame(columns=states_columns)
rewards_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Reward'])

# Training loop only one time
for episode in tqdm(range(num_episodes), desc="Training Progress"):
    episode_wealth = []
    episode_actions = []
    episode_states = []
    episode_rewards = []

    # Reset environment and variables
    train_simulation_index = random.choice(range(train_simulations))
    train_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=train_simulation_index, set_type='train')
    # Set the new environment with this training data
    env = TradingEnv(stock_data=train_simulation_data, reward_function=reward_function)

    state, _ = env.reset()
    done = False
    score = 0
    time_step = 0

    while not done:
        action = agent.choose_action(state)
        state_, reward, done, _, _ = env.step(action)
        agent.remember(state, action, reward, state_, done)
        agent.learn()
        score += reward

        # Collect data
        episode_wealth.append({
            'Episode': episode, 
            'TimeStep': time_step, 
            'Simulation': train_simulation_index,
            'Wealth': env.get_portfolio_value()
        })

        action_record = {'Episode': episode, 'TimeStep': time_step}
        action_record.update({f'Action_{i}': a for i, a in enumerate(action)})
        episode_actions.append(action_record)

        state_record = {'Episode': episode, 'TimeStep': time_step}
        state_record.update({f'State_{i}': s for i, s in enumerate(state)})
        episode_states.append(state_record)

        episode_rewards.append({
            'Episode': episode, 
            'TimeStep': time_step, 
            'Reward': reward
        })

        state = state_
        time_step += 1

    # Append episode data to DataFrames
    episode_scores_df = pd.concat([episode_scores_df, pd.DataFrame([{'Episode': episode, 'Score': score}])], ignore_index=True)
    wealth_df = pd.concat([wealth_df, pd.DataFrame(episode_wealth)], ignore_index=True)
    actions_df = pd.concat([actions_df, pd.DataFrame(episode_actions)], ignore_index=True)
    states_df = pd.concat([states_df, pd.DataFrame(episode_states)], ignore_index=True)
    rewards_df = pd.concat([rewards_df, pd.DataFrame(episode_rewards)], ignore_index=True)

    if episode % 1000 == 0 and episode > 999:


        time = generate_time()
        time_first = time
        agent.save_models(suffix=f'{save_num}_{time}_{reward_function}')

        # Save with unique filename
        episode_scores_df.to_csv(f'data_save/{save_num}_episode_scores_{time}_{reward_function}.csv', index=False)
        wealth_df.to_csv(f'data_save/{save_num}_wealth_over_time_{time}_{reward_function}.csv', index=False)
        actions_df.to_csv(f'data_save/{save_num}_actions_taken_{time}_{reward_function}.csv', index=False)
        states_df.to_csv(f'data_save/{save_num}_states_observed_{time}_{reward_function}.csv', index=False)
        rewards_df.to_csv(f'data_save/{save_num}_rewards_received_{time}_{reward_function}.csv', index=False)

        save_num += 1




... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...


Training Progress:  33%|███▎      | 1000/3001 [5:48:02<11:53:45, 21.40s/it]

... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...


Training Progress:  67%|██████▋   | 2000/3001 [12:04:37<5:28:09, 19.67s/it]

... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...


Training Progress: 100%|█████████▉| 3000/3001 [16:51:34<00:16, 16.51s/it]   

... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...


Training Progress: 100%|██████████| 3001/3001 [16:53:17<00:00, 20.26s/it]


In [123]:
import random
import numpy as np
from trading_envs.trading_env import TradingEnv
from models.ddpg_agent import Agent
from tqdm import tqdm

save_num = 9100

# Initialize variables
train_simulations = num_ep_train
test_simulations = num_ep_test
train_days = num_days_train
test_days = num_days_test

num_episodes = 3001
max_steps_per_episode = train_days 

reward_function = 'diff_sharpe_reward'


# Initialize the environment and the agent
train_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=1, set_type='train')
env = TradingEnv(stock_data=train_simulation_data, reward_function=reward_function)
agent = Agent(alpha=0.0001, beta=0.001, input_dims=[env.observation_space.shape[0]], 
                tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, 
                n_actions=env.action_space.shape[0])

agent.load_models(suffix='9904_20241029_232625_diff_sharpe_reward')

# Initialize DataFrames
episode_scores_df = pd.DataFrame(columns=['Episode', 'Score'])
wealth_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Simulation', 'Wealth'])
actions_columns = ['Episode', 'TimeStep'] + [f'Action_{i}' for i in range(env.action_space.shape[0])]
actions_df = pd.DataFrame(columns=actions_columns)
states_columns = ['Episode', 'TimeStep'] + [f'State_{i}' for i in range(env.observation_space.shape[0])]
states_df = pd.DataFrame(columns=states_columns)
rewards_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Reward'])

# Training loop only one time
for episode in tqdm(range(num_episodes), desc="Training Progress"):
    episode_wealth = []
    episode_actions = []
    episode_states = []
    episode_rewards = []

    # Reset environment and variables
    train_simulation_index = random.choice(range(train_simulations))
    train_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=train_simulation_index, set_type='train')
    
    # Set the new environment with this training data
    env = TradingEnv(stock_data=train_simulation_data, reward_function=reward_function)
    state, _ = env.reset()
    done = False
    score = 0
    time_step = 0

    while not done:
        action = agent.choose_action(state)
        state_, reward, done, _, _ = env.step(action)
        agent.remember(state, action, reward, state_, done)
        agent.learn()
        score += reward

        # Collect data
        episode_wealth.append({
            'Episode': episode, 
            'TimeStep': time_step, 
            'Simulation': train_simulation_index,
            'Wealth': env.get_portfolio_value()
        })

        action_record = {'Episode': episode, 'TimeStep': time_step}
        action_record.update({f'Action_{i}': a for i, a in enumerate(action)})
        episode_actions.append(action_record)

        state_record = {'Episode': episode, 'TimeStep': time_step}
        state_record.update({f'State_{i}': s for i, s in enumerate(state)})
        episode_states.append(state_record)

        episode_rewards.append({
            'Episode': episode, 
            'TimeStep': time_step, 
            'Reward': reward
        })

        state = state_
        time_step += 1

    # Append episode data to DataFrames
    episode_scores_df = pd.concat([episode_scores_df, pd.DataFrame([{'Episode': episode, 'Score': score}])], ignore_index=True)
    wealth_df = pd.concat([wealth_df, pd.DataFrame(episode_wealth)], ignore_index=True)
    actions_df = pd.concat([actions_df, pd.DataFrame(episode_actions)], ignore_index=True)
    states_df = pd.concat([states_df, pd.DataFrame(episode_states)], ignore_index=True)
    rewards_df = pd.concat([rewards_df, pd.DataFrame(episode_rewards)], ignore_index=True)

    if episode % 1000 == 0 and episode > 999:


        time = generate_time()
        time_first = time
        agent.save_models(suffix=f'{save_num}_{time}_{reward_function}')

        # Save with unique filename
        episode_scores_df.to_csv(f'data_save/{save_num}_episode_scores_{time}_{reward_function}.csv', index=False)
        wealth_df.to_csv(f'data_save/{save_num}_wealth_over_time_{time}_{reward_function}.csv', index=False)
        actions_df.to_csv(f'data_save/{save_num}_actions_taken_{time}_{reward_function}.csv', index=False)
        states_df.to_csv(f'data_save/{save_num}_states_observed_{time}_{reward_function}.csv', index=False)
        rewards_df.to_csv(f'data_save/{save_num}_rewards_received_{time}_{reward_function}.csv', index=False)

        save_num += 1


... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...


Training Progress:  33%|███▎      | 1000/3001 [4:54:51<10:59:20, 19.77s/it]

... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...


Training Progress:  67%|██████▋   | 2000/3001 [10:13:46<4:57:37, 17.84s/it]

... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...


Training Progress: 100%|█████████▉| 3000/3001 [16:07:51<00:28, 28.48s/it]  

... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...
... saving checkpoint ...


Training Progress: 100%|██████████| 3001/3001 [16:11:17<00:00, 19.42s/it]


# Training performance

# Test

## First

Calculate the surplus and the market and agent wealth for each episode

In [93]:
from tqdm import tqdm
import numpy as np
from data_scripts import generate_data as gd
from data_scripts import plotting as pl

# Initialize an empty list to store the surplus values for each simulation
surplus = []
markets = []
agents = []

# Read csv data once 
#agent_wealth = pd.read_csv(f'data_save/9904_wealth_over_time_20241029_232625_diff_sharpe_reward.csv')
#states = pd.read_csv(f'data_save/9904_states_observed_20241029_232625_diff_sharpe_reward_value.csv')

agent_wealth = pd.read_csv(f'data_save/8804_wealth_over_time_20241029_015052_portfolio_value.csv')
states = pd.read_csv(f'data_save/8804_states_observed_20241029_015052_portfolio_value.csv')


#agent_wealth = pd.read_csv(f'data_save/100004_wealth_over_time_20241025_183434_portfolio_value.csv')
#states = pd.read_csv(f'data_save/100004_states_observed_20241025_183434_portfolio_value.csv')

# Loop over each simulation index
for i in tqdm(range(agent_wealth['Episode'].max()), desc="Simulations Progress"):
    # Filter agent wealth data for the current episode once
    agent_wealth_e = agent_wealth[agent_wealth['Episode'] == i].set_index('TimeStep')
    prices = states[states['Episode'] == i].set_index('TimeStep').filter(like='State_').values[:, :6]

    # Preallocate the portfolio surplus list
    portfolio_surplus = np.empty(len(prices))
    market = np.empty(len(prices))
    agent = np.empty(len(prices))

    # Iterate over the time steps
    for n in range(0, len(prices)):
        # Retrieve agent wealth data for the current timestep
        agent_wealth_day = agent_wealth_e.loc[n, 'Wealth']

        # Calculate the current market value
        market_price_day = prices[n]

        # Distribute 1000 equally among the 5 stocks and cash with the prices from teh first day
        dist_vec = 1000/ len(market_price_day) / prices[0]

        market_wealth_day = np.sum(market_price_day * dist_vec)

        # Calculate wealth difference (agent wealth - calculated wealth)
        wealth_diff = agent_wealth_day - market_wealth_day

        # Store the wealth difference
        portfolio_surplus[n] = np.round(wealth_diff, 2)
        market[n] = np.round(market_wealth_day, 2)  # Round market value to 2 decimal places
        agent[n] = np.round(agent_wealth_day, 2)


    # Append the portfolio surplus for this simulation to the overall surplus list
    surplus.append(portfolio_surplus)
    markets.append(market)
    agents.append(agent)
    


Simulations Progress: 100%|██████████| 5000/5000 [03:47<00:00, 21.98it/s]


### Surplus in training

In [67]:
import plotly.graph_objects as go
import numpy as np
def plot_surplus(surplus, title, start_index=0):
    # Create a figure
    fig = go.Figure()

    # Set up colors ranging from light to dark blue
    num_episodes = len(surplus)
    color_gradient = np.linspace(0.2, 1, num_episodes)  # Generates values from 0.2 (light blue) to 1 (dark blue)

    # Plot each episode's surplus data with progressively darker blue
    for i in tqdm(range(start_index, num_episodes), desc="Simulations Progress"):
        fig.add_trace(go.Scatter(
            y=surplus[i],
            mode='lines',
            name=f'Episode {i+1}',
            line=dict(color=f'rgba(0, 0, 255, {color_gradient[i]})', width=2)  # Increasingly dark blue
        ))

    # Update layout to make it interactive and clear
    fig.update_layout(
        title="Surplus Over Time for Each Episode First",
        xaxis_title="Time Step",
        yaxis_title="Surplus",
        hovermode="x unified",  # Shows all episode values at a given timestep
        showlegend=True
    )

    # Show the interactive plot
    fig.show()


start_idx = max(0, 4900)
plot_surplus(surplus, "Surplus Over Time for Each Episode First", start_index=start_idx)


Simulations Progress: 100%|██████████| 100/100 [00:00<00:00, 1901.91it/s]


### Average in training

In [68]:
import plotly.graph_objects as go
import numpy as np
# Function to plot the average agent wealth and market wealth across all simulations
def plot_average_wealth(surplus, markets, agents, title, start_index=0):
    # Calculate the average market and agent wealth across all simulations
    recent_markets = markets[start_index:]
    recent_agents = agents[start_index:]

    avg_market_wealth = np.mean(recent_markets, axis=0)
    avg_agent_wealth = np.mean(recent_agents, axis=0)

    # Create the figure
    fig = go.Figure()

    # Plot average market wealth
    fig.add_trace(
        go.Scatter(
            y=avg_market_wealth,
            mode='lines',
            name='Average Market Wealth',
            line=dict(color='blue', width=2)
        )
    )

    # Plot average agent wealth
    fig.add_trace(
        go.Scatter(
            y=avg_agent_wealth,
            mode='lines',
            name='Average Agent Wealth',
            line=dict(color='green', width=2)
        )
    )

    # Update layout
    fig.update_layout(
        title=title + str(len(recent_markets))+ ' Simulations',
        xaxis_title="Time Steps",
        yaxis_title="Wealth",
        hovermode="x unified",  # Improves interactivity
        showlegend=True,
        height=600
    )

    # Show the interactive plot
    fig.show()


start_idx = max(0, 4900)
plot_average_wealth(surplus,markets,agents, "Average Market and Agent Wealth Across ", start_idx)


### Sharp

In [94]:
import numpy as np
from tqdm import tqdm

# Initialize lists to store Sharpe ratios for each episode for both the agent and the market
agent_sharpe_ratios = []
market_sharpe_ratios = []

# Loop over each episode using the already populated `markets` and `agents` lists
for episode_index in tqdm(range(len(markets)), desc="Calculating Sharpe Ratios"):
    # Get the agent and market values for the current episode
    agent_values = agents[episode_index]
    market_values = markets[episode_index]
    
    # Calculate returns for agent portfolio and market for each timestep in this episode
    agent_returns = np.diff(agent_values) / agent_values[:-1]
    market_returns = np.diff(market_values) / market_values[:-1]
    
    # Calculate the Sharpe ratio for the agent's portfolio in this episode
    agent_sharpe_ratio = agent_returns.mean() / agent_returns.std() if agent_returns.std() != 0 else 0
    agent_sharpe_ratios.append(agent_sharpe_ratio)
    
    # Calculate the Sharpe ratio for the market in this episode
    market_sharpe_ratio = market_returns.mean() / market_returns.std() if market_returns.std() != 0 else 0
    market_sharpe_ratios.append(market_sharpe_ratio)

# Calculate the average Sharpe ratios across all episodes for both agent and market
average_agent_sharpe_ratio = np.mean(agent_sharpe_ratios)
average_market_sharpe_ratio = np.mean(market_sharpe_ratios)

# Print the comparison
print(f"Average Sharpe Ratio for Agent across all simulations: {average_agent_sharpe_ratio:.4f}")
print(f"Average Sharpe Ratio for Market across all simulations: {average_market_sharpe_ratio:.4f}")


Calculating Sharpe Ratios: 100%|██████████| 5000/5000 [00:00<00:00, 11204.06it/s]

Average Sharpe Ratio for Agent across all simulations: 0.0525
Average Sharpe Ratio for Market across all simulations: 0.0786





### Surplus in test

In [69]:
from data_scripts import generate_data as gd
from data_scripts import plotting as pl
import datetime

tickers = ['MSFT', 'TGT', 'QCOM', 'MU', 'CAT']

# Load simulation data for the selected stocks
stock_data_dict = gd.load_simulation_data(tickers)

# Get the combined training simulation data for the selected stocks
combined_train_data = gd.get_combined_simulation(stock_data_dict, simulation_index=0, set_type='train')


# Assuming combined_train_data is a NumPy array or a DataFrame
num_days_train, num_ep_train = data = stock_data_dict[tickers[0]]['train'].shape
num_days_test, num_ep_test = data = stock_data_dict[tickers[0]]['test'].shape

def generate_time():
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{timestamp}"

In [71]:
#raise Exception("Skip this cell")

import random
import numpy as np
from trading_envs.trading_env import TradingEnv
from models.ddpg_agent import Agent
from tqdm import tqdm

# Test on new test data
reward_function = 'portfolio_value'
test_simulations = num_ep_test
test_days = num_days_test
test_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=0, set_type='test')
env_test = TradingEnv(stock_data=test_simulation_data, reward_function=reward_function)


agent = Agent(alpha=0.0001, beta=0.001, input_dims=[env_test.observation_space.shape[0]], 
                tau=0.001, env=env_test, batch_size=64, layer1_size=400, layer2_size=300, 
                n_actions=env_test.action_space.shape[0])

# Load the trained model # actor_ddpg100010_20241026_163341_portfolio_value 
agent.load_models(suffix=f'8804_20241029_015052_portfolio_value')  # Load the most recent saved model

actions_columns = ['Episode', 'TimeStep'] + [f'Action_{i}' for i in range(env_test.action_space.shape[0])]

# DataFrames to store test results
test_wealth_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Simulation', 'Wealth'])
test_actions_df = pd.DataFrame(columns=actions_columns)
test_rewards_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Reward'])
states_columns = ['Episode', 'TimeStep'] + [f'State_{i}' for i in range(env_test.observation_space.shape[0])]
test_states_df = pd.DataFrame(columns=states_columns)

# Training loop only one time
for episode in tqdm(range(test_simulations), desc="Testing Progress"):
    episode_wealth = []
    episode_actions = []
    episode_states = []
    episode_rewards = []

    # Reset environment and variables
    test_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=episode, set_type='test')
    # Set the new environment with this training data
    env = TradingEnv(stock_data=test_simulation_data, reward_function=reward_function)

    state, _ = env.reset()
    done = False
    score = 0
    time_step = 0

    while not done:
        action = agent.choose_action(state)
        state_, reward, done, _, _ = env.step(action)
        agent.remember(state, action, reward, state_, done)
        agent.learn()
        score += reward

        # Collect data
        episode_wealth.append({
            'Episode': episode, 
            'TimeStep': time_step, 
            'Simulation': episode,
            'Wealth': env.get_portfolio_value()
        })

        state_record = {'Episode': episode, 'TimeStep': time_step}
        state_record.update({f'State_{i}': s for i, s in enumerate(state)})
        episode_states.append(state_record)

        state = state_
        time_step += 1

    # Append episode data to DataFrames
    test_wealth_df = pd.concat([test_wealth_df, pd.DataFrame(episode_wealth)], ignore_index=True)
    test_states_df = pd.concat([test_states_df, pd.DataFrame(episode_states)], ignore_index=True)



time = generate_time()
time_first = time

# Save with unique filename
test_wealth_df.to_csv(f'data_save/wealth_over_time_test_{time}_{reward_function}.csv', index=False)
test_states_df.to_csv(f'data_save/states_observed_test_{time}_{reward_function}.csv', index=False)

save_num += 1




... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...


Testing Progress: 100%|██████████| 100/100 [25:10<00:00, 15.10s/it]


testing

In [103]:
from tqdm import tqdm
import numpy as np
import pandas as pd

# Initialize lists to store surplus, market, and agent values for each simulation
surplus = []
markets = []
agents = []

# Load CSV data once
agent_wealth = pd.read_csv('data_save/wealth_over_time_test_20241030_011304_portfolio_value.csv')
states = pd.read_csv('data_save/states_observed_test_20241030_011304_portfolio_value.csv')

# wealth_over_time_test_20241030_101410_diff_sharpe_reward

#agent_wealth = pd.read_csv(f'data_save/wealth_over_time_test_{time}_{reward_function}.csv')
#states = pd.read_csv(f'data_save/states_observed_test_{time}_{reward_function}.csv')

# Loop over each simulation index
for i in tqdm(range(agent_wealth['Episode'].max()), desc="Simulations Progress"):
    # Filter agent wealth data for the current episode
    agent_wealth_e = agent_wealth[agent_wealth['Episode'] == i].set_index('TimeStep')
    prices = states[states['Episode'] == i].set_index('TimeStep').filter(like='State_').values[:, :6]

    # Preallocate arrays for each simulation's surplus
    portfolio_surplus = np.empty(len(prices))
    market = np.empty(len(prices))
    agent = np.empty(len(prices))

    # Iterate over the time steps
    for n in range(len(prices)):
        # Retrieve agent wealth data for the current timestep
        agent_wealth_day = agent_wealth_e.loc[n, 'Wealth']
        
        # Ensure it's a scalar value
        if isinstance(agent_wealth_day, pd.Series):
            agent_wealth_day = agent_wealth_day.iloc[0]

        # Calculate the current market value
        market_price_day = prices[n]

        # Distribute 1000 equally among the 5 stocks and cash with prices from the first day
        dist_vec = 1000 / len(market_price_day) / prices[0]
        market_wealth_day = np.sum(market_price_day * dist_vec)

        # Ensure market_wealth_day is a scalar value
        if isinstance(market_wealth_day, np.ndarray):
            market_wealth_day = market_wealth_day.item()

        # Calculate wealth difference (agent wealth - calculated wealth)
        wealth_diff = agent_wealth_day - market_wealth_day

        # Store the wealth difference in the preallocated arrays
        portfolio_surplus[n] = np.round(wealth_diff, 2)
        market[n] = np.round(market_wealth_day, 2)
        agent[n] = np.round(agent_wealth_day, 2)

    # Append the results for this simulation to the overall lists
    surplus.append(portfolio_surplus)
    markets.append(market)
    agents.append(agent)


Simulations Progress: 100%|██████████| 99/99 [00:08<00:00, 11.74it/s]


In [75]:
import plotly.graph_objects as go
import numpy as np
# Function to plot the average agent wealth and market wealth across all simulations
def plot_average_wealth(surplus, markets, agents, title, start_index=0):
    # Calculate the average market and agent wealth across all simulations
    recent_markets = markets[start_index:]
    recent_agents = agents[start_index:]

    avg_market_wealth = np.mean(recent_markets, axis=0)
    avg_agent_wealth = np.mean(recent_agents, axis=0)

    # Create the figure
    fig = go.Figure()

    # Plot average market wealth
    fig.add_trace(
        go.Scatter(
            y=avg_market_wealth,
            mode='lines',
            name='Average Market Wealth',
            line=dict(color='blue', width=2)
        )
    )

    # Plot average agent wealth
    fig.add_trace(
        go.Scatter(
            y=avg_agent_wealth,
            mode='lines',
            name='Average Agent Wealth',
            line=dict(color='green', width=2)
        )
    )

    # Update layout
    fig.update_layout(
        title=title + str(len(recent_markets))+ ' Simulations',
        xaxis_title="Time Steps",
        yaxis_title="Wealth",
        hovermode="x unified",  # Improves interactivity
        showlegend=True,
        height=600
    )

    # Show the interactive plot
    fig.show()


start_idx = max(0, 0)
plot_average_wealth(surplus,markets,agents, "Average Market and Agent Wealth Across test", start_idx)


### Sharp

In [105]:
import numpy as np
from tqdm import tqdm

# Initialize lists to store Sharpe ratios for each episode for both the agent and the market
agent_sharpe_ratios = []
market_sharpe_ratios = []

# Loop over each episode using the already populated `markets` and `agents` lists
for episode_index in tqdm(range(len(markets)), desc="Calculating Sharpe Ratios"):
    # Get the agent and market values for the current episode
    agent_values = agents[episode_index]
    market_values = markets[episode_index]
    
    # Calculate returns for agent portfolio and market for each timestep in this episode
    agent_returns = np.diff(agent_values) / agent_values[:-1]
    market_returns = np.diff(market_values) / market_values[:-1]
    
    # Calculate the Sharpe ratio for the agent's portfolio in this episode
    agent_sharpe_ratio = agent_returns.mean() / agent_returns.std() if agent_returns.std() != 0 else 0
    agent_sharpe_ratios.append(agent_sharpe_ratio)
    
    # Calculate the Sharpe ratio for the market in this episode
    market_sharpe_ratio = market_returns.mean() / market_returns.std() if market_returns.std() != 0 else 0
    market_sharpe_ratios.append(market_sharpe_ratio)

# Calculate the average Sharpe ratios across all episodes for both agent and market
average_agent_sharpe_ratio = np.mean(agent_sharpe_ratios)
average_market_sharpe_ratio = np.mean(market_sharpe_ratios)

# Print the comparison
print(f"Testing 1 Average Sharpe Ratio for Agent across all simulations: {average_agent_sharpe_ratio:.4f}")
print(f"Testing 1 Average Sharpe Ratio for Market across all simulations: {average_market_sharpe_ratio:.4f}")


Calculating Sharpe Ratios: 100%|██████████| 99/99 [00:00<00:00, 4524.60it/s]

Testing 1 Average Sharpe Ratio for Agent across all simulations: 0.0543
Testing 1 Average Sharpe Ratio for Market across all simulations: 0.0778





## Second

In [124]:
from tqdm import tqdm
import numpy as np
from data_scripts import generate_data as gd
from data_scripts import plotting as pl

# Initialize an empty list to store the surplus values for each simulation
surplus_2 = []
markets_2 = []
agents_2 = []

# Read csv data once ###200011_20241027_145013_diff_sharpe_reward
agent_wealth_2 = pd.read_csv(f'data_save/9102_wealth_over_time_20241101_210054_diff_sharpe_reward.csv')
states_2 = pd.read_csv(f'data_save/9102_states_observed_20241101_210054_diff_sharpe_reward.csv')

# Loop over each simulation index
for i in tqdm(range(agent_wealth_2['Episode'].max()), desc="Simulations Progress"):
    # Filter agent wealth data for the current episode once
    agent_wealth_e_2 = agent_wealth_2[agent_wealth_2['Episode'] == i].set_index('TimeStep')
    prices_2 = states_2[states_2['Episode'] == i].set_index('TimeStep').filter(like='State_').values[:, :6]

    # Preallocate the portfolio surplus list
    portfolio_surplus_2 = np.empty(len(prices_2))
    market_2 = np.empty(len(prices_2))
    agent_2 = np.empty(len(prices_2))

    # Iterate over the time steps
    for n in range(0, len(prices)):
        # Retrieve agent wealth data for the current timestep
        agent_wealth_day_2 = agent_wealth_e_2.loc[n, 'Wealth']

        # Calculate the current market value
        market_price_day_2 = prices_2[n]

        # Distribute 1000 equally among the 5 stocks and cash with the prices from teh first day
        dist_vec_2 = 1000/ len(market_price_day_2) / prices_2[0]

        market_wealth_day_2 = np.sum(market_price_day_2 * dist_vec_2)

        # Calculate wealth difference (agent wealth - calculated wealth)
        wealth_diff_2 = agent_wealth_day_2 - market_wealth_day_2

        # Store the wealth difference
        portfolio_surplus_2[n] = np.round(wealth_diff_2, 2)
        market_2[n] = np.round(market_wealth_day_2, 2)  # Round market value to 2 decimal places
        agent_2[n] = np.round(agent_wealth_day_2, 2)


    # Append the portfolio surplus for this simulation to the overall surplus list
    surplus_2.append(portfolio_surplus_2)
    markets_2.append(market_2)
    agents_2.append(agent_2)
    


Simulations Progress: 100%|██████████| 3000/3000 [03:05<00:00, 16.20it/s]


### Surplus training

In [127]:
import plotly.graph_objects as go
import numpy as np
def plot_surplus(surplus, title, start_index=0):
    # Create a figure
    fig = go.Figure()

    # Set up colors ranging from light to dark blue
    num_episodes = len(surplus)
    color_gradient = np.linspace(0.2, 1, num_episodes)  # Generates values from 0.2 (light blue) to 1 (dark blue)

    # Plot each episode's surplus data with progressively darker blue
    for i in tqdm(range(start_index, num_episodes), desc="Simulations Progress"):
        fig.add_trace(go.Scatter(
            y=surplus[i],
            mode='lines',
            name=f'Episode {i+1}',
            line=dict(color=f'rgba(0, 0, 255, {color_gradient[i]})', width=2)  # Increasingly dark blue
        ))

    # Update layout to make it interactive and clear
    fig.update_layout(
        title="Surplus Over Time for Each Episode First",
        xaxis_title="Time Step",
        yaxis_title="Surplus",
        hovermode="x unified",  # Shows all episode values at a given timestep
        showlegend=True
    )

    # Show the interactive plot
    fig.show()


start_idx = max(0, 2900)
plot_surplus(surplus_2, "Surplus Over Time for Each Episode Second", start_index=start_idx)


Simulations Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Simulations Progress: 100%|██████████| 100/100 [00:00<00:00, 696.99it/s]


### Average training

In [125]:
import plotly.graph_objects as go
import numpy as np
# Function to plot the average agent wealth and market wealth across all simulations
def plot_average_wealth(surplus, markets, agents, title, start_index=0):
    # Calculate the average market and agent wealth across all simulations
    recent_markets = markets[start_index:]
    recent_agents = agents[start_index:]

    avg_market_wealth = np.mean(recent_markets, axis=0)
    avg_agent_wealth = np.mean(recent_agents, axis=0)

    # Create the figure
    fig = go.Figure()

    # Plot average market wealth
    fig.add_trace(
        go.Scatter(
            y=avg_market_wealth,
            mode='lines',
            name='Average Market Wealth',
            line=dict(color='blue', width=2)
        )
    )

    # Plot average agent wealth
    fig.add_trace(
        go.Scatter(
            y=avg_agent_wealth,
            mode='lines',
            name='Average Agent Wealth',
            line=dict(color='green', width=2)
        )
    )

    # Update layout
    fig.update_layout(
        title=title + str(len(recent_markets))+ ' Simulations',
        xaxis_title="Time Steps",
        yaxis_title="Wealth",
        hovermode="x unified",  # Improves interactivity
        showlegend=True,
        height=600
    )

    # Show the interactive plot
    fig.show()


start_idx = max(0, 2900)
plot_average_wealth(surplus_2,markets_2,agents_2, "Second Average Market and Agent Wealth Across ", start_idx)


### Sharp

In [126]:
import numpy as np
from tqdm import tqdm

# Initialize lists to store Sharpe ratios for each episode for both the agent and the market
agent_sharpe_ratios_2 = []
market_sharpe_ratios_2 = []

# Loop over each episode using the already populated `markets` and `agents` lists
for episode_index in tqdm(range(len(markets_2)), desc="Calculating Sharpe Ratios"):
    # Get the agent and market values for the current episode
    agent_values = agents_2[episode_index]
    market_values = markets_2[episode_index]
    
    # Calculate returns for agent portfolio and market for each timestep in this episode
    agent_returns = np.diff(agent_values) / agent_values[:-1]
    market_returns = np.diff(market_values) / market_values[:-1]
    
    # Calculate the Sharpe ratio for the agent's portfolio in this episode
    agent_sharpe_ratio = agent_returns.mean() / agent_returns.std() if agent_returns.std() != 0 else 0
    agent_sharpe_ratios.append(agent_sharpe_ratio)
    
    # Calculate the Sharpe ratio for the market in this episode
    market_sharpe_ratio = market_returns.mean() / market_returns.std() if market_returns.std() != 0 else 0
    market_sharpe_ratios.append(market_sharpe_ratio)

# Calculate the average Sharpe ratios across all episodes for both agent and market
average_agent_sharpe_ratio = np.mean(agent_sharpe_ratios)
average_market_sharpe_ratio = np.mean(market_sharpe_ratios)

# Print the comparison
print(f"Average Sharpe Ratio for Agent across all simulations: {average_agent_sharpe_ratio:.4f}")
print(f"Average Sharpe Ratio for Market across all simulations: {average_market_sharpe_ratio:.4f}")


Calculating Sharpe Ratios:   0%|          | 0/3000 [00:00<?, ?it/s]

Calculating Sharpe Ratios: 100%|██████████| 3000/3000 [00:00<00:00, 3531.63it/s]

Average Sharpe Ratio for Agent across all simulations: 0.0519
Average Sharpe Ratio for Market across all simulations: 0.0782





# Test

In [3]:
from data_scripts import generate_data as gd
from data_scripts import plotting as pl
import datetime

tickers = ['MSFT', 'TGT', 'QCOM', 'MU', 'CAT']

# Load simulation data for the selected stocks
stock_data_dict = gd.load_simulation_data(tickers)

# Get the combined training simulation data for the selected stocks
combined_train_data = gd.get_combined_simulation(stock_data_dict, simulation_index=0, set_type='train')


# Assuming combined_train_data is a NumPy array or a DataFrame
num_days_train, num_ep_train = data = stock_data_dict[tickers[0]]['train'].shape
num_days_test, num_ep_test = data = stock_data_dict[tickers[0]]['test'].shape

def generate_time():
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{timestamp}"

In [101]:
#raise Exception("Skip this cell")

import random
import numpy as np
from trading_envs.trading_env import TradingEnv
from models.ddpg_agent import Agent
from tqdm import tqdm

# Test on new test data
reward_function = 'diff_sharpe_reward'
test_simulations = num_ep_test
test_days = num_days_test
test_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=0, set_type='test')
env_test = TradingEnv(stock_data=test_simulation_data, reward_function=reward_function)


agent = Agent(alpha=0.0001, beta=0.001, input_dims=[env_test.observation_space.shape[0]], 
                tau=0.001, env=env_test, batch_size=64, layer1_size=400, layer2_size=300, 
                n_actions=env_test.action_space.shape[0])

# Load the trained model # 9904_states_observed_20241029_232625_diff_sharpe_reward_value

agent.load_models(suffix=f'9904_20241029_232625_diff_sharpe_reward')  # Load the most recent saved model

actions_columns = ['Episode', 'TimeStep'] + [f'Action_{i}' for i in range(env_test.action_space.shape[0])]

# DataFrames to store test results
test_wealth_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Simulation', 'Wealth'])
test_actions_df = pd.DataFrame(columns=actions_columns)
test_rewards_df = pd.DataFrame(columns=['Episode', 'TimeStep', 'Reward'])
states_columns = ['Episode', 'TimeStep'] + [f'State_{i}' for i in range(env_test.observation_space.shape[0])]
test_states_df = pd.DataFrame(columns=states_columns)

# Training loop only one time
for episode in tqdm(range(test_simulations), desc="Testing Progress"):
    episode_wealth = []
    episode_actions = []
    episode_states = []
    episode_rewards = []

    # Reset environment and variables
    test_simulation_data = gd.get_combined_simulation(stock_data_dict, simulation_index=episode, set_type='test')
    # Set the new environment with this training data
    env = TradingEnv(stock_data=test_simulation_data, reward_function=reward_function)

    state, _ = env.reset()
    done = False
    score = 0
    time_step = 0

    while not done:
        action = agent.choose_action(state)
        state_, reward, done, _, _ = env.step(action)
        agent.remember(state, action, reward, state_, done)
        agent.learn()
        score += reward

        # Collect data
        episode_wealth.append({
            'Episode': episode, 
            'TimeStep': time_step, 
            'Simulation': episode,
            'Wealth': env.get_portfolio_value()
        })

        state_record = {'Episode': episode, 'TimeStep': time_step}
        state_record.update({f'State_{i}': s for i, s in enumerate(state)})
        episode_states.append(state_record)

        state = state_
        time_step += 1

    # Append episode data to DataFrames
    test_wealth_df = pd.concat([test_wealth_df, pd.DataFrame(episode_wealth)], ignore_index=True)
    test_states_df = pd.concat([test_states_df, pd.DataFrame(episode_states)], ignore_index=True)



time = generate_time()
time_first = time

# Save with unique filename
test_wealth_df.to_csv(f'data_save/wealth_over_time_test_{time}_{reward_function}.csv', index=False)
test_states_df.to_csv(f'data_save/states_observed_test_{time}_{reward_function}.csv', index=False)


save_num += 1




... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...


Testing Progress: 100%|██████████| 100/100 [35:29<00:00, 21.29s/it]


In [108]:
from tqdm import tqdm
import numpy as np
import pandas as pd

# Initialize lists to store surplus, market, and agent values for each simulation
surplus = []
markets = []
agents = []

# Load CSV data once
agent_wealth = pd.read_csv('data_save/wealth_over_time_test_20241030_134110_diff_sharpe_reward.csv')
states = pd.read_csv('data_save/states_observed_test_20241030_134110_diff_sharpe_reward.csv')

#agent_wealth = pd.read_csv(f'data_save/wealth_over_time_test_{time}_{reward_function}.csv')
#states = pd.read_csv(f'data_save/states_observed_test_{time}_{reward_function}.csv')

# Loop over each simulation index
for i in tqdm(range(agent_wealth['Episode'].max()), desc="Simulations Progress"):
    # Filter agent wealth data for the current episode
    agent_wealth_e = agent_wealth[agent_wealth['Episode'] == i].set_index('TimeStep')
    prices = states[states['Episode'] == i].set_index('TimeStep').filter(like='State_').values[:, :6]

    # Preallocate arrays for each simulation's surplus
    portfolio_surplus = np.empty(len(prices))
    market = np.empty(len(prices))
    agent = np.empty(len(prices))

    # Iterate over the time steps
    for n in range(len(prices)):
        # Retrieve agent wealth data for the current timestep
        agent_wealth_day = agent_wealth_e.loc[n, 'Wealth']
        
        # Ensure it's a scalar value
        if isinstance(agent_wealth_day, pd.Series):
            agent_wealth_day = agent_wealth_day.iloc[0]

        # Calculate the current market value
        market_price_day = prices[n]

        # Distribute 1000 equally among the 5 stocks and cash with prices from the first day
        dist_vec = 1000 / len(market_price_day) / prices[0]
        market_wealth_day = np.sum(market_price_day * dist_vec)

        # Ensure market_wealth_day is a scalar value
        if isinstance(market_wealth_day, np.ndarray):
            market_wealth_day = market_wealth_day.item()

        # Calculate wealth difference (agent wealth - calculated wealth)
        wealth_diff = agent_wealth_day - market_wealth_day

        # Store the wealth difference in the preallocated arrays
        portfolio_surplus[n] = np.round(wealth_diff, 2)
        market[n] = np.round(market_wealth_day, 2)
        agent[n] = np.round(agent_wealth_day, 2)

    # Append the results for this simulation to the overall lists
    surplus.append(portfolio_surplus)
    markets.append(market)
    agents.append(agent)


Simulations Progress: 100%|██████████| 99/99 [00:08<00:00, 12.00it/s]


In [110]:
import plotly.graph_objects as go
import numpy as np
# Function to plot the average agent wealth and market wealth across all simulations
def plot_average_wealth(surplus, markets, agents, title, start_index=0):
    # Calculate the average market and agent wealth across all simulations
    recent_markets = markets[start_index:]
    recent_agents = agents[start_index:]

    avg_market_wealth = np.mean(recent_markets, axis=0)
    avg_agent_wealth = np.mean(recent_agents, axis=0)

    # Create the figure
    fig = go.Figure()

    # Plot average market wealth
    fig.add_trace(
        go.Scatter(
            y=avg_market_wealth,
            mode='lines',
            name='Average Market Wealth',
            line=dict(color='blue', width=2)
        )
    )

    # Plot average agent wealth
    fig.add_trace(
        go.Scatter(
            y=avg_agent_wealth,
            mode='lines',
            name='Average Agent Wealth',
            line=dict(color='green', width=2)
        )
    )

    # Update layout
    fig.update_layout(
        title=title + str(len(recent_markets))+ ' Simulations',
        xaxis_title="Time Steps",
        yaxis_title="Wealth",
        hovermode="x unified",  # Improves interactivity
        showlegend=True,
        height=600
    )

    # Show the interactive plot
    fig.show()


start_idx = max(0, 0)
plot_average_wealth(surplus,markets,agents, "Average Market Second test and Agent Wealth Across ", start_idx)


In [109]:
import numpy as np
from tqdm import tqdm

# Initialize lists to store Sharpe ratios for each episode for both the agent and the market
agent_sharpe_ratios = []
market_sharpe_ratios = []

# Loop over each episode using the already populated `markets` and `agents` lists
for episode_index in tqdm(range(len(markets)), desc="Calculating Sharpe Ratios"):
    # Get the agent and market values for the current episode
    agent_values = agents[episode_index]
    market_values = markets[episode_index]
    
    # Calculate returns for agent portfolio and market for each timestep in this episode
    agent_returns = np.diff(agent_values) / agent_values[:-1]
    market_returns = np.diff(market_values) / market_values[:-1]
    
    # Calculate the Sharpe ratio for the agent's portfolio in this episode
    agent_sharpe_ratio = agent_returns.mean() / agent_returns.std() if agent_returns.std() != 0 else 0
    agent_sharpe_ratios.append(agent_sharpe_ratio)
    
    # Calculate the Sharpe ratio for the market in this episode
    market_sharpe_ratio = market_returns.mean() / market_returns.std() if market_returns.std() != 0 else 0
    market_sharpe_ratios.append(market_sharpe_ratio)

# Calculate the average Sharpe ratios across all episodes for both agent and market
average_agent_sharpe_ratio = np.mean(agent_sharpe_ratios)
average_market_sharpe_ratio = np.mean(market_sharpe_ratios)

# Print the comparison
print(f"Testing 1 Average Sharpe Ratio for Agent across all simulations: {average_agent_sharpe_ratio:.4f}")
print(f"Testing 1 Average Sharpe Ratio for Market across all simulations: {average_market_sharpe_ratio:.4f}")


Calculating Sharpe Ratios: 100%|██████████| 99/99 [00:00<00:00, 9903.32it/s]

Testing 1 Average Sharpe Ratio for Agent across all simulations: 0.0388
Testing 1 Average Sharpe Ratio for Market across all simulations: 0.0778



