In [1]:
import numpy as np
import gymnasium as gym
from quantrl_lab.custom_envs.stock.env_single_stock import StockTradingEnv
from quantrl_lab.custom_envs.stock.env_config import EnvConfig
from quantrl_lab.custom_envs.stock.strategies.actions.types.basic_market_actions import BasicActionStrategy
from quantrl_lab.custom_envs.stock.strategies.rewards.components import (
    PortfolioValueChangeReward,
    InvalidActionPenalty,
    TrendFollowingReward,
    HoldPenalty,
    BaseRewardStrategy
)
from quantrl_lab.custom_envs.stock.strategies.rewards.weighted_composite_reward import WeightedCompositeReward

In [2]:
portfolio_reward = PortfolioValueChangeReward()
invalid_penalty = InvalidActionPenalty(penalty=-1.0)
trend_reward = TrendFollowingReward()
hold_penalty = HoldPenalty(penalty=-0.005)

reward_components = [
    portfolio_reward,
    invalid_penalty,
    trend_reward,
    hold_penalty
]


component_weights = [
    1.0,  
    1.0,  
    0.5,  
    0.1   
]

composite_reward_strategy = WeightedCompositeReward(
    strategies=reward_components,
    weights=component_weights
)

In [7]:
def run_simple_test():

    print("=== 1. Setting up configuration and data ===")


    PRICE_COLUMN_INDEX = 3
    
    # Create dummy data for the environment
    data_size = 100
    my_data = np.random.rand(data_size, 5).astype(np.float32)
    my_data[:, PRICE_COLUMN_INDEX] = 50 + np.arange(data_size) * 0.2 + np.random.randn(data_size) * 0.5
    
    # Create the Pydantic config object
    env_configuration = EnvConfig(
        price_column_index=PRICE_COLUMN_INDEX,
        window_size=10,
        initial_balance=10000.0,
        transaction_cost_pct=0.001,
        slippage=0.0005            
    )

    # Instantiate the desired action strategy
    basic_action_strategy = BasicActionStrategy()

    print("Configuration, data, and strategy are ready.\n")

    # === 2. Initialize the Environment ===
    print("--- 2. Initializing the environment ---")
    try:
        env = StockTradingEnv(
            data=my_data,
            config=env_configuration,
            action_strategy=basic_action_strategy,
            reward_strategy=composite_reward_strategy
        )
        print("✅ Environment created successfully with FullActionStrategy!")
        print(f"Action Space: {env.action_space}")
        print(f"Observation Space Shape: {env.observation_space.shape}\n")
    except Exception as e:
        print(f"❌ Failed to create environment: {e}")
        return

    # === Run a few steps with random actions ===
    print("--- 3. Running a few steps with random actions ---")
    observation, info = env.reset()
    
    print("\n>>> Initial State after reset <<<")
    env.render(mode="human")

    num_random_steps = 50
    for i in range(num_random_steps):
        random_action = env.action_space.sample()

        print(f"\n{'='*40}")
        print(f"Step {i+1}/{num_random_steps} - Taking a random action")
        print(f"Random Action Array: {random_action}")
        print(f"{'='*40}")

        # Perform the step
        observation, reward, terminated, truncated, info = env.step(random_action)
        
        # Render the new state
        print("\n>>> State after action <<<")
        env.render(mode="human")

        # Print the key results from the info dictionary
        print(f"Reward received: {reward:.4f}")
        print(f"Decoded Action Info: {info['action_decoded']}")
        
        if terminated or truncated:
            print("\nEpisode finished early.")
            break
            
    print("\n\n=== Test finished successfully! ===")
    env.close()


In [8]:
run_simple_test()

=== 1. Setting up configuration and data ===
Configuration, data, and strategy are ready.

--- 2. Initializing the environment ---
✅ Environment created successfully with FullActionStrategy!
Action Space: Box([0.  0.  0.9], [6.  1.  1.1], (3,), float32)
Observation Space Shape: (59,)

--- 3. Running a few steps with random actions ---

>>> Initial State after reset <<<
----------------------------------------
Step:         10/99
Current Price:          52.12
Balance:             10000.00
Shares Held:                0 (Free)
Total Shares:               0 (Free + Reserved)
Portfolio Val:       10000.00
----------------------------------------
Active Orders:
  Pending Limit:    0
  Stop Loss:        0
  Take Profit:      0
----------------------------------------

Step 1/50 - Taking a random action
Random Action Array: [0.18362254 0.88148165 1.015645  ]

>>> State after action <<<
----------------------------------------
Step:         11/99
Current Price:          51.25
Balance:          