In [1]:
import numpy as np
import polars as pl
from pathlib import Path
from envs.stock_trading_env_using_tensor import StockTradingEnv
import random
import torch
from ppo_agent import Agent
from gymnasium.vector import SyncVectorEnv
from utils import (
    load_data,
    add_past_hours,
    train_test_split,
    create_torch_array,
    make_env,
)

In [9]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TICKERS = "SBIN.NS"
SEED = 1337
NUM_ENVS = 2
TRAINED_MODEL_DIR = Path("trained_models")
TENSORBOARD_LOG_DIR = Path("tensorboard_log")
MODEL_SAVE_FILE = TRAINED_MODEL_DIR / "clean_rl_agent_ppo.pt"

In [10]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [11]:
df = load_data()
df = add_past_hours(df)
df = df.with_columns(pl.lit(0.0).alias("Buy/Sold/Hold"))
train_df, trade_df = train_test_split(df)

assert train_df.columns == trade_df.columns

trade_arrays = create_torch_array(trade_df, device=DEVICE)
trade_envs = SyncVectorEnv(
    [
        make_env(StockTradingEnv, trade_arrays, TICKERS)
        for _ in range(NUM_ENVS)
    ]
)
trade_agent = Agent(trade_envs).to(DEVICE)
trade_agent.load_state_dict(torch.load(MODEL_SAVE_FILE, map_location=DEVICE))
trade_agent.eval()

Agent(
  (critic): Sequential(
    (0): Linear(in_features=17, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): Tanh()
    (4): Linear(in_features=64, out_features=1, bias=True)
  )
  (actor): Sequential(
    (0): Linear(in_features=17, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): Tanh()
    (4): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [12]:
obs, info = trade_envs.reset(seed=SEED)
obs

tensor([[10000.0000,   591.9500,   595.3000,   593.0000,   586.6500,   585.4500,
           584.9000,   582.9000,   582.8000,   579.7500,   577.9500,   577.5000,
           572.8500,   573.1500,   571.7500,   572.3000,     0.0000],
        [10000.0000,   591.9500,   595.3000,   593.0000,   586.6500,   585.4500,
           584.9000,   582.9000,   582.8000,   579.7500,   577.9500,   577.5000,
           572.8500,   573.1500,   571.7500,   572.3000,     0.0000]],
       device='cuda:0')

In [17]:
new_info = {}
for k, v in info.items():
    if k.startswith("_"):
        continue
    new_info[k] = v[0]
new_info

{'index': 0,
 'close_price': tensor(591.9500, device='cuda:0'),
 'available_amount': tensor(10000., device='cuda:0'),
 'shares_holdings': tensor(0., device='cuda:0'),
 'cummulative_profit_loss': 0,
 'good_buys': 0,
 'good_sells': 0,
 'good_holds': 0,
 'bad_buys': 0,
 'bad_sells': 0,
 'bad_holds': 0,
 'reward': 0.0,
 'portfolio_value': tensor(10000., device='cuda:0')}

In [22]:
(obs[0][1] * 5 + 20) / 5

tensor(595.9500, device='cuda:0')

In [23]:
10 # shares
50 # share price



500

In [24]:
def combine_avg_buy_prices(previous_avg_buy_price, current_avg_buy_price, previous_shares, current_shares):
    # Step 2: Check for Initial Conditions
    if previous_avg_buy_price is None and current_avg_buy_price is None:
        return None, 0
    elif previous_avg_buy_price is None:
        return current_avg_buy_price, current_shares
    elif current_avg_buy_price is None:
        return previous_avg_buy_price, previous_shares
    
    # Step 3: Weighted Combination of Previous and Current Average Buy Prices
    total_cost_previous = previous_avg_buy_price * previous_shares
    total_cost_current = current_avg_buy_price * current_shares
    total_shares = previous_shares + current_shares  # Step 4: Update Number of Shares
    
    combined_avg_buy_price = (total_cost_previous + total_cost_current) / total_shares
    
    return combined_avg_buy_price, total_shares

# Test the function
prev_price = 50  # Example previous average buy price
curr_price = 60  # Example current average buy price
prev_shares = 10  # Number of shares bought at previous average buy price
curr_shares = 5  # Number of shares bought at current average buy price

new_avg_price, new_total_shares = combine_avg_buy_prices(prev_price, curr_price, prev_shares, curr_shares)
print("New Average Buy Price:", new_avg_price)
print("New Total Shares:", new_total_shares)

New Average Buy Price: 53.333333333333336
New Total Shares: 15


In [29]:
a = [10,10,10,10,10]
a.index(0)

TypeError: index expected at least 1 argument, got 0

In [31]:
a = [10,10,10,10,10]
b = a[:2]
a = a[2:]

In [33]:
b

[10, 10]