In [1]:
import random

from pandas import read_csv

In [2]:
# --- Simulation parameters -------------------------------------------------
# Every value below is an assumed placeholder for the corresponding game rule;
# all of them are handed to StockTradingEnv when the environment is built.
initial_cash = 100_000                  # starting cash balance (USD)
buy_min_percent = 0.05                  # BUY_MIN%: assumed to be 5%
buy_max_percent = 1.0                   # BUY_MAX%: assumed to be 100%
sell_min_percent = 0.05                 # SELL_MIN%: assumed to be 5%
sell_max_percent = 1.0                  # SELL_MAX%: assumed to be 100%
transaction_fee_percent = 0.001         # TRANSACTION_FEE%: assumed to be 0.1%
transaction_session_limit = 5           # TRANSACTION_SESSION: assumed to be 5
transaction_penalty = 100               # TRANSACTION_PENALTY: assumed to be 100 USD
total_assets_loss_threshold = 10_000    # TOTAL_ASSETS: assumed to be 10,000 USD
cash_loss_threshold = -5_000            # cash level used as the loss threshold
win_condition_total_assets = 1_000_000  # total-assets level used as the win condition

# --- Price history -----------------------------------------------------------
# Read the stock data from the CSV file in the working directory.
stock_df = read_csv('stock.csv')

# Preview: first rows of the frame, followed by its column index.
print("Stock Data Head:", stock_df.head(), sep="\n")
print("\nStock Data Columns:", stock_df.columns, sep="\n")

Stock Data Head:
         Date        Open        High         Low       Close   Adj Close  \
0  2020-06-08  126.453156  129.904404  126.453156  129.780121  103.926979   
1  2020-06-09  127.648186  127.724663  125.258125  126.070747  100.956551   
2  2020-06-10  126.338432  126.414917  124.091782  124.158699   99.425400   
3  2020-06-11  120.487572  120.554497  112.657745  112.820267   90.345634   
4  2020-06-12  115.917786  117.705544  114.034416  116.548759   93.331390   

     Volume  
0   5617543  
1   5641706  
2   4952601  
3  12304726  
4   6506434  

Stock Data Columns:
Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')


In [3]:
# Define the reinforcement learning environment
class StockTradingEnv:
    """Single-stock trading simulator with a ``reset()`` / ``step()`` interface.

    Every step consumes one row of ``df`` and trades at that row's ``'Close'``
    price.  Actions are integers: ``0`` = hold, ``1`` = buy, ``2`` = sell.
    The observation returned by ``reset()`` and ``step()`` is the list
    ``[cash, shares, current_close_price]``.

    An episode ends when the data runs out, when total assets reach
    ``win_condition_total_assets``, or when total assets / cash fall below
    their respective loss thresholds.
    """

    def __init__(self, df, initial_cash, buy_min_percent, buy_max_percent,
                 sell_min_percent, sell_max_percent, transaction_fee_percent,
                 transaction_session_limit, transaction_penalty,
                 total_assets_loss_threshold, cash_loss_threshold,
                 win_condition_total_assets):
        """Store the configuration and start at step 0 holding only cash.

        Args:
            df: Price table; must support ``len(df)`` and ``df['Close'].iloc[i]``.
            initial_cash: Cash balance at the start of every episode.
            buy_min_percent: Fraction of cash used as the minimum buy budget.
            buy_max_percent: Fraction of cash used as the maximum buy budget.
            sell_min_percent: Fraction of held shares that is the minimum sale.
            sell_max_percent: Fraction of held shares that is the maximum sale.
            transaction_fee_percent: Fee charged on every trade, as a fraction
                of the traded value.
            transaction_session_limit: Number of consecutive holds after which
                ``transaction_penalty`` is deducted from cash.
            transaction_penalty: Cash deducted when the hold limit is reached.
            total_assets_loss_threshold: Episode is lost when total assets
                drop below this value.
            cash_loss_threshold: Episode is lost when cash drops below this value.
            win_condition_total_assets: Episode is won when total assets reach
                this value.
        """
        self.df = df
        self.initial_cash = initial_cash
        self.cash = initial_cash
        self.shares = 0
        self.buy_min_percent = buy_min_percent
        self.buy_max_percent = buy_max_percent
        self.sell_min_percent = sell_min_percent
        self.sell_max_percent = sell_max_percent
        self.transaction_fee_percent = transaction_fee_percent
        self.transaction_session_limit = transaction_session_limit
        self.transaction_penalty = transaction_penalty
        self.total_assets_loss_threshold = total_assets_loss_threshold
        self.cash_loss_threshold = cash_loss_threshold
        self.win_condition_total_assets = win_condition_total_assets
        self.current_step = 0
        self.no_transaction_count = 0
        self.done = False
        self.reward = 0

    def reset(self):
        """Restore the start-of-episode portfolio and return the first state."""
        self.cash = self.initial_cash
        self.shares = 0
        self.current_step = 0
        self.no_transaction_count = 0
        self.done = False
        self.reward = 0
        return self._get_state()

    def _price_at(self, step):
        """Return the 'Close' price of the row at positional index ``step``."""
        # Column-first lookup: avoids building a full row Series on every call.
        return self.df['Close'].iloc[step]

    def _get_state(self):
        """Return ``[cash, shares, price]``; price is 0 once the data is exhausted."""
        if self.current_step < len(self.df):
            current_price = self._price_at(self.current_step)
        else:
            current_price = 0  # No row left to read: signals end of episode
        return [self.cash, self.shares, current_price]

    def _calculate_total_assets(self):
        """Return cash plus the value of held shares at the current price.

        After the last row has been consumed the last known price is used.
        """
        if self.current_step < len(self.df):
            current_price = self._price_at(self.current_step)
        else:
            current_price = self._price_at(len(self.df) - 1)
        return self.cash + (self.shares * current_price)

    def _total_buy_cost(self, shares, price):
        """Return the cash needed to buy ``shares`` at ``price``, fee included."""
        cost = shares * price
        return cost + cost * self.transaction_fee_percent

    def _affordable_shares(self, budget, price):
        """Return the largest whole share count whose cost plus fee fits ``budget``."""
        shares = int(budget / (price * (1 + self.transaction_fee_percent)))
        # Floating-point rounding can push the total marginally over budget.
        while shares > 0 and self._total_buy_cost(shares, price) > budget:
            shares -= 1
        return shares

    def _apply_hold(self):
        """Count a hold; charge the cash penalty once the hold limit is reached."""
        self.no_transaction_count += 1
        if self.no_transaction_count >= self.transaction_session_limit:
            self.cash -= self.transaction_penalty
            self.no_transaction_count = 0  # Start a fresh count after the penalty
        self.reward = -0.001  # Small penalty for holding to encourage action

    def _apply_buy(self, current_price):
        """Buy as many whole shares as the buy budget allows, fee included."""
        buy_amount_min = self.cash * self.buy_min_percent
        # Budget is BUY_MAX% of cash, never more than the cash on hand.
        budget = min(self.cash * self.buy_max_percent, self.cash)

        if not (budget >= buy_amount_min and self.cash > 0 and current_price > 0):
            self.reward = -0.01  # Budget below minimum, no cash, or invalid price
            return

        # Size the order so that cost + fee fits the budget.  Sizing on the
        # price alone (ignoring the fee) made nearly every all-in buy fail.
        shares_to_buy = self._affordable_shares(budget, current_price)
        if shares_to_buy <= 0:
            self.reward = -0.01  # Cannot afford even a single share
            return

        self.cash -= self._total_buy_cost(shares_to_buy, current_price)
        self.shares += shares_to_buy
        self.reward = 0.1  # Small positive reward for buying

    def _apply_sell(self, current_price):
        """Sell up to SELL_MAX% of the held shares (whole shares only)."""
        sell_amount_min_shares = self.shares * self.sell_min_percent
        sell_amount_max_shares = self.shares * self.sell_max_percent
        shares_to_sell = min(int(sell_amount_max_shares), self.shares)

        if shares_to_sell >= sell_amount_min_shares and self.shares > 0 and current_price > 0:
            revenue = shares_to_sell * current_price
            transaction_fee = revenue * self.transaction_fee_percent
            self.cash += (revenue - transaction_fee)
            self.shares -= shares_to_sell
            self.reward = 0.1  # Small positive reward for selling
        else:
            self.reward = -0.01  # Below sell minimum, no shares, or invalid price

    def step(self, action):
        """Apply ``action`` at the current row and advance one row.

        Args:
            action: ``0`` hold, ``1`` buy, ``2`` sell.  Any other value performs
                no trade, resets the hold counter and earns a reward of 0.

        Returns:
            ``(next_state, reward, done, info)`` where ``info`` is an empty dict.
            Calling ``step`` on a finished episode changes nothing and returns
            a reward of 0.0.
        """
        if self.done or self.current_step >= len(self.df):
            self.done = True
            # Do not hand out the previous step's reward a second time.
            self.reward = 0.0
            return self._get_state(), self.reward, self.done, {}

        current_price = self._price_at(self.current_step)

        # Start from a clean reward so nothing leaks over from the last step.
        self.reward = 0.0

        if action == 0:  # Hold
            self._apply_hold()
        else:
            self.no_transaction_count = 0
            if action == 1:  # Buy
                self._apply_buy(current_price)
            elif action == 2:  # Sell
                self._apply_sell(current_price)

        # Advance only after the action was processed with this row's price.
        self.current_step += 1

        if self.current_step >= len(self.df):
            # Out of data: add a bonus/penalty proportional to the overall return.
            self.done = True
            total_assets = self._calculate_total_assets()
            self.reward += (total_assets - self.initial_cash) / self.initial_cash * 10
        else:
            # Win/loss checks are valued at the next row's price.
            total_assets = self._calculate_total_assets()
            if total_assets >= self.win_condition_total_assets:
                self.reward = 100  # Large positive reward for winning
                self.done = True
            elif total_assets < self.total_assets_loss_threshold or self.cash < self.cash_loss_threshold:
                self.reward = -100  # Large negative reward for losing
                self.done = True

        return self._get_state(), self.reward, self.done, {}

In [4]:
# Example Usage (Training Loop Outline)
# Build the environment from the notebook-level configuration values.
# Keyword arguments make the mapping of each setting explicit.
env = StockTradingEnv(
    df=stock_df,
    initial_cash=initial_cash,
    buy_min_percent=buy_min_percent,
    buy_max_percent=buy_max_percent,
    sell_min_percent=sell_min_percent,
    sell_max_percent=sell_max_percent,
    transaction_fee_percent=transaction_fee_percent,
    transaction_session_limit=transaction_session_limit,
    transaction_penalty=transaction_penalty,
    total_assets_loss_threshold=total_assets_loss_threshold,
    cash_loss_threshold=cash_loss_threshold,
    win_condition_total_assets=win_condition_total_assets,
)

# This is a very basic outline. A full reinforcement learning implementation
# would involve an agent (e.g., DQN, PPO) interacting with this environment.

# Example of a simplified agent (very basic, not a true RL agent)
class SimpleAgent:
    """Baseline agent that ignores the state and picks a random action.

    This is a placeholder, not a learning agent: a real RL agent would choose
    actions from a policy (e.g. a neural network output).
    """

    def __init__(self, actions_n, seed=None):
        """Create the agent.

        Args:
            actions_n: Number of discrete actions; choices are ``0 .. actions_n - 1``.
            seed: Optional seed.  When given, the agent draws from its own
                ``random.Random(seed)`` so runs are reproducible.  When omitted,
                the module-level ``random`` generator is used, as before.
        """
        self.actions_n = actions_n
        self._rng = None if seed is None else random.Random(seed)

    def choose_action(self, state):
        """Return a uniformly random action index; ``state`` is not used."""
        rng = random if self._rng is None else self._rng
        return rng.randint(0, self.actions_n - 1)

# Simulation settings: episode count and the agent that drives the environment
num_episodes = 1_000
agent = SimpleAgent(3)  # three actions: 0 = Hold, 1 = Buy, 2 = Sell

In [5]:
# Run every episode to completion with the agent's actions and print a
# one-line summary per episode (final portfolio, step count, summed reward).
print("\n--- Starting Training Simulation ---")
for episode in range(num_episodes):
    state = env.reset()
    episode_reward = 0
    done = False
    step_count = 0

    # Step until the environment reports the episode as finished.
    while True:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        episode_reward += reward
        state = next_state
        step_count += 1
        if done:
            break

    # Episode finished: report the end-of-episode portfolio.
    final_total_assets = env._calculate_total_assets()
    print(f"Episode {episode + 1}: Final Total Assets = {final_total_assets:.2f} USD, Final Cash = {env.cash:.2f} USD, Final Shares = {env.shares}, Steps = {step_count}, Episode Reward = {episode_reward:.2f}")

print("\n--- Simulation Complete ---")


--- Starting Training Simulation ---
Episode 1: Final Total Assets = 69188.81 USD, Final Cash = 69188.81 USD, Final Shares = 0, Steps = 1256, Episode Reward = 15.83
Episode 2: Final Total Assets = 90659.58 USD, Final Cash = 90659.58 USD, Final Shares = 0, Steps = 1256, Episode Reward = 10.74
Episode 3: Final Total Assets = 91984.70 USD, Final Cash = 91984.70 USD, Final Shares = 0, Steps = 1256, Episode Reward = 9.67
Episode 4: Final Total Assets = 91433.37 USD, Final Cash = 167.26 USD, Final Shares = 342, Steps = 1256, Episode Reward = 9.71
Episode 5: Final Total Assets = 96872.80 USD, Final Cash = 2.62 USD, Final Shares = 363, Steps = 1256, Episode Reward = 20.26
Episode 6: Final Total Assets = 98492.92 USD, Final Cash = 21.59 USD, Final Shares = 369, Steps = 1256, Episode Reward = 15.17
Episode 7: Final Total Assets = 88682.37 USD, Final Cash = 84.85 USD, Final Shares = 332, Steps = 1256, Episode Reward = 14.87
Episode 8: Final Total Assets = 95125.16 USD, Final Cash = 123.00 USD, F