# DEMO 3

Decision Trees applied to Stock Market Data


In [1]:
import sys
import os

# Resolve the parent of the current working directory (note: this is based on
# the working directory, not on this file's location) and register it on
# sys.path so that modules living one level up can be imported.
func_lib_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(func_lib_path)

# Now you can import func_lib
import func_lib
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [None]:
import gymnasium as gym
import numpy as np
import pandas as pd
from stable_baselines3 import DQN
from gymnasium import spaces

In [None]:
def _simulate_milk_log(seed, n_days):
    """Draw a reproducible synthetic milk log.

    Seeds NumPy's global RNG with ``seed``, draws ``n_days`` consumption
    values uniformly from [0, 10) ounces, and flags discomfort (1) for every
    day whose consumption exceeds 5 ounces (0 otherwise).

    Returns the tuple ``(consumption, discomfort)`` as NumPy arrays.
    """
    np.random.seed(seed)
    ounces = np.random.uniform(0, 10, size=n_days)
    flags = (ounces > 5).astype(int)
    return ounces, flags


def _milk_log_frame(ounces, flags):
    """Pack a milk log into a DataFrame with a 1-based ``day`` column."""
    return pd.DataFrame({
        'day': range(1, len(ounces) + 1),
        'milk_consumption': ounces,
        'discomfort': flags,
    })


# Training dataset: 100 days, RNG seed 42
days = 100
milk_consumption, discomfort = _simulate_milk_log(42, days)
data = _milk_log_frame(milk_consumption, discomfort)

# Testing dataset: 50 days, RNG seed 24 (a different draw than training)
days_test = 50
milk_consumption_test, discomfort_test = _simulate_milk_log(24, days_test)
data_test = _milk_log_frame(milk_consumption_test, discomfort_test)

In [None]:

# Define the custom environment for milk consumption
class MilkConsumptionEnv(gym.Env):
    """Gymnasium environment that replays a table of daily milk consumption.

    One episode walks through ``data`` row by row, starting at row 0 and
    terminating once the last row has been reached.

    * Observation: a 1-element float32 array holding a milk amount in
      ounces, bounded to [0, 10].
    * Actions: ``0`` = consume less milk, ``1`` = consume more milk. The
      action shifts the logged consumption of the new day by -1 / +1 ounce
      (clipped to [0, 10]) before it is returned as the observation.
    * Reward: -1 when the new day's logged ``discomfort`` is 1, else 0.
      The reward is read straight from the table and therefore does not
      depend on the chosen action.

    Args:
        data: table with ``milk_consumption`` and ``discomfort`` columns,
            accessed positionally via ``data.iloc``; must have at least two
            rows (row 0 is the reset observation, later rows are stepped to).

    Raises:
        ValueError: if ``data`` has fewer than two rows.
    """

    def __init__(self, data):
        super().__init__()
        # Fail early with a clear message: with fewer than two rows the very
        # first reset()/step() would otherwise die with an out-of-bounds
        # positional-index error from pandas.
        if len(data) < 2:
            raise ValueError(
                "MilkConsumptionEnv needs at least two rows of data "
                f"(got {len(data)})"
            )
        self.data = data
        self.current_day = 0  # positional index of the current row in `data`
        self.action_space = spaces.Discrete(2)  # 0: consume less milk, 1: consume more milk
        self.observation_space = spaces.Box(low=0, high=10, shape=(1,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Rewind to the first row and return ``(observation, info)``.

        The initial observation is the unadjusted logged consumption of row 0;
        ``info`` is always an empty dict.
        """
        super().reset(seed=seed)
        self.current_day = 0
        obs = np.array([self.data.iloc[self.current_day]['milk_consumption']], dtype=np.float32)
        return obs, {}

    def step(self, action):
        """Advance one day and return ``(obs, reward, terminated, truncated, info)``.

        ``truncated`` is always ``False`` and ``info`` is always empty.
        Stepping again after the episode has terminated (without calling
        ``reset``) indexes past the end of ``data`` and raises ``IndexError``.
        """
        self.current_day += 1
        # Fetch the row once; both the observation and the reward come from it.
        row = self.data.iloc[self.current_day]
        logged = row['milk_consumption']

        if action == 0:  # consume less milk
            adjusted = max(0, logged - 1)
        else:  # consume more milk
            adjusted = min(10, logged + 1)

        next_obs = np.array([adjusted], dtype=np.float32)
        done = self.current_day >= len(self.data) - 1

        # Reward is -1 if discomfort (1), 0 otherwise
        reward = -1 if row['discomfort'] == 1 else 0

        return next_obs, reward, done, False, {}

    def render(self):
        """No-op: this environment has nothing to render."""
        pass

    def close(self):
        """No-op: this environment holds no resources to release."""
        pass

In [None]:
# Wrap the training dataset in the custom environment
env = MilkConsumptionEnv(data)

# Build a DQN agent with the 'MlpPolicy' network; verbose=0 keeps training quiet
agent = DQN(policy='MlpPolicy', env=env, verbose=0)

# Let the agent interact with the environment for 10,000 timesteps
agent.learn(total_timesteps=10000)


In [None]:

# Initialize the testing environment with the new dataset
env_test = MilkConsumptionEnv(data_test)

# Variables to keep track of correct predictions
correct_predictions = 0
total_predictions = 0

# Test the trained agent on the new dataset
obs, _ = env_test.reset()
done = False
while not done:
    # deterministic=True: take the greedy action so the evaluation is not
    # perturbed by the policy's residual exploration.
    action, _ = agent.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env_test.step(action)
    # An episode is over when the environment reports either flag.
    done = terminated or truncated

    # Discomfort is predicted from the (action-adjusted) observation of the
    # day the environment just moved to: more than 5 ounces means discomfort.
    predicted_discomfort = 1 if obs[0] > 5 else 0

    # Compare prediction with actual discomfort of that SAME day. step() has
    # already advanced current_day, so the observation describes row
    # `current_day` (not `current_day - 1`).
    actual_discomfort = env_test.data.iloc[env_test.current_day]['discomfort']
    if predicted_discomfort == actual_discomfort:
        correct_predictions += 1
    total_predictions += 1

# Calculate prediction accuracy (the loop body always runs at least once,
# so total_predictions is never zero here)
accuracy = correct_predictions / total_predictions * 100
print(f"Prediction accuracy on the new dataset: {accuracy:.2f}%")

