# In [None]:  (Jupyter notebook cell marker left over from export)
import gymnasium as gym
from gymnasium import spaces
import numpy as np

class MindfulnessBanditEnv(gym.Env):
    """
    Gymnasium environment for recommending mindfulness exercises using a contextual bandit approach.
    Each state represents user features, and each action corresponds to recommending a specific exercise.

    The reward for recommending exercise ``a`` in state ``s`` is the dot
    product of ``s`` with ``feature_weights[a]``. Every episode lasts exactly
    one step.

    NOTE(review): ``step`` returns the legacy 4-tuple
    ``(observation, reward, done, info)`` and ``reset`` returns only the
    observation. Gymnasium's ``Env`` API specifies a 5-tuple
    ``(observation, reward, terminated, truncated, info)`` and
    ``(observation, info)`` respectively. The legacy shapes are kept here so
    existing callers keep working; migrate callers and this class together
    before using Gymnasium wrappers or the environment checker.
    """
    metadata = {'render_modes': ['human', 'rgb_array'], 'render_fps': 30}

    def __init__(self, num_exercises, num_features):
        """
        Args:
            num_exercises: Number of distinct exercises (size of the discrete action space).
            num_features: Length of the user-feature vector (observation dimension).
        """
        super().__init__()
        self.num_exercises = num_exercises  # Number of different exercises available
        self.num_features = num_features  # Number of features in each state vector

        # Define action space and observation space
        self.action_space = spaces.Discrete(num_exercises)
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(num_features,), dtype=np.float32)

        # Random weights for simplicity, representing the effectiveness of each exercise per feature.
        # Drawn uniformly from [0, 1) using NumPy's global RNG.
        self.feature_weights = np.random.rand(num_exercises, num_features)
        self.state = None  # Set by reset(); None means no episode has been started yet

    def step(self, action):
        """
        Recommend one exercise for the current user state.

        Args:
            action: Index of the exercise to recommend; must be contained in ``action_space``.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the current
            observation, ``reward`` is a float, ``done`` is always ``True`` and
            ``info`` is an empty dict.

        Raises:
            AssertionError: If ``action`` is not a valid action.
            RuntimeError: If called before ``reset()``.
        """
        # Raised explicitly (rather than via `assert`) so the check survives `python -O`;
        # without it a negative action would silently index from the end of the weights.
        # The exception type and message match the previous assert-based check.
        if not self.action_space.contains(action):
            raise AssertionError("Invalid Action")
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        # Reward is a dot product of state and action weights
        reward = float(np.dot(self.state, self.feature_weights[action]))
        done = True  # In bandit problems, each decision is one episode
        return self.state, reward, done, {}

    def reset(self, *, seed=None, options=None):
        """
        Start a new episode by sampling a fresh user-feature vector.

        The vector is drawn from a standard normal distribution using the
        environment's own random generator, then clipped to the bounds of
        ``observation_space`` and cast to float32 so that the returned
        observation is always a member of the declared space.

        Args:
            seed: Optional seed for the environment's random generator; pass it
                to make the sequence of sampled states reproducible.
            options: Accepted for Gymnasium API compatibility; currently unused.

        Returns:
            The new state as a float32 array of shape ``(num_features,)``.
        """
        # Let the base class (re)seed self.np_random when a seed is supplied.
        super().reset(seed=seed)

        raw_state = self.np_random.normal(0.0, 1.0, self.num_features)
        clipped = np.clip(raw_state, self.observation_space.low, self.observation_space.high)
        self.state = clipped.astype(np.float32)
        return self.state

    def render(self, mode='human'):
        """
        Render the current state.

        Args:
            mode: ``'human'`` prints the state to stdout; ``'rgb_array'`` returns a
                placeholder empty array. Any other value does nothing.

        Returns:
            An empty NumPy array for ``'rgb_array'``, otherwise ``None``.
        """
        if mode == 'rgb_array':
            return np.array([])  # Placeholder for actual rendering
        elif mode == 'human':
            print(f"State: {self.state}")
            return None

    def close(self):
        """Release resources held by the environment (nothing to release here)."""
        pass
