Tabular Q-learning uses a table to store the Q-values for each state-action pair. This approach works well for small to medium-sized problems, where the number of states and actions is relatively small.

In [55]:
!pip install matplotlib
!pip install pymysql sqlalchemy
!pip install mysql-connector-python python-dotenv
!pip install sqlalchemy mysql-connector-python




In [56]:
import os
import matplotlib.pyplot as plt
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import text


from dotenv import load_dotenv

class Config:
    def __init__(self):
        load_dotenv()
        self.tidb_host = os.getenv("TIDB_HOST", "gateway01.us-east-1.prod.aws.tidbcloud.com")
        self.tidb_port = int(os.getenv("TIDB_PORT", "4000"))
        self.tidb_user = os.getenv("TIDB_USER", "EcFsmzHzn16sz32.root")
        self.tidb_password = os.getenv("TIDB_PASSWORD", "4UTXzVBKxU10w2Z1")
       # self.tidb_db_name = os.getenv("TIDB_DB_NAME", "embracepath")
        self.tidb_db_name = "embracepath"
        self.ca_path = os.getenv("CA_PATH", "")

In [57]:
class LuTypeTable:
    def __init__(self, TypeID, TypeName, TypeNameVector, Description, DescriptionVector, create_by, create_dt, modified_by, modified_dt, active_flg):
        self.TypeID = TypeID
        self.TypeName = TypeName
        self.TypeNameVector = TypeNameVector
        self.Description = Description
        self.DescriptionVector = DescriptionVector
        self.create_by = create_by
        self.create_dt = create_dt
        self.modified_by = modified_by
        self.modified_dt = modified_dt
        self.active_flg = active_flg

    def to_dict(self):
        return {
            'TypeID': self.TypeID,
            'TypeName': self.TypeName,
            'TypeNameVector': self.TypeNameVector,
            'Description': self.Description,
            'DescriptionVector': self.DescriptionVector,
            'create_by': self.create_by,
            'create_dt': self.create_dt,
            'modified_by': self.modified_by,
            'modified_dt': self.modified_dt,
            'active_flg': self.active_flg
        }

In [58]:
from sqlalchemy import create_engine
import pandas as pd
import logging

class DatabaseService:
    def __init__(self, config):
        self.config = config
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
        self.engine = self.create_engine()

    def create_engine(self):
        # Construct the database URL
        database_url = f"mysql+mysqlconnector://{self.config.tidb_user}:{self.config.tidb_password}@{self.config.tidb_host}:{self.config.tidb_port}/{self.config.tidb_db_name}"
        if self.config.ca_path:
            connect_args = {
                'ssl_ca': self.config.ca_path,
                'ssl_verify_cert': True,
                'ssl_verify_identity': True
            }
            return create_engine(database_url, connect_args=connect_args)
        return create_engine(database_url)
    
    # def fetch_data_with_cursor(self, query: str) -> Dict:
    #     self.connect()
    #     try:
    #         cursor = self.connection.cursor()
    #         cursor.execute(query)
    #         result = cursor.fetchall()
    #         cursor.close()
    #         return {row[0]: dict(zip([col[0] for col in cursor.description[1:]], row[1:])) for row in result}
    #     except Error as e:
    #         print(f"Error: {e}")
    #         return {}
    #     finally:
    #         self.disconnect()

    def fetch_data(self, query: str, params=None):
        try:
            if params:
                print("Params Type:", {type(p) for p in params})
                print("Params Value:", params)
                print("with params")
                print("query is :", query)
                df = pd.read_sql(query, self.engine, params=params)
                return df
            else:
                print("without params")
                return pd.read_sql(query, self.engine)
        except Exception as e:
            logging.error("Failed to fetch data: %s", e)
            return pd.DataFrame()  # Return an empty DataFrame on failure
    
    def execute(self, query: str, params=None):
        try:
            print("Executing SQL Query:")
            print("Query:", query)
            print("Parameters:", params)

            with self.engine.begin() as connection:  # Transaction management
                result = connection.execute(query, params)
                affected_rows = result.rowcount
                print(f"Rows affected: {affected_rows}")
                return affected_rows
        except Exception as e:
            logging.error("Failed to execute query: %s", str(e))
            print("Error during SQL execution:", str(e))
            return None


    def get_reward(self, user_id: int, exercise_id: int, emotion_id: int) -> float:
        user_id=1 # will need to remove once data is in user_exercise_feedback
        exercise_id=2956 # willl need to remove once data is in user_exercise_feedback
        emotion_id = 3 # willl need to remove once data is in user_exercise_feedback
        print("in get_reward")
        query = """
        SELECT * FROM user_exercise_feedback
        WHERE user_id = %s AND exercise_id = %s AND emotion_id = %s
        """
        
        print("user_id is ", user_id)
        print("exercise_id is ", exercise_id)
        print("emotion_id is ", emotion_id)
        
       # feedback_df = self.fetch_data(query, params=(user_id, exercise_id, emotion_id))
        feedback_df = self.fetch_data(query, params=[(user_id, exercise_id, emotion_id)])

        print("feedback_df is ", feedback_df.head)
        if not feedback_df.empty:
            return feedback_df['feedback'].iloc[0] / 5.0
        else:
            return 0.0  # Return a default value when no feedback is available

    def fetch_num_exercises(self):
        query = "SELECT COUNT(*) FROM recommended_exercise WHERE active_flg = TRUE"
        df = self.fetch_data(query)
        if not df.empty:
            return df.iloc[0, 0]
        return 0

    def fetch_recommended_exercises(self):
        query = """
        SELECT recommended_exercise_id, exercise_id, initial_q_value
        FROM recommended_exercise
        WHERE active_flg = TRUE
        """
        return self.fetch_data(query)

    def get_emotion_vector_by_typename(self, typeName: str):
        query = """
        SELECT TypeID, TypeName, TypeNameVector, Description, DescriptionVector,
        create_by, create_dt, modified_by, modified_dt, active_flg
        FROM lu_emotion_type WHERE TypeName = %s
        """
        return self.fetch_data(query, params=(typeName,))
    
    def update_q_value(self, user_id, core_emotion_id, core_emotion, emotion_vector, action, q_value, modified_by):
        query = """
        UPDATE q_table
        SET q_value = %s, modified_by = %s, modified_dt = CURRENT_TIMESTAMP
        WHERE user_id = %s AND core_emotion_id = %s AND core_emotion = %s AND emotion_vector = %s AND action = %s
        """
        self.execute(query, (q_value, modified_by, user_id, core_emotion_id, core_emotion, emotion_vector, action))
        result = connection.execute(query)
        return self.execute(query, (q_table_id, user_id, core_emotion_id, core_emotion, emotion_vector, action, q_value, create_by))

    def insert_q_value(self, q_table_id, user_id, core_emotion_id, core_emotion, emotion_vector, action, q_value, create_by):
        query = """
        INSERT INTO q_table (
            q_table_id, user_id, core_emotion_id, core_emotion, emotion_vector, action, q_value, create_by, create_dt
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, CURRENT_TIMESTAMP)
        """
        params = (q_table_id, user_id, core_emotion_id, core_emotion, emotion_vector, action, q_value, create_by)

        try:
            # Use raw DB-API connection accessible via `connection.connection`
            with self.engine.raw_connection() as raw_conn:
                cursor = raw_conn.cursor()
                cursor.execute(query, params)
                raw_conn.commit()  # Commit the transaction manually
                affected_rows = cursor.rowcount
                cursor.close()
                return affected_rows
        except SQLAlchemyError as e:
            logging.error(f"Failed to execute query: {e}")
            return None

    def get_q_values(self):
        query = """
        SELECT user_id, core_emotion_id, core_emotion, emotion_vector, action, q_value
        FROM q_table
        WHERE active_flg = TRUE
        """
        return self.fetch_data(query)
    
    def fetch_exercise_data(self):
        query = "SELECT * FROM exercise"
        return self.fetch_data(query)
    
    def fetch_latest_emotional_state(self):
        query = "SELECT * FROM embracepath.user_emotional_state ORDER BY create_dt LIMIT 1"
        return self.fetch_data(query)
    

    # def fetch_exercises(self):
    #     query = """
    #     SELECT * FROM exercise
    #     """
    #     exercise_records = self.fetch_data(query)
    #     exercises = [
    #         Exercise(
    #             exercise_id=rec[0], exercise_name=rec[1], exercise_vector=rec[2], 
    #             exercise_location=rec[3], exercise_type=rec[4], exercise_description=rec[5], 
    #             description_vector=rec[6], exercise_parent_child_type_id=rec[7], 
    #             create_by=rec[8], create_dt=rec[9], modified_by=rec[10], 
    #             modified_dt=rec[11], active_flg=rec[12]
    #         )
    #         for rec in exercise_records
    #     ]
    #     return exercises
    
        
        
#     def get_exercises(cursor: MySQLCursor) -> List[Exercise]:
#         try:
#             tableName = 'exercise'
#             cursor.execute(f"SELECT * FROM {tableName}")
#             rows = cursor.fetchall()  # Retrieve all rows at once
#             exercises = []  # List to store Exercise objects

#             for row in rows:
#                 exercise = Exercise(
#                     exercise_id=row[0],
#                     exercise_name=row[1],
#                     exercise_vector=json.loads(row[2]),  # Converting JSON string back to list
#                     exercise_location=row[3],
#                     exercise_type=row[4],
#                     exercise_description=row[5],
#                     description_vector=json.loads(row[6]),  # Converting JSON string back to list
#                     exercise_parent_child_type_id=row[7],
#                     create_by=row[8],
#                     create_dt=row[9],
#                     modified_by=row[10],
#                     modified_dt=row[11],
#                     active_flg=row[12]
#                 )
#                 exercises.append(exercise)  # Add the Exercise object to the list

#             return exercises  # Return the list of Exercise objects
#         except mysql.connector.Error as e:
#             logging.error("Error fetching records: %s", e)
#             return []  # Return an empty list in case of an error

In [59]:
class EpsilonGreedyAgent:
    def __init__(self, epsilon, environment):
        self.epsilon = epsilon
        self.environment = environment
        self.exercises_df = environment._fetch_recommended_exercises()
        self.Q_values = np.zeros(len(self.exercises_df))  # Adjust size based on fetched data

    def select_action(self, state):
        if np.random.rand() < self.epsilon:
            # Explore: choose a random action from the available exercises
            action_idx = np.random.randint(len(self.exercises_df))
        else:
            # Exploit: choose the best known action
            action_idx = np.argmax(self.Q_values)
        action = self.exercises_df.iloc[action_idx]
        return (action['recommended_exercise_id'], action['exercise_id'], action['initial_q_value'])

    def update(self, action_idx, reward):
        # Incremental update to the Q-value for the chosen action
        self.Q_values[action_idx] += (reward - self.Q_values[action_idx]) / (self.environment.action_counts[action_idx] + 1)
        self.environment.action_counts[action_idx] += 1


In [66]:
class State:
    def __init__(self, core_emotion=None, user_id=None, emotion_vector=None, core_emotion_id=None, location_vector=None):
        self.core_emotion = core_emotion
        self.core_emotion_id = core_emotion_id
        self.user_id = user_id
        self.emotion_vector = emotion_vector
        self.location_vector = location_vector

In [70]:
import gymnasium as gym
from gymnasium.spaces import Discrete, Box
import numpy as np

class MindfulnessBanditEnv(gym.Env):
    def __init__(self, num_features:int, db_service:DatabaseService, num_actions:int):
        self.num_features = num_features
        self.db_service = db_service
        self.action_space = Discrete(num_actions)  # 10 possible actions (mindfulness exercises)
        self.observation_space = Box(low=0, high=1, shape=(num_features,), dtype=np.float32)
        self.state = None
        self.done = False
        self.Q_A = {}  # Q-value estimate for action selection
        self.Q_B = {}  # Q-value estimate for updating
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9  # Discount factor
        self.q_values_history = []  # To store Q-values for plotting
        self.actions = self.get_exercises()
        self.update_action_space()  # Update action space based on retrieved exercises
    
    def reset(self):
        core_emotion = 1 # Happiness
        user_id = 1
        emotion_vector_df = self.db_service.get_emotion_vector_by_typename("Happy")

        # Create an instance of LuTypeTable from the first row of the DataFrame
        emotion_vector = LuTypeTable(
            TypeID=emotion_vector_df.iloc[0]['TypeID'],
            TypeName=emotion_vector_df.iloc[0]['TypeName'],
            TypeNameVector=emotion_vector_df.iloc[0]['TypeNameVector'],
            Description=emotion_vector_df.iloc[0]['Description'],
            DescriptionVector=emotion_vector_df.iloc[0]['DescriptionVector'],
            create_by=emotion_vector_df.iloc[0]['create_by'],
            create_dt=emotion_vector_df.iloc[0]['create_dt'],
            modified_by=emotion_vector_df.iloc[0]['modified_by'],
            modified_dt=emotion_vector_df.iloc[0]['modified_dt'],
            active_flg=emotion_vector_df.iloc[0]['active_flg']
        )
        #print("emotion_vector is", emotion_vector.DescriptionVector)
      #  print("before self.state")
        
#         emotion_vector_list = [float(x) for x in emotion_vector.TypeNameVector.strip('[]').split(',')]

#         # Print each element of the list
#         for element in emotion_vector_list:
#             print(element)
        

        
        
        st = State()
        st.core_emotion = emotion_vector.TypeName
        st.user_id = user_id
        st.emotion_vector = emotion_vector.TypeNameVector
        st.core_emotion_id = emotion_vector.TypeID
        st.location_vector = [0] * 384
        self.state = st
        
        print("state variables\n")
        print("core_emotion is", user_id)
       # print("TypeNameVector is", emotion_vector.TypeNameVector)
        print("TypeID is", emotion_vector.TypeID)

        
        #self.state = State(core_emotion=emotion_vector.TypeName, user_id=user_id, emotion_vector=[float(x) for x in emotion_vector.TypeNameVector.split(',')], core_emotion_id=emotion_vector.TypeID)

        #self.state = State(core_emotion=emotion_vector.TypeName, user_id=user_id, emotion_vector=emotion_vector.TypeNameVector, core_emotion_id=emotion_vector.TypeID)
        observation = np.array([self.state.core_emotion, self.state.user_id, *self.state.emotion_vector, self.state.core_emotion_id])
       # print("after self.state")
        return observation
    
    
    def save_q_value(self, state, action, reward):
        state_tuple = (state.core_emotion, state.user_id, tuple(state.emotion_vector), state.core_emotion_id)
        q_dict = self.Q_B if action % 2 == 0 else self.Q_A
        q_value = q_dict.get((state_tuple, action), 0)
        q_dict[(state_tuple, action)] = q_value + reward
        # Record the maximum Q-value for this state for plotting purposes
        self.q_values_history.append(max(q_dict.values()))
    # def save_q_value(self, state, action, reward):
    #     state_tuple = (state.core_emotion, state.user_id, tuple(state.emotion_vector), state.core_emotion_id)
    #     q_dict = self.Q_B if action % 2 == 0 else self.Q_A
    #     q_value = q_dict.get((state_tuple, action), 0)
    #     q_dict[(state_tuple, action)] = q_value + reward  # Example update, adjust your learning rule

        # Save the Q-value to the database
       # self.db_service.insert_q_value(1, state.user_id, state.core_emotion_id, state.core_emotion, state.emotion_vector, action, q_value, "RL_Model")
    
    def step(self, action):
        # Simulate the delay between the selected mindfulness exercise and the user's rating
        rating = self.get_user_rating(action) # this will be switched out by the feedback table. TODO!!!!!
        reward = self.db_service.get_reward(self.state, action, rating)
        print("reward is ", reward)
        # Update Q-values using Double Q-learning
        self.update_Q_values(self.state, action, reward)
        self.done = True
        print("Q Values are updated")
        self.save_q_value(self.state, action, reward) # this throws the bugs
        return self.state, reward, self.done, {}    

    def get_user_rating(self, action):
        # Simulate the user's rating (1-5)
        rating = np.random.randint(1, 6)
        return rating
    
    def update_Q_values(self, state, action, reward):
        print("in update_Q_values")
        # Get Q-values for state-action pair
        q_value_A = self.Q_A.get((state, action), 0)
       # q_value_B = self.get_second_q_value(state, action) # i will need to add this back, TODO!!!!
        q_value_B = 0.1

        # Update Q-value estimate for action selection (Q_A)
        self.Q_A[(state, action)] = reward

        # Update Q-value estimate for updating (Q_B)
        self.Q_B[(state, action)] = reward

    def get_second_q_value(self, state, action):
        # Get second Q-value using vector search query
        query = """
        SELECT q_value
        FROM q_table
        WHERE state = %s AND action = %s
        """
        result = self.db_service.fetch_data(query, (state, action))
        if result.empty:
            return 0  # Return a default Q-value if none is found
        return result.iloc[0, 0]
    
    # def load_exercises(self):
    #     # Get a cursor from the database service
    #     cursor = self.db_service.get_cursor()
    #     # Retrieve exercises from the database
    #     exercises = fetch_exercises
    #     return exercises
    
    def update_action_space(self):
        # Update the number of actions based on the number of exercises retrieved
        self.num_actions = len(self.actions)
        self.action_space = Discrete(self.num_actions)  # Reset action space with the actual number of exercises
        
    def get_exercises(self):
        exercise_df =  self.db_service.fetch_exercise_data()
        exercises = [
            Exercise(
                exercise_id=row['exercise_id'], exercise_name=row['exercise_name'], exercise_vector=row['exercise_vector'],
                exercise_location=row['exercise_location'], exercise_type=row['exercise_type'], exercise_description=row['exercise_description'],
                description_vector=row['description_vector'], exercise_parent_child_type_id=row['exercise_parent_child_type_id'],
                create_by=row['create_by'], create_dt=row['create_dt'], modified_by=row['modified_by'],
                modified_dt=row['modified_dt'], active_flg=row['active_flg']
            )
            for index, row in exercise_df.iterrows()
        ]
        return exercises
  
    def get_most_recent_state(self):
        df_user_state = self.db_service.fetch_latest_emotional_state()
        state = State(
            emotion_vector=df_user_state.iloc[0]['emotion_vector'],
            location_vector=df_user_state.iloc[0]['location_vector']
        )
        return state
        
    
    

    # def get_exercise_count(self):
    #     result = self.db_service.fetch_data(query, (state, action))
    #     if result.empty:
    #         return 0  # Return a default Q-value if none is found
    #     return result

    def close(self):
        pass

In [62]:
class Exercise:
    def __init__(self, exercise_id, exercise_name, exercise_vector, exercise_location, exercise_type, 
                 exercise_description, description_vector, exercise_parent_child_type_id, 
                 create_by, create_dt, modified_by=None, modified_dt=None, active_flg=True):
        self.exercise_id = exercise_id
        self.exercise_name = exercise_name
        self.exercise_vector = exercise_vector
        self.exercise_location = exercise_location
        self.exercise_type = exercise_type
        self.exercise_description = exercise_description
        self.description_vector = description_vector
        self.exercise_parent_child_type_id = exercise_parent_child_type_id
        self.create_by = create_by
        self.create_dt = create_dt  # Will be set to current datetime in the database
        self.modified_by = modified_by
        self.modified_dt = modified_dt
        self.active_flg = active_flg

    def to_dict(self):
        return {
            "exercise_id": self.exercise_id,
            "exercise_name": self.exercise_name,
            "exercise_vector": self.exercise_vector,
            "exercise_location": self.exercise_location,
            "exercise_type": self.exercise_type,
            "exercise_description": self.exercise_description,
            "description_vector": self.description_vector,
            "exercise_parent_child_type_id": self.exercise_parent_child_type_id,
            "create_by": self.create_by,
            "create_dt": self.create_dt,  # Although it's set in the DB, it can be included if set in Python
            "modified_by": self.modified_by,
            "modified_dt": self.modified_dt,
            "active_flg": self.active_flg
        }

In [71]:
import os
import matplotlib.pyplot as plt
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import text


from dotenv import load_dotenv

class Config:
    def __init__(self):
        load_dotenv()
        self.tidb_host = os.getenv("TIDB_HOST", "gateway01.us-east-1.prod.aws.tidbcloud.com")
        self.tidb_port = int(os.getenv("TIDB_PORT", "4000"))
        self.tidb_user = os.getenv("TIDB_USER", "EcFsmzHzn16sz32.root")
        self.tidb_password = os.getenv("TIDB_PASSWORD", "4UTXzVBKxU10w2Z1")
       # self.tidb_db_name = os.getenv("TIDB_DB_NAME", "embracepath")
        self.tidb_db_name = "embracepath"
        self.ca_path = os.getenv("CA_PATH", "")

# Create environment and agent
# Function to plot the learning history
def plot_learning_history(history):
    fig = plt.figure(1, figsize=(14, 10))
    ax = fig.add_subplot(2, 1, 1)
    episodes = np.arange(len(history))
    moves = np.array([h[0] for h in history])
    plt.plot(episodes, moves, lw=4, marker='o', markersize=10)
    ax.tick_params(axis='both', which='major', labelsize=15)
    plt.xlabel('Episodes', size=20)
    plt.ylabel('# moves', size=20)
    ax = fig.add_subplot(2, 1, 2)
    rewards = np.array([h[1] for h in history])
    plt.step(episodes, rewards, lw=4)
    ax.tick_params(axis='both', which='major', labelsize=15)
    plt.xlabel('Episodes', size=20)
    plt.ylabel('Final rewards', size=20)
    plt.savefig('q-learning-history.png', dpi=300)
    plt.show()
    
def plot_q_value_evolution(episodes, q_values):
    plt.figure(figsize=(10, 5))
    plt.plot(episodes, q_values, marker='o', linestyle='-', color='b')
    plt.title('Q-Value Evolution Over Time')
    plt.xlabel('Episodes')
    plt.ylabel('Q-Value')
    plt.grid(True)
    plt.show()
    
def main():
    config = Config()  # Make sure this is defined or imported correctly
    db_service = DatabaseService(config)  # Ensure DatabaseService is correctly implemented
    env = MindfulnessBanditEnv(num_features=3, db_service=db_service, num_actions=10)
    
    most_recent_state = env.get_most_recent_state()
    print("most_recent_state is", most_recent_state)
    
    
    history = []
    q_values_history = []  # List to store max Q-value of each episode for plotting
    for episode in range(5):  # Adjust number of episodes for more significant learning data
        state = env.reset()
        print("current number of actions in the env ", env.num_actions)
        
        epsilon = 0.1  # 10% of the time actions are chosen randomly
        if np.random.rand() < epsilon:
            action = np.random.randint(0, 10)  # Random action
        else:
            action = np.argmax(env.Q_A)  # Best action based on current knowledge
            
        next_state, reward, done, _ = env.step(action)
        history.append((episode, reward))
        q_values_history.append(max(env.Q_A.values(), default=0))  # Capture max Q-value from Q_A or Q_B
    env.close()
    
    
    
    
    #actions = env.
    #actions = env.get_exercises();
    #print("actions length ", len(env.actions))
    #print("number of actions is ", env.num_actions)
   # env.update_action_space();
   # print("actions length ", len(actions))
    
        
        
    # for episode in range(5):  # Adjust number of episodes for more significant learning data
    #     state = env.reset()
    #     action = np.random.randint(0, 10)  # Randomly choose an action
    #     next_state, reward, done, _ = env.step(action)
    #     history.append((episode, reward))
    #     q_values_history.append(max(env.Q_A.values(), default=0))  # Capture max Q-value from Q_A or Q_B

    #env.close()
    # episodes = np.arange(len(q_values_history))  # Generate an array of episode numbers
    # plot_q_value_evolution(episodes, q_values_history)
    # plot_learning_history(history)  # Ensure this function is defined to plot rewards

if __name__ == "__main__":
    main()

    
    
    
    
    
#     config = Config()
#     # https://www.meta.ai/c/a04b014a-ece0-4c51-9aa0-c51efb911727
#     # for second set of eyes coding https://chatgpt.com/c/5264785b-a953-4241-b977-7a96b811d4bd
#     db_service = DatabaseService(config)
#     # The features are angry, sad, happy
#     env = MindfulnessBanditEnv(num_features=3, db_service=db_service, num_actions=10)
#     history = []
#     for episode in range(5):  # Simulate 5 episodes
#       #  print("before error")
#         state = env.reset()
#       #  print("after error")
#         action = np.random.randint(0, 10)  # Randomly choose an action
#      #   print("action is ", action)
#         next_state, reward, done, _ = env.step(action)
#         plot_q_value_evolution(episodes, q_values)

#         history.append((episode, reward))
#         print(f"Episode {episode}: Action {action}, Reward {reward}")
#     env.close()
#     plot_learning_history(history)    
    
# main()    

without params
without params
most_recent_state is <__main__.State object at 0x7f320d377a60>
Params Type: {<class 'str'>}
Params Value: ('Happy',)
with params
query is : 
        SELECT TypeID, TypeName, TypeNameVector, Description, DescriptionVector,
        create_by, create_dt, modified_by, modified_dt, active_flg
        FROM lu_emotion_type WHERE TypeName = %s
        
state variables

core_emotion is 1
TypeID is 1
current number of actions in the env  9647
in get_reward
user_id is  1
exercise_id is  2956
emotion_id is  3
Params Type: {<class 'tuple'>}
Params Value: [(1, 2956, 3)]
with params
query is : 
        SELECT * FROM user_exercise_feedback
        WHERE user_id = %s AND exercise_id = %s AND emotion_id = %s
        
feedback_df is  <bound method NDFrame.head of    user_exercise_feedback_id  user_id  exercise_id  emotion_id  feedback  \
0                          1        1         2956           3         4   

  create_by           create_dt modified_by modified_dt  activ