In [3]:
import gym
from gym import spaces
from selenium import webdriver
from selenium.webdriver.common.by import By
import numpy as np
import time

class FormEnv(gym.Env):
    def __init__(self):
        super(FormEnv, self).__init__()
        self.driver = webdriver.Chrome()
        self.driver.get("https://www.w3schools.com/html/html_forms.asp")
        
        # Action: 0 = click FirstName, 1 = type "John", 2 = click LastName, 3 = type "Doe", 4 = click Submit
        self.action_space = spaces.Discrete(5)
        
        # Observation: dummy state (could be improved with OCR or UI detection)
        self.observation_space = spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)
        
        self.state = np.zeros(5)
        self.step_count = 0

    def reset(self):
        self.driver.refresh()
        time.sleep(2)
        self.state = np.zeros(5)
        self.step_count = 0
        return self.state

    def step(self, action):
        reward = 0
        done = False

        if action == 0:
            self.driver.find_element(By.NAME, "firstname").click()
            self.state[0] = 1
        elif action == 1:
            self.driver.find_element(By.NAME, "firstname").send_keys("John")
            self.state[1] = 1
        elif action == 2:
            self.driver.find_element(By.NAME, "lastname").click()
            self.state[2] = 1
        elif action == 3:
            self.driver.find_element(By.NAME, "lastname").send_keys("Doe")
            self.state[3] = 1
        elif action == 4:
            self.driver.find_element(By.XPATH, '//form[@action="/action_page.php"]/input[@type="submit"]').click()
            self.state[4] = 1

        self.step_count += 1
        if np.all(self.state):
            reward = 10  # Completed all steps
            done = True
        elif self.step_count >= 10:
            done = True

        return self.state, reward, done, {}

    def render(self, mode='human'):
        pass

    def close(self):
        self.driver.quit()



In [16]:
import gym
from gym import spaces
import numpy as np
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import random

# === ĐỊNH NGHĨA MÔI TRƯỜNG ===
class FormEnv(gym.Env):
    def __init__(self):
        super(FormEnv, self).__init__()
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")  # Xóa nếu muốn xem trình duyệt hoạt động
        self.driver = webdriver.Chrome(options=options)
        self.driver.get("https://www.w3schools.com/html/html_forms.asp")

        # Action: 0 = click FirstName, 1 = type "John", 2 = click LastName, 3 = type "Doe", 4 = click Submit
        self.action_space = spaces.Discrete(5)

        # State: vector 5 chiều biểu diễn trạng thái đã thực hiện các bước
        self.observation_space = spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)

        self.state = np.zeros(5)
        self.step_count = 0

    def seed(self, seed=None):
        np.random.seed(seed)
        random.seed(seed)

    def reset(self, seed=None, options=None):
        if seed is not None:
            self.seed(seed)
        self.driver.get("https://www.w3schools.com/html/html_forms.asp")
        time.sleep(1)
        self.state = np.zeros(5)
        self.step_count = 0
        return self.state, {}
    
    def step(self, action):
        reward = 0
        done = False

        try:
            if action == 0:
                self.driver.find_element(By.NAME, "firstname").click()
                self.state[0] = 1
            elif action == 1:
                self.driver.find_element(By.NAME, "firstname").send_keys("John")
                self.state[1] = 1
            elif action == 2:
                self.driver.find_element(By.NAME, "lastname").click()
                self.state[2] = 1
            elif action == 3:
                self.driver.find_element(By.NAME, "lastname").send_keys("Doe")
                self.state[3] = 1
            elif action == 4:
                self.driver.find_element(By.XPATH, '//form[@action="/action_page.php"]/input[@type="submit"]').click()
                self.state[4] = 1
        except Exception as e:
            print(f"Action {action} failed: {e}")

        self.step_count += 1

        if np.all(self.state):
            reward = 10
            done = True
        elif self.step_count >= 10:
            done = True

        self.step_count += 1
        terminated = self.step_count >= 5  # ví dụ: hoàn thành nhiệm vụ
        truncated = False  # hoặc dùng timeout nào đó
        return self.state, reward, terminated, truncated, {}

    def render(self, mode="human"):
        pass

    def close(self):
        self.driver.quit()

# === HUẤN LUYỆN PPO AGENT ===
def train_agent():
    env = make_vec_env(FormEnv, n_envs=1)
    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=5000)
    model.save("ppo_form_filler")
    env.close()

# === CHẠY AGENT PPO SAU KHI HỌC ===
def run_agent():
    env = FormEnv()
    model = PPO.load("ppo_form_filler")

    obs = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        print(f"Action: {action}, Reward: {reward}, Done: {done}")
        time.sleep(1)  # để dễ quan sát
    env.close()

# === MAIN ===
if __name__ == "__main__":
    mode = input("Chọn chế độ (train/run): ").strip().lower()
    if mode == "train":
        train_agent()
    elif mode == "run":
        run_agent()
    else:
        print("Vui lòng chọn 'train' hoặc 'run'")


Using cuda device
Action 4 failed: Message: element click intercepted: Element <input type="submit" value="Submit"> is not clickable at point (82, 13). Other element would receive the click: <a onclick="TopNavBar.openNavItem('tutorials')" class="tnb-nav-btn w3-bar-item w3-button barex bar-item-hover w3-padding-16 ga-top ga-top-tut-and-ref" href="javascript:void(0)" id="navbtn_tutorials" title="Tutorials and References" role="button">...</a>
  (Session info: chrome=136.0.7103.49)
Stacktrace:
	GetHandleVerifier [0x00007FF65FACA145+76773]
	GetHandleVerifier [0x00007FF65FACA1A0+76864]
	(No symbol) [0x00007FF65F888F7A]
	(No symbol) [0x00007FF65F8E7419]
	(No symbol) [0x00007FF65F8E4DD2]
	(No symbol) [0x00007FF65F8E1E71]
	(No symbol) [0x00007FF65F8E0D71]
	(No symbol) [0x00007FF65F8D2444]
	(No symbol) [0x00007FF65F90735A]
	(No symbol) [0x00007FF65F8D1CF6]
	(No symbol) [0x00007FF65F907570]
	(No symbol) [0x00007FF65F92F0CF]
	(No symbol) [0x00007FF65F907133]
	(No symbol) [0x00007FF65F8D04D1]
	(No