In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup
import numpy as np
from tqdm import tqdm


import time

import random

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [2]:
# Replace '/path/to/chromedriver' with the actual path to the chromedriver executable
driver = webdriver.Chrome('chromedriver')
url = 'https://minesweeperonline.com/'  # Replace with the URL of the webpage you want to extract HTML from
driver.get(url)

In [3]:
class Minesweeper():
    def __init__(self, driver):
        self.driver = driver
        self.reset()
        self.cell_state = np.zeros((16,30,10), dtype=np.int8)
        self.update()
        
        self.end = 0
        self.win = 0
        self.lose = 0
        
        return None
    
    def update_action(self, action):
        reward = 0
        self.action = np.reshape(action, (16,30))
        cell_ij = np.unravel_index(np.argmax(self.action), self.action.shape)
        # print(cell_ij)
        # Find the element by ID

        element = self.driver.find_element_by_id(f"{cell_ij[0] + 1}_{cell_ij[1] + 1}")  # Replace "element_id" with the actual ID of your element

        # Click on the element
        element.click()
        self.update()

        if self.end:
            if self.win:
                reward = 100
            else:
                reward = -10
        else:
            reward = 1
        return self.cell_state[:,:,:], reward, self.end, self.win, self.lose
    
    
    def update(self):
        html_content = self.driver.page_source
        self.soup = BeautifulSoup(html_content, 'html.parser')
        self.game = self.soup.find("html").find("body").find("table").find("tbody").find("tr").find("td").find("div").find("div", id="center-column").find('div', id= "game-container").find('div', id = 'game')
        self.game1 = self.game.find_all(class_="square")
        self.end = 0 if self.game.find("div", class_="facesmile") else 1
        self.win = 1 if self.game.find("div", class_="facewin") else 0
        self.lose = 1 if self.game.find("div", class_="facedead") else 0
        self.cell_state[:,:,:] = 0
        time0 = time.time()
        for i in range(1, 17):
            for j in range(1, 31):
                state = self.game1[(i - 1) * 30+j - 1]["class"][-1]
                match state:
                    case "blank":
                        # print(f"block {i} {j}: is blank")
                        self.cell_state[i-1,j-1,0] = 1
                        # Perform actions specific to case 1
                    case value if value.startswith("open"):
                        # print(f"block {i} {j}: is open {value[-1]}")
                        self.cell_state[i-1,j-1, int(value[-1])] = 1
                        # Perform actions specific to case 2
                    case value if value.startswith("bombflagged"):
                        # print(f"block {i} {j}: is bombflagged")
                        self.cell_state[i-1,j-1,9] = 1
                        # Perform actions specific to case 3
                    case _:
                        # print(f"Invalid case for block {i} {j}: {state}")
                        return 1
                        # Handle an invalid or unexpected case
        # /html/body/table/tbody/tr/td/div/div[2]/div[1]/div[2]/div[1]
        # facewin
        # facesmile
        # facedead
        # print(time.time() - time0)
        return 1
    
    def reset(self):
        # Find the element by ID
        element = self.driver.find_element_by_id(f"face")  # Replace "element_id" with the actual ID of your element

        # Click on the element
        element.click()
        return 1


In [4]:
gamer = Minesweeper(driver)


In [5]:
def create_dqn_model(input_size, output_size):
    model = Sequential()
    model.add(Dense(512, input_dim=input_size, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(output_size, activation='softmax'))
    model.compile(loss='mse', optimizer=Adam())
    return model


In [25]:
class DQNAgent:
    def __init__(self, state_size, action_size, gamma, epsilon, epsilon_decay, epsilon_min, model):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        if model is None:
            self.model = create_dqn_model(state_size, action_size)
        else:
            self.model = model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.randint(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = np.random.choice(len(self.memory), batch_size, replace=False)
        for index in minibatch:
            state, action, reward, next_state, done = self.memory[index]
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose = 0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [26]:
def train_minesweeper(driver, model, runs):
    # Initialize the environment
    state_size = 16 * 30 * 10
    action_size = 16 * 30
    env = Minesweeper(driver)  # Replace with your Minesweeper environment

    # Define hyperparameters
    gamma = 0.95  # Discount factor
    epsilon = 1.0  # Exploration rate
    epsilon_decay = 0.995
    epsilon_min = 0.01
    batch_size = 32
    episodes = runs

    # Create the DQN agent
    agent = DQNAgent(state_size, action_size, gamma, epsilon, epsilon_decay, epsilon_min, model)

    
    # Create a tqdm progress bar
    progress_bar = tqdm(range(episodes))

    for episode in range(episodes):
        env.reset()
        state = env.cell_state  # Replace with your function to get the current game state
        state = np.reshape(state, [1, state_size])
        done = False
        score = 0

        while not done:
            action = np.zeros(480, dtype=np.int8)
            action[agent.act(state)] = 1

            next_state, reward, done, win, lose = env.update_action(action)  # Replace with your function to perform an action in the environment
            next_state = np.reshape(next_state, [1, state_size])
            # reward = 
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            score += reward

        agent.replay(batch_size)
        progress_bar.set_description(f"Episode: {episode}, Score: {score}, Epsilon: {epsilon:.2f}")
        progress_bar.update()
        # if episode % 10 == 0:
        #     print("Episode: {}, Score: {}, Epsilon: {:.2f}".format(episode, score, agent.epsilon))

    # Save the trained model
    agent.model.save("minesweeper_dqn_model.h5")
    return agent.model



In [28]:
model = train_minesweeper(driver, model, 10)


Episode: 9, Score: -8, Epsilon: 1.00: 100%|██████████| 10/10 [00:23<00:00,  2.31s/it]


In [19]:
# driver.quit()
state = np.zeros((1,4800))
state [0,0] = 1
res = model.predict(state, verbose=0)
print(np.unravel_index(np.argmax(res), (16,30)))
res = np.reshape(res, (16,30))
res[13,28]
# res.max()
# res.flatten()
# print(np.unravel_index(np.argmax(res), (16,30)))

(13, 28)


0.0023581334