For actual usage, "simulate_performance" needs to be modified according to the application; in most applications it would return the validation accuracy, which the search then maximizes. The hyperparameter space can, of course, be expanded as required. For illustrative purposes, the performance here is just the sum of the selected hyperparameter values.

For fun:
1. Change the number of iterations.
2. Run the last cell multiple times to get a feel for how much the choices matter.

In [3]:
import random
from collections import defaultdict
from math import log

In [8]:
class Node:
    """One vertex of the search tree, holding a hyperparameter configuration.

    Attributes:
        hyperparameters: The configuration stored at this node (kept by
            reference, not copied).
        parent: The node this one was expanded from, or ``None`` for a root.
        children: Nodes expanded from this one; starts empty.
        visits: Number of times a result has been backed up through this node.
        value: Running total of the performances backed up through this node.
    """

    def __init__(self, hyperparameters, parent=None):
        # Search statistics start at zero and are accumulated during backup.
        self.visits, self.value = 0, 0
        # Each node gets its own, initially empty, child list.
        self.children = []
        self.parent = parent
        self.hyperparameters = hyperparameters

In [9]:
def simulate_performance(hyperparameters):
    """Return a toy performance score: the sum of all hyperparameter values.

    Args:
        hyperparameters: Mapping from hyperparameter name to a numeric value.

    Returns:
        The values added together in mapping order, or ``0`` for an empty
        mapping.
    """
    total = 0
    for setting in hyperparameters.values():
        total = total + setting
    return total

In [10]:
def select_node(node, exploration_weight=2.0):
    """Return the child of ``node`` with the highest UCB score.

    A visited child scores ``value / visits`` (its mean backed-up value) plus
    ``exploration_weight * sqrt(2 * ln(node.visits) / visits)``. A child that
    has never been visited scores infinity, so it is always preferred over
    visited ones.

    Args:
        node: Parent whose ``children`` are compared; its ``visits`` count
            feeds the exploration term.
        exploration_weight: Multiplier applied to the exploration term. Raise
            it to explore more, lower it (down to ``0`` for pure exploitation)
            to favour the best mean. It should be chosen relative to the scale
            of the values being backed up.

    Returns:
        The highest-scoring child, the earliest one on ties, or ``None`` when
        ``node`` has no children.
    """

    def ucb_score(child):
        if child.visits == 0:
            return float('inf')
        mean_value = child.value / child.visits
        # The log is taken lazily so a parent with only unvisited children
        # never needs a positive visit count.
        exploration = (2 * log(node.visits) / child.visits) ** 0.5
        return mean_value + exploration_weight * exploration

    # max() keeps the first of several equal maxima, i.e. the earliest child.
    return max(node.children, key=ucb_score, default=None)

In [11]:
def expand_node(node, hyperparameters_space):
    """Attach a randomly mutated child to ``node`` and return it.

    The child starts from a copy of the parent's hyperparameters. Each
    hyperparameter in ``hyperparameters_space`` is then, with probability 0.5,
    overwritten by a value drawn uniformly from its candidates. The draw may
    pick the value the parent already has, so a child can end up identical to
    its parent.

    Args:
        node: Parent node; the new child is appended to its ``children``.
        hyperparameters_space: Mapping from hyperparameter name to the
            sequence of candidate values.

    Returns:
        The newly created child node.
    """
    child_hyperparameters = node.hyperparameters.copy()
    for name, candidates in hyperparameters_space.items():
        if random.random() >= 0.5:
            # Coin flip says keep the parent's setting for this one.
            continue
        child_hyperparameters[name] = random.choice(candidates)

    child = Node(child_hyperparameters, parent=node)
    node.children.append(child)
    return child

def backpropagate(node, value):
    """Record ``value`` on ``node`` and on every ancestor up to the root.

    Each node on the path has ``value`` added to its running total and its
    visit count increased by one. Passing ``None`` does nothing.

    Args:
        node: Node where the result was obtained, or ``None``.
        value: Performance to add along the path.
    """
    current = node
    while current is not None:
        current.value += value
        current.visits += 1
        # Climb one level; the root's parent is None, which ends the walk.
        current = current.parent

In [18]:
def MCTS(hyperparameters_space, iterations=10000, max_children=3):
    """Search ``hyperparameters_space`` with a Monte Carlo tree search.

    The root holds a uniformly random configuration. Each iteration walks
    down from the root, adds a mutated child where there is room, scores the
    reached configuration with ``simulate_performance`` and backs the score
    up to the root.

    Args:
        hyperparameters_space: Mapping from hyperparameter name to the
            sequence of candidate values.
        iterations: Number of select / expand / simulate / back-up rounds.
        max_children: How many children a node may receive before the search
            starts choosing among them with ``select_node``. With ``1`` every
            node has a single child, so the tree is one chain and selection
            never has alternatives to compare.

    Returns:
        The hyperparameters of the root child with the highest mean backed-up
        value, or the root's own hyperparameters when the run was too short
        to create any child (fewer than two iterations).

    Raises:
        ValueError: If ``max_children`` is smaller than 1.
    """
    if max_children < 1:
        raise ValueError("max_children must be at least 1")

    root = Node({hp: random.choice(values) for hp, values in hyperparameters_space.items()})

    for _ in range(iterations):
        node = root
        # Selection: only descend through nodes that already hold their full
        # quota of children. Stopping at any node with room left is what lets
        # the tree branch instead of growing as a single chain.
        while len(node.children) >= max_children:
            node = select_node(node)
        # Expansion: a node that has been scored before gets a new child; a
        # never-visited node (only the root, on the first round) is scored
        # itself.
        if node.visits > 0:
            node = expand_node(node, hyperparameters_space)
        performance = simulate_performance(node.hyperparameters)
        backpropagate(node, performance)

    if not root.children:
        # Too few iterations to expand anything: the root is the only
        # configuration available, and max() would fail on an empty list.
        return root.hyperparameters

    def mean_value(child):
        return child.value / child.visits if child.visits else 0

    best_node = max(root.children, key=mean_value)
    return best_node.hyperparameters

In [19]:
# Search space: each hyperparameter name maps to its list of candidate values.
hyperparameters_space = dict(
    learning_rate=[0.001, 0.01, 0.1],
    batch_size=[16, 32, 64],
    num_layers=[1, 2, 3],
)

In [21]:
# Run a short search (100 iterations) over the space defined above. The search
# draws from the unseeded `random` module, so repeated runs can return
# different configurations.
best_hyperparameters = MCTS(hyperparameters_space, iterations=100)
# Bare expression: as the last line of a notebook cell it displays the result.
best_hyperparameters

{'learning_rate': 0.1, 'batch_size': 32, 'num_layers': 2}