In [None]:
from tianshou_drl.tianshou_ddpg import DDPGTrainer
from tianshou_setup import get_env_continous

# Build the DDPG trainer. `get_env_continous` is passed without being called,
# presumably as an environment factory the trainer invokes itself -- confirm
# against DDPGTrainer's constructor.
ddpgt = DDPGTrainer(get_env_continous)

In [None]:
import hashlib

def md5_equal_string(s):
    """Check whether ``s`` is identical to the MD5 hex digest of itself.

    The string is encoded as UTF-8, hashed with MD5, and the resulting
    lowercase hexadecimal digest is printed before the comparison is made.

    Args:
        s: The text to hash and compare against its own digest.

    Returns:
        ``True`` if the hex digest equals ``s`` exactly, otherwise ``False``.
    """
    # Hash the UTF-8 bytes of the input and render the digest as hex text.
    digest = hashlib.md5(s.encode("utf-8")).hexdigest()
    # The digest is echoed to stdout as a side effect of every call.
    print(digest)
    # Exact (case-sensitive) comparison between digest and input.
    return digest == s

# Try one example: the call prints the computed digest, then the outer print
# shows the boolean comparison result.
s = "ce114e4501d2f4e2dcea3e17b546f339"
print(md5_equal_string(s)) # NOTE(review): originally annotated "True"; that only holds if s is the MD5 hex digest of itself -- confirm by running


In [None]:
from argparse import ArgumentParser

import os
import time

from malib.runner import run
from malib.agent import IndependentAgent
from malib.scenarios.psro_scenario import PSROScenario
from malib.rl.dqn import DQNPolicy, DQNTrainer, DEFAULT_CONFIG
from malib.rollout.envs.open_spiel import env_desc_gen
from malib.rollout.envs.pettingzoo import PettingZooEnv




In [None]:
class PSROTrainer:
    """Assemble the configuration for a MALib ``PSROScenario`` and run it.

    The trainer uses ``DQNPolicy`` / ``DQNTrainer`` as the only algorithm
    (registered under the key ``"default"``), ``IndependentAgent`` as the
    training agent type, and an identity agent-mapping function.

    Attributes set by ``__init__``: ``log_dir``, ``env``, ``trainer_config``,
    ``training_config``, ``rollout_config``, ``agent_mapping_func``,
    ``algorithms``, ``env_description`` and ``runtime_logdir``.
    """

    def __init__(self, log_dir="./logs/", env=None):
        """Prepare all scenario settings and create the run's log directory.

        Args:
            log_dir: Root directory under which a per-run directory
                ``psro_<env.spec.id>/<timestamp>`` is created.
            env: Environment object exposing ``spec.id``. Although the
                parameter defaults to ``None`` it is required.

        Raises:
            ValueError: If ``env`` is ``None``. Without this check the failure
                would only surface later as an ``AttributeError`` on
                ``env.spec``.
        """
        if env is None:
            raise ValueError(
                "PSROTrainer requires an environment; got env=None"
            )

        self.log_dir = log_dir
        self.env = env

        # Shallow copy so that overriding the step budget does not modify the
        # top level of the shared DEFAULT_CONFIG["training_config"] mapping.
        self.trainer_config = DEFAULT_CONFIG["training_config"].copy()
        self.trainer_config["total_timesteps"] = int(1e6)
        self.training_config = {
            "type": IndependentAgent,
            "trainer_config": self.trainer_config,
            "custom_config": {},
        }
        self.rollout_config = {
            "fragment_length": 2000,  # every thread
            "max_step": 200,
            "num_eval_episodes": 10,
            "num_threads": 2,
            "num_env_per_thread": 10,
            "num_eval_threads": 1,
            "use_subproc_env": False,
            "batch_mode": "time_step",
            "postprocessor_types": ["defaults"],
            # every # rollout epoch run evaluation.
            "eval_interval": 1,
            "inference_server": "ray",  # three kinds of inference server: `local`, `pipe` and `ray`
        }
        # Identity mapping: every environment agent is its own training agent.
        self.agent_mapping_func = lambda agent: agent
        self.algorithms = {
            "default": (
                DQNPolicy,
                DQNTrainer,
                # model configuration, None for default
                {},
                {},
            )
        }
        # NOTE(review): an environment *instance* is passed as the scenario's
        # env_description here -- confirm PSROScenario accepts that form.
        self.env_description = PettingZooEnv(env=self.env)
        self.runtime_logdir = os.path.join(self.log_dir, f"psro_{self.env.spec.id}/{time.time()}")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.runtime_logdir, exist_ok=True)

    def train(self):
        """Build the ``PSROScenario`` from the stored settings and run it.

        The outer loop stops after 50 iterations; each best-response phase is
        limited to 10000 training iterations and 100 rollout iterations.
        """
        scenario = PSROScenario(
            name=f"psro_{self.env.spec.id}",
            log_dir=self.runtime_logdir,
            algorithms=self.algorithms,
            env_description=self.env_description,
            training_config=self.training_config,
            rollout_config=self.rollout_config,
            # control the outer loop.
            global_stopping_conditions={"max_iteration": 50},
            agent_mapping_func=self.agent_mapping_func,
            # for the training of best response.
            stopping_conditions={
                "training": {"max_iteration": int(1e4)},
                "rollout": {"max_iteration": 100},
            },
        )

        run(scenario)


In [None]:
from environment.Environment import VehicleJobSchedulingEnvACE, VehicleJobSchedulingParameters
from pettingzoo.utils.wrappers import BaseWrapper
def get_env():
    """Create a fresh vehicle job-scheduling environment with its wrappers.

    Returns:
        A ``PettingZooEnv`` wrapping a ``BaseWrapper`` wrapping a newly
        constructed ``VehicleJobSchedulingEnvACE``.
    """
    # NOTE(review): PSROTrainer.__init__ wraps whatever it receives in
    # PettingZooEnv again -- confirm the double wrapping is intended.
    scheduling_env = VehicleJobSchedulingEnvACE()
    return PettingZooEnv(BaseWrapper(scheduling_env))

In [None]:
# Instantiate the PSRO trainer on a freshly built environment. log_dir keeps
# its default ("./logs/"); construction also creates the per-run log directory.
psro = PSROTrainer(env=get_env())

In [None]:
# Build the PSROScenario from the trainer's stored settings and pass it to
# malib's run().
psro.train()

In [None]:
import numpy as np
from numba import cuda
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32
import hashlib

# Define the size and capacity of the hash table.
# SIZE is the number of key/value pairs generated further down in this cell;
# CAPACITY is the number of table slots and is fixed at twice SIZE.
SIZE = 1000000
CAPACITY = 2 * SIZE

# ---------------------------------------------------------------------------
# GPU open-addressing hash table (linear probing).
#
# Layout: `table[slot]` holds the index of an entry in the key/value arrays,
# or EMPTY when the slot is free. Keys and values themselves never move.
#
# String keys cannot be processed inside a CUDA kernel, and hashlib / str.encode
# are not compilable for the device, so every string key is reduced on the host
# to a non-negative 63-bit integer fingerprint taken from its MD5 digest. The
# kernels only ever see those integers. Two different strings that share a
# fingerprint would be treated as the same key (very unlikely at this size).
# ---------------------------------------------------------------------------

# Marker stored in unused table slots.
EMPTY = -1
# Value written to `results` for a query whose key is not in the table.
MISSING = float("nan")


def _fingerprint(key):
    """Return a non-negative 63-bit integer derived from the MD5 of ``key``."""
    digest = hashlib.md5(key.encode("utf-8")).digest()
    # Mask to 63 bits so the value fits a signed int64 and stays non-negative.
    return int.from_bytes(digest[:8], "little") & 0x7FFFFFFFFFFFFFFF


def _fingerprints(string_keys):
    """Fingerprint every string in ``string_keys`` into an int64 NumPy array."""
    return np.fromiter(
        (_fingerprint(k) for k in string_keys),
        dtype=np.int64,
        count=len(string_keys),
    )


# Device helper: map an integer key fingerprint to its home slot.
@cuda.jit(device=True)
def hash_func(key):
    """Return the home slot (0 <= slot < CAPACITY) for a non-negative int key."""
    return key % CAPACITY


# Kernel: insert one entry per thread.
@cuda.jit
def insert_kernel(keys, values, table, states):
    """Register entry ``i`` (one per thread) in ``table``.

    Args:
        keys: int64 key fingerprints, one per entry.
        values: entry values; not touched here because the table stores
            indices into this array rather than copies.
        table: int32 array of CAPACITY slots, pre-filled with EMPTY.
        states: unused; kept so existing launches with four arguments work.

    If the same key occurs more than once, the first thread to claim a slot
    wins and later duplicates are dropped.
    """
    i = cuda.grid(1)
    if i < keys.size:
        key = keys[i]
        slot = hash_func(key)
        # Bounded probing: a full table ends the loop instead of spinning.
        for _ in range(CAPACITY):
            # Atomically claim the slot only if it is still EMPTY; the value
            # previously stored there is returned either way.
            prev = cuda.atomic.cas(table, slot, EMPTY, i)
            if prev == EMPTY or keys[prev] == key:
                break
            # Occupied by a different key: try the next slot, wrapping around.
            slot += 1
            if slot == CAPACITY:
                slot = 0


# Kernel: look up one query per thread.
@cuda.jit
def lookup_kernel(keys, values, table, results, queries):
    """Write the value for ``queries[i]`` into ``results[i]`` (MISSING if absent).

    Args:
        keys: int64 fingerprints of the stored entries.
        values: values of the stored entries.
        table: slot -> entry-index table filled by ``insert_kernel``.
        results: float output array with at least ``queries.size`` elements.
        queries: int64 fingerprints of the keys to look up.
    """
    i = cuda.grid(1)
    if i < queries.size and i < results.size:
        key = queries[i]
        slot = hash_func(key)
        results[i] = MISSING
        for _ in range(CAPACITY):
            entry = table[slot]
            # An empty slot ends the probe sequence: the key is not present.
            if entry == EMPTY:
                break
            if keys[entry] == key:
                results[i] = values[entry]
                break
            slot += 1
            if slot == CAPACITY:
                slot = 0


# Create the string keys and random values, plus the integer fingerprints that
# are actually sent to the device.
keys = np.array([f"key{i}" for i in range(SIZE)])
values = np.random.rand(SIZE)
key_codes = _fingerprints(keys)

# Create the hash table as a numpy array of int32 entry indices, all EMPTY.
table = np.full(CAPACITY, EMPTY, dtype=np.int32)

# Launch configuration for one thread per entry.
threads_per_block = 256
blocks_per_grid = (SIZE + threads_per_block - 1) // threads_per_block
# The RNG states are not used by the kernels; they are still created because
# insert_kernel keeps its fourth parameter.
rng_states = create_xoroshiro128p_states(threads_per_block * blocks_per_grid, seed=1)

# Copy the key fingerprints, values and table to device memory.
d_keys = cuda.to_device(key_codes)
d_values = cuda.to_device(values)
d_table = cuda.to_device(table)

# Launch the insert kernel on the device.
insert_kernel[blocks_per_grid, threads_per_block](d_keys, d_values, d_table, rng_states)

# Copy the table back to host memory.
table = d_table.copy_to_host()

# Print some statistics of the hash table.
print(f"Hash table size: {SIZE}")
print(f"Hash table capacity: {CAPACITY}")
print(f"Hash table load factor: {np.count_nonzero(table != -1) / CAPACITY}")

# Create some query keys; indices at or above SIZE are not in the table.
query_keys = np.array([f"key{i}" for i in np.random.randint(0, 2 * SIZE, 10)])
query_codes = _fingerprints(query_keys)

# Result array, one slot per query, pre-filled with the "missing" marker.
results = np.full(query_keys.size, MISSING, dtype=np.float64)

# Copy the query fingerprints and result array to device memory.
d_query_keys = cuda.to_device(query_codes)
d_results = cuda.to_device(results)

# Launch the lookup kernel with one thread per query.
query_blocks = (query_keys.size + threads_per_block - 1) // threads_per_block
lookup_kernel[query_blocks, threads_per_block](d_keys, d_values, d_table, d_results, d_query_keys)

# Copy the result array back to host memory.
results = d_results.copy_to_host()

# Print the lookup results (nan means the key was not found).
for key, result in zip(query_keys, results):
    print(f"{key} -> {result}")
