# This notebook trains a TRPO agent in the SurfRecon environment

In [1]:
# Automatically reload edited modules before each cell is executed.
%load_ext autoreload
%autoreload 2
# Restrict MKL / OpenMP / NumExpr to a single thread each.
# NOTE(review): presumably to avoid CPU oversubscription when many environments
# run in parallel — confirm.
%env MKL_NUM_THREADS=1
%env OMP_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
# NOTE(review): MKL_DEBUG_CPU_TYPE=5 looks like the common workaround that forces
# MKL's AVX2 code path on non-Intel CPUs — confirm it is still required here.
%env MKL_DEBUG_CPU_TYPE=5

env: MKL_NUM_THREADS=1
env: OMP_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: MKL_DEBUG_CPU_TYPE=5


In [2]:
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Seed value handed to TensorFlow's RNG when the agent is created.
seed = 30

# Allocate GPU memory on demand instead of reserving it all up front, then
# install the resulting session as the default interactive session.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

2025-02-23 17:40:44.991769: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2025-02-23 17:40:45.262336: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2400005000 Hz
2025-02-23 17:40:45.284541: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fb62c000b70 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2025-02-23 17:40:45.284597: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2025-02-23 17:40:45.329143: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2025-02-23 17:40:45.828924: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-23 17:40:45.82

# Set up the environment

In [3]:
import sys
import os
# Force the correct project path onto the front of the module search path
# (presumably so the local `surface_seg` package is the one imported below).
# NOTE(review): absolute, machine-specific path — adjust to your own checkout.
sys.path.insert(0, '/root/apps/surfrecon')

import gym
from surface_seg.envs.surfrecon_env import MCSEnv
from surface_seg.utils.callback_new import Callback
from tensorforce.execution import Runner
import gym.wrappers
import numpy as np
import tensorforce
import copy
import json

  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'


In [4]:
# Maximum number of timesteps per episode; passed to MCSEnv, the Tensorforce
# environment wrapper, the agent and the runner.
timesteps = 500
# Substitute your own directory for saving result during training
save_dir = './result_multi_env/test_surfrecon'
# Number of environments run in parallel (also the agent's parallel_interactions).
num_parallel = 32
# Forwarded to MCSEnv; the notebook reports it as a multiple of the
# environment's thermal energy (n * kT).
thermal_threshold = 3

In [5]:
def setup_env(recording=True, structure=None, structure_idx=None):
    """Build a Tensorforce environment around a freshly created ``MCSEnv`` gym.

    Reads the notebook-level settings ``save_dir``, ``timesteps`` and
    ``thermal_threshold``.

    Args:
        recording: When true, the gym is wrapped in ``gym.wrappers.Monitor``
            writing to ``<save_dir>/vid`` (with ``force=True``); an episode is
            recorded whenever ``(episode_id + 1) % 50 == 0``.
        structure: Currently unused by this function.
        structure_idx: Currently unused by this function.

    Returns:
        The ``tensorforce.environments.OpenAIGym`` wrapper around the gym,
        limited to ``timesteps`` steps per episode and with visualisation off.
    """
    # Settings for the underlying gym environment.
    gym_settings = dict(
        observation_fingerprints=True,
        observation_forces=True,
        permute_seed=42,
        save_dir=save_dir,
        timesteps=timesteps,
        thermal_threshold=thermal_threshold,
        save_every_min=1,
        save_every=50,
        step_size=0.1,
    )
    wrapped_gym = MCSEnv(**gym_settings)

    if recording:
        def _should_record(episode_id):
            # True for episode ids 49, 99, 149, ... (every 50th episode).
            return (episode_id + 1) % 50 == 0

        wrapped_gym = gym.wrappers.Monitor(
            wrapped_gym,
            os.path.join(save_dir, 'vid'),
            force=True,
            video_callable=_should_record,
        )

    # Convert the (possibly monitored) gym into a Tensorforce environment.
    return tensorforce.environments.OpenAIGym(
        wrapped_gym,
        max_episode_timesteps=timesteps,
        visualize=False,
    )


"""
Create an environment for checking the initial energy and thermal energy
"""
# Build one environment with the default settings and reach through the
# wrappers (`.environment.env`) to the object exposing the energy attributes.
env = setup_env().environment.env
n = thermal_threshold

print('initial energy', env.initial_energy)
print('thermal energy', env.thermal_energy)
# The threshold expressed as n multiples of the thermal energy.
print('%dKT' % n, n * env.thermal_energy)

initial energy 5.822787452652696
thermal energy 0.930636
3KT 2.7919080000000003




# Set up the agent in tensorforce

In [6]:
from tensorforce.agents import Agent

# Seed TensorFlow's global RNG before the agent is built.
tf.random.set_seed(seed)

# NOTE(review): the notebook title and the stored output of the spec cell
# mention TRPO, but this cell configures a PPO agent — confirm which is intended.
ppo_settings = dict(
    type='ppo',
    likelihood_ratio_clipping=0.2,
    entropy_regularization=0.01,
)

# Exploration starts at 0.8 and decays exponentially, counted in timesteps.
exploration_schedule = dict(
    type='decaying',
    unit='timesteps',
    decay='exponential',
    initial_value=0.8,
    decay_steps=2000,  # other values left by the author: 10000, 50000, 1000000
    decay_rate=0.3,
)

agent = Agent.create(
    agent=ppo_settings,
    # A non-recording environment passed to Agent.create.
    environment=setup_env(recording=False),
    batch_size=64,
    learning_rate=3e-4,
    memory=100000,
    max_episode_timesteps=timesteps,
    exploration=exploration_schedule,
    parallel_interactions=num_parallel,
)
    

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


2025-02-23 17:40:55.490395: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-23 17:40:55.490456: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.2GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 245.91GiB/s
2025-02-23 17:40:55.490498: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2025-02-23 17:40:55.490512: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2025-02-23 17:40:55.490525: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2025-02-23 17:40:55.490536: I tensorflow/stream_executo

Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.


2025-02-23 17:41:13.692321: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-23 17:41:13.692378: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.2GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 245.91GiB/s
2025-02-23 17:41:13.692430: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2025-02-23 17:41:13.692442: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2025-02-23 17:41:13.692452: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2025-02-23 17:41:13.692462: I tensorflow/stream_executo

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [7]:
# Check agent specifications: print the agent's `spec` attribute.
# NOTE(review): the stored output of this cell reports agent 'trpo' and
# batch_size 1, which does not match the PPO configuration in the cell that
# creates the agent — re-run this cell to refresh the output.
print(agent.spec)

OrderedDict([('agent', 'trpo'), ('states', {'TS': {'type': 'float', 'shape': (1,)}, 'energy': {'type': 'float', 'shape': (1,)}, 'fingerprints': {'type': 'float', 'shape': (252,)}, 'forces': {'type': 'float', 'shape': (54,)}, 'positions': {'type': 'float', 'shape': (54,)}}), ('actions', {'action_type': {'type': 'int', 'shape': (), 'num_values': 4}, 'atom_selection': {'type': 'int', 'shape': (), 'num_values': 8}, 'movement': {'type': 'float', 'shape': (1, 3), 'min_value': -0.019999999552965164, 'max_value': 0.019999999552965164}}), ('max_episode_timesteps', 500), ('batch_size', 1), ('network', 'auto'), ('use_beta_distribution', True), ('memory', 50000), ('update_frequency', None), ('learning_rate', 0.001), ('discount', 0.99), ('estimate_terminal', False), ('critic_network', None), ('critic_optimizer', None), ('preprocessing', None), ('exploration', {'type': 'decaying', 'unit': 'timesteps', 'decay': 'exponential', 'initial_value': 0.2, 'decay_steps': 50000, 'decay_rate': 0.5}), ('variable

# Run the DRL method in parallel (multiple environments)

In [8]:
# Total number of training episodes (200 per parallel environment).
num_episode = num_parallel*200

# Bound method handed to the runner; with callback_episode_frequency=1 it is
# called after every finished episode.
callback = Callback(num_episode, save_dir).episode_finish

# One non-recording environment per parallel worker.
runner = Runner(
    agent=agent,
    environments=[setup_env(recording=False) for _ in range(num_parallel)],
    num_parallel=num_parallel,
    remote='multiprocessing',
    max_episode_timesteps=timesteps,
)

# Multi-env training does not close after being trained for specified num_episodes.
# Manual termination required.
# Because the run is normally ended by interrupting the kernel, close the runner
# in a `finally` clause so it is released even when `run` is interrupted or raises.
try:
    runner.run(num_episodes=num_episode, callback=callback, callback_episode_frequency=1)
finally:
    runner.close()

Episodes:   0%|               | 0/6400 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%, comm=0.0%]2025-02-23 17:41:24.207477: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
Episodes:   2%|       | 96/6400 [21:42, reward=0.00, ts/ep=500, sec/ep=451.34, ms/ts=902.7, agent=2.6%, comm=73.6%]Exception in thread Thread-51287:
Traceback (most recent call last):
  File "/root/anaconda3/envs/surfrecon/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/root/anaconda3/envs/surfrecon/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/root/anaconda3/envs/surfrecon/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 569, in finish_execute
    self.observation = self.execute(actions=actions)
  File "/root/anaconda3/envs/surfrecon/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 539, in e

KeyboardInterrupt: 

# Run the DRL method in single environment (use when parallel not available)

In [None]:
# Single-environment alternative to the parallel run; uncomment the lines below to use it.
# num_episode = 300
# callback = Callback(num_episode, save_dir).episode_finish

# runner = Runner( 
#     agent=agent,
#     environment=setup_env(recording=False),
#     max_episode_timesteps=timesteps,
# )

# runner.run(num_episodes=num_episode, callback=callback, callback_episode_frequency=1)
# runner.close()

# Save the trained agent

In [None]:
from collections import OrderedDict

from tensorforce.agents.agent import TensorforceJSONEncoder

save_agent_dir = os.path.join(save_dir, 'saved_agent')
agent_name = 'agent'

# Save the model in TensorFlow format, then write a JSON description of the
# agent next to it under the same base name.
agent.model.save(directory=save_agent_dir, filename=agent_name, format='tensorflow', append=None)
spec_path = os.path.join(save_agent_dir, agent_name + '.json')


def _write_agent_spec(path, base_spec):
    """Write `base_spec` plus the agent's internals information to `path` as JSON.

    Args:
        path: Destination file; it is overwritten if it already exists.
        base_spec: Mapping of spec entries to store.

    Raises:
        Exception: Whatever reading the agent attributes, opening the file or
            JSON encoding raises; the caller decides how to recover.
    """
    spec = OrderedDict(base_spec)
    spec['internals'] = agent.internals_spec
    spec['initial_internals'] = agent.initial_internals()
    with open(path, 'w') as fp:
        json.dump(obj=spec, fp=fp, cls=TensorforceJSONEncoder)


try:
    # First choice: store the complete agent spec.
    _write_agent_spec(spec_path, agent.spec)
except Exception:
    # Fall back to a reduced spec holding only the states and actions
    # (presumably the full spec can contain entries the encoder cannot
    # serialise — confirm). `Exception` rather than `BaseException` so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        reduced_spec = OrderedDict([
            ('states', agent.spec['states']),
            ('actions', agent.spec['actions']),
        ])
        _write_agent_spec(spec_path, reduced_spec)
    except Exception as exc:
        # Do not leave a partially written spec behind; the file may not exist
        # at all if the failure happened before it was opened.
        if os.path.exists(spec_path):
            os.remove(spec_path)
        print('Agent saving failed', repr(exc))