-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reproducible issue: cannot converge to a valid grasp policy #7
Comments
If you are working on point-cloud RL without the imagined point cloud, you can check the following script for PPO training:
import argparse
import os
from pathlib import Path

import torch.nn as nn

from stable_baselines3.common.torch_layers import PointNetStateExtractor
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines3.ppo import PPO
def create_relocate_env(object_name, use_visual_obs, object_category="YCB", use_gui=False, is_eval=False,
                        randomness_scale=1, pc_noise=True):
    """Build and configure a ``LabArmAllegroRelocateRLEnv`` for one object.

    Args:
        object_name: Object to relocate (e.g. a YCB model name such as
            ``"mustard_bottle"``).
        use_visual_obs: If True, attach the task cameras and point-cloud
            observation configs to the environment.
        object_category: ``"YCB"``, ``"egad"``, or a ShapeNet category id
            string (``"02876657"`` bottles, ``"02946921"`` cans).
        use_gui: Open an on-screen viewer window.
        is_eval: Evaluation mode — re-enables RGB and offscreen rendering,
            and adds a visualization-only camera plus default scene lights.
        randomness_scale: Scale of the environment's domain randomization.
        pc_noise: Use the noisy point-cloud observation config
            (``"relocate_noise"``) instead of the clean one.

    Returns:
        The configured environment instance.

    Raises:
        NotImplementedError: If no robot mapping exists for the given
            object/category combination.
    """
    # Choose the wrist-mount orientation of the Allegro hand per object/category.
    if object_name == "mustard_bottle":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_front"
    elif object_name in ["tomato_soup_can", "potted_meat_can"]:
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    elif object_category == "egad":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    elif object_category == "02876657":  # ShapeNet bottles (isnumeric() guard was redundant)
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_front"
    elif object_category == "02946921":  # ShapeNet cans
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    else:
        # Raise with context instead of print-then-bare-raise so the caller
        # sees which combination is unsupported.
        raise NotImplementedError(
            f"No robot configured for object {object_name!r} "
            f"(category {object_category!r})")

    rotation_reward_weight = 1
    frame_skip = 10
    env_params = dict(object_name=object_name, robot_name=robot_name,
                      rotation_reward_weight=rotation_reward_weight,
                      randomness_scale=randomness_scale, use_visual_obs=use_visual_obs,
                      use_gui=use_gui, no_rgb=True,
                      object_category=object_category, frame_skip=frame_skip)
    if is_eval:
        # Evaluation needs RGB frames and offscreen rendering for visualization.
        env_params["no_rgb"] = False
        env_params["need_offscreen_render"] = True

    # Specify the rendering device when a computing device is pinned via env var.
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        env_params["device"] = "cuda"
    env = LabArmAllegroRelocateRLEnv(**env_params)

    if use_visual_obs:
        # Create cameras and set up the point-cloud observation modality.
        env.setup_camera_from_config(task_setting.CAMERA_CONFIG["relocate"])
        if pc_noise:
            env.setup_visual_obs_config(task_setting.OBS_CONFIG["relocate_noise"])
        else:
            env.setup_visual_obs_config(task_setting.OBS_CONFIG["relocate"])

    if is_eval:
        env.setup_camera_from_config(task_setting.CAMERA_CONFIG["viz_only"])
        add_default_scene_light(env.scene, env.renderer)
    return env
if __name__ == '__main__':
    def _str2bool(value):
        """Parse a boolean CLI value.

        BUG FIX: the original used ``type=bool``, which makes argparse call
        ``bool("False")`` — any non-empty string is truthy, so the flags
        could never be turned off from the command line.
        """
        if isinstance(value, bool):
            return value
        lowered = value.lower()
        if lowered in ("true", "1", "yes", "y"):
            return True
        if lowered in ("false", "0", "no", "n"):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument('--n', type=int, default=100)             # parallel episodes per update
    parser.add_argument('--workers', type=int, default=10)        # subprocess env count
    parser.add_argument('--lr', type=float, default=3e-4)         # learning rate
    parser.add_argument('--ep', type=int, default=10)             # PPO epochs per update
    parser.add_argument('--bs', type=int, default=1000)           # minibatch size
    parser.add_argument('--seed', type=int, default=100)
    parser.add_argument('--iter', type=int, default=1000)         # number of PPO updates
    parser.add_argument('--randomness', type=float, default=1.0)
    parser.add_argument('--exp', type=str)                        # experiment tag for the run name
    parser.add_argument('--object_name', type=str)
    parser.add_argument('--object_cat', default="YCB", type=str)
    parser.add_argument('--use_bn', type=_str2bool, default=True)
    parser.add_argument('--noise_pc', type=_str2bool, default=True)
    args = parser.parse_args()

    object_name = args.object_name
    object_cat = args.object_cat
    randomness = args.randomness

    exp_keywords = ["ppo_pc", object_name, args.exp, str(args.seed)]
    horizon = 200                                  # fixed episode length (env steps)
    env_iter = args.iter * horizon * args.n        # total environment steps to train for

    exp_name = "-".join(exp_keywords)
    result_path = Path("./results") / exp_name
    result_path.mkdir(exist_ok=True, parents=True)

    def create_env_fn():
        """Factory for a training env (visual obs, no eval extras)."""
        environment = create_relocate_env(object_name, use_visual_obs=True, object_category=object_cat,
                                          randomness_scale=randomness, pc_noise=args.noise_pc)
        return environment

    def create_eval_env_fn():
        """Factory for an evaluation env (RGB + offscreen rendering enabled)."""
        environment = create_relocate_env(object_name, use_visual_obs=True, is_eval=True,
                                          object_category=object_cat,
                                          randomness_scale=randomness, pc_noise=args.noise_pc)
        return environment

    env = SubprocVecEnv([create_env_fn] * args.workers, "spawn")
    print(env.observation_space, env.action_space)

    feature_extractor_class = PointNetStateExtractor
    feature_extractor_kwargs = {
        "pc_key": "relocate-point_cloud",
        "local_channels": (64, 128, 256),
        "global_channels": (256,),
        "use_bn": args.use_bn,
        "state_mlp_size": (64, 64),
    }
    policy_kwargs = {
        "features_extractor_class": feature_extractor_class,
        "features_extractor_kwargs": feature_extractor_kwargs,
        "net_arch": [dict(pi=[64, 64], vf=[64, 64])],
        "activation_fn": nn.ReLU,
    }

    # DEAD-CODE FIX: the original built this dict twice and the second
    # assignment silently overwrote the first; merged into one record.
    config = {
        'n_env_horizon': args.n,
        'object_name': object_name,
        'object_category': object_cat,
        'update_iteration': args.iter,
        'total_step': env_iter,
        'randomness': randomness,
        'use_bn': args.use_bn,
        'policy_kwargs': policy_kwargs,
    }

    # NOTE(review): min_lr/max_lr/adaptive_kl are not vanilla stable-baselines3
    # PPO kwargs — this presumably targets the project's modified SB3 fork;
    # confirm against that fork's PPO signature.
    model = PPO("PointCloudPolicy", env, verbose=1,
                n_epochs=args.ep,
                n_steps=(args.n // args.workers) * horizon,
                learning_rate=args.lr,
                batch_size=args.bs,
                seed=args.seed,
                policy_kwargs=policy_kwargs,
                min_lr=args.lr,
                max_lr=args.lr,
                adaptive_kl=0.02,
                target_kl=0.2,
                )

    model.learn(
        total_timesteps=int(env_iter),
    )
Thank you! |
Closed
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi yz:
I am currently following your dex-series work — nice job!
However, I followed the instructions to copy the DexPoint env under the DexArt training root. The main training code is organized as shown below (some environment-setup code is omitted), and the policy is trained on the YCB dataset instead of ShapeNet:
`
`
And the experiment result just like this:
Is the training code organized correctly? Could you give some advice on reproducing the experiment results?
The text was updated successfully, but these errors were encountered: