
Reproducible issue: cannot converge to a valid grasp policy #7

Closed
wslgqq277g opened this issue Dec 26, 2023 · 2 comments

wslgqq277g commented Dec 26, 2023

Hi yz:
I am currently following your dex-series work, nice job!
Following the instructions, I copied the dexpoint env under the dexart training root. The main training code is organized as shown below; some env setup code is omitted, and the policy is trained on the YCB dataset instead of ShapeNet:
```python
import os
from collections import OrderedDict

import torch
import torch.nn as nn

# PointNetImaginationExtractorGP, IMG_CONFIG, PPO and AllegroRelocateRLEnv come from
# the dexart/dexpoint code base; their imports (and some env setup code) are omitted here.


def get_3d_policy_kwargs(extractor_name):
    feature_extractor_class = PointNetImaginationExtractorGP
    feature_extractor_kwargs = {"pc_key": "relocate-point_cloud",
                                # "pc_key": "instance_1-point_cloud",
                                # "gt_key": "instance_1-seg_gt",
                                "extractor_name": extractor_name,
                                "imagination_keys": [f'imagination_{key}' for key in IMG_CONFIG['relocate_goal_robot'].keys()],
                                "state_key": "state"
                                }
    policy_kwargs = {
        "features_extractor_class": feature_extractor_class,
        "features_extractor_kwargs": feature_extractor_kwargs,
        # "net_arch": [dict(pi=[64, 64], vf=[64, 64])],
        "activation_fn": nn.ReLU,
    }
    return policy_kwargs


def training():
    env_params = dict(object_name=object_name, rotation_reward_weight=rotation_reward_weight,
                      randomness_scale=1, use_visual_obs=use_visual_obs, use_gui=False,
                      # no_rgb=True
                      )
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        env_params["device"] = "cuda"
    # This instance is the same `env` that is passed to PPO below
    env = AllegroRelocateRLEnv(**env_params)
    model = PPO("PointCloudPolicy", env, verbose=1,
                n_epochs=args.ep,
                n_steps=(args.n // args.workers) * horizon,
                learning_rate=args.lr,
                batch_size=args.bs,
                seed=seed,
                policy_kwargs=get_3d_policy_kwargs(extractor_name=extractor_name),
                min_lr=args.lr,
                max_lr=args.lr,
                adaptive_kl=0.02,
                target_kl=0.2,
                )
    obs = env.reset()
    if pretrain_path is not None:
        # Load pretrained point-cloud extractor weights into the policy backbone
        state_dict: OrderedDict = torch.load(pretrain_path)
        model.policy.features_extractor.extractor.load_state_dict(state_dict, strict=False)
        print("load pretrained model: ", pretrain_path)
    rollout = int(model.num_timesteps / (horizon * args.n))
    if args.freeze:
        # Freeze the pretrained extractor so only the policy and value heads are updated
        model.policy.features_extractor.extractor.eval()
        for param in model.policy.features_extractor.extractor.parameters():
            param.requires_grad = False
        print("freeze model!")
    model.learn(
        total_timesteps=int(env_iter),
        reset_num_timesteps=False,
        iter_start=rollout,
        callback=None
    )
```
And the experiment result looks like this:
[screenshot of the training result]

Is the training code organized correctly? Or could you give some advice on how to reproduce the experiment result?

yzqin (Owner) commented Dec 28, 2023

If you are working on PointCloud RL without imagined Point Cloud, you can check the following script for PPO training:

```python
import argparse
import os
from pathlib import Path

import torch.nn as nn

from stable_baselines3.common.torch_layers import PointNetStateExtractor
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines3.ppo import PPO

# LabArmAllegroRelocateRLEnv, task_setting and add_default_scene_light are imported
# from the project's environment package (those imports are omitted in this snippet).

def create_relocate_env(object_name, use_visual_obs, object_category="YCB", use_gui=False, is_eval=False,
                        randomness_scale=1, pc_noise=True):
    if object_name == "mustard_bottle":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_front"
    elif object_name in ["tomato_soup_can", "potted_meat_can"]:
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    elif object_category == "egad":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    elif object_category.isnumeric() and object_category == "02876657":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_front"
    elif object_category.isnumeric() and object_category == "02946921":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    else:
        print(object_name)
        raise NotImplementedError
    rotation_reward_weight = 1
    frame_skip = 10
    env_params = dict(object_name=object_name, robot_name=robot_name, rotation_reward_weight=rotation_reward_weight,
                      randomness_scale=randomness_scale, use_visual_obs=use_visual_obs, use_gui=use_gui, no_rgb=True,
                      object_category=object_category, frame_skip=frame_skip)
    if is_eval:
        env_params["no_rgb"] = False
        env_params["need_offscreen_render"] = True

    # Specify rendering device if the computing device is given
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        env_params["device"] = "cuda"
    env = LabArmAllegroRelocateRLEnv(**env_params)

    if use_visual_obs:
        # Create camera and setup visual modality
        env.setup_camera_from_config(task_setting.CAMERA_CONFIG["relocate"])
        if pc_noise:
            env.setup_visual_obs_config(task_setting.OBS_CONFIG["relocate_noise"])
        else:
            env.setup_visual_obs_config(task_setting.OBS_CONFIG["relocate"])

    if is_eval:
        env.setup_camera_from_config(task_setting.CAMERA_CONFIG["viz_only"])
        add_default_scene_light(env.scene, env.renderer)

    return env

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--n', type=int, default=100)
    parser.add_argument('--workers', type=int, default=10)
    parser.add_argument('--lr', type=float, default=3e-4)
    parser.add_argument('--ep', type=int, default=10)
    parser.add_argument('--bs', type=int, default=1000)
    parser.add_argument('--seed', type=int, default=100)
    parser.add_argument('--iter', type=int, default=1000)
    parser.add_argument('--randomness', type=float, default=1.0)
    parser.add_argument('--exp', type=str)
    parser.add_argument('--object_name', type=str)
    parser.add_argument('--object_cat', default="YCB", type=str)
    parser.add_argument('--use_bn', type=bool, default=True)
    parser.add_argument('--noise_pc', type=bool, default=True)

    args = parser.parse_args()
    object_name = args.object_name
    object_cat = args.object_cat
    randomness = args.randomness
    exp_keywords = ["ppo_pc", object_name, args.exp, str(args.seed)]
    horizon = 200
    env_iter = args.iter * horizon * args.n

    config = {
        'n_env_horizon': args.n,
        'object_name': object_name,
        'object_category': object_cat,
        'update_iteration': args.iter,
        'total_step': env_iter,
        'randomness': randomness,
    }

    exp_name = "-".join(exp_keywords)
    result_path = Path("./results") / exp_name
    result_path.mkdir(exist_ok=True, parents=True)

    def create_env_fn():
        environment = create_relocate_env(object_name, use_visual_obs=True, object_category=object_cat,
                                          randomness_scale=randomness, pc_noise=args.noise_pc)
        return environment


    def create_eval_env_fn():
        environment = create_relocate_env(object_name, use_visual_obs=True, is_eval=True, object_category=object_cat,
                                          randomness_scale=randomness, pc_noise=args.noise_pc)
        return environment


    env = SubprocVecEnv([create_env_fn] * args.workers, "spawn")

    print(env.observation_space, env.action_space)

    feature_extractor_class = PointNetStateExtractor
    feature_extractor_kwargs = {
        "pc_key": "relocate-point_cloud",
        "local_channels": (64, 128, 256),
        "global_channels": (256,),
        "use_bn": args.use_bn,
        "state_mlp_size": (64, 64),
    }
    policy_kwargs = {
        "features_extractor_class": feature_extractor_class,
        "features_extractor_kwargs": feature_extractor_kwargs,
        "net_arch": [dict(pi=[64, 64], vf=[64, 64])],
        "activation_fn": nn.ReLU,
    }

    config = {'n_env_horizon': args.n, 'object_name': args.object_name, 'update_iteration': args.iter,
              'total_step': env_iter, "use_bn": args.use_bn, "policy_kwargs": policy_kwargs}

    model = PPO("PointCloudPolicy", env, verbose=1,
                n_epochs=args.ep,
                n_steps=(args.n // args.workers) * horizon,
                learning_rate=args.lr,
                batch_size=args.bs,
                seed=args.seed,
                policy_kwargs=policy_kwargs,
                min_lr=args.lr,
                max_lr=args.lr,
                adaptive_kl=0.02,
                target_kl=0.2,
                )

    model.learn(
        total_timesteps=int(env_iter),
    )
```
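(A note on the rollout sizing in this script: with the argparse defaults above, n=100, workers=10, horizon=200 and iter=1000, each PPO update collects (n // workers) * horizon = 2000 steps per worker, i.e. n * horizon = 20000 environment steps per update, and env_iter = iter * horizon * n = 20,000,000 total steps. The snippet below is only an illustrative sanity check of that arithmetic, not part of the original script.)

```python
# Rollout sizing implied by the script's argparse defaults (illustrative check only).
n, workers, horizon, iterations = 100, 10, 200, 1000

n_steps_per_worker = (n // workers) * horizon      # 2000 steps collected by each env worker per update
steps_per_update = n_steps_per_worker * workers    # 20000 environment steps per PPO update
env_iter = iterations * horizon * n                # 20,000,000 total training steps

assert steps_per_update == n * horizon
print(n_steps_per_worker, steps_per_update, env_iter)
```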

wslgqq277g (Author)

Thank you!
