
Reproducible issue: cannot converge to a valid grasp policy #7

Closed
wslgqq277g opened this issue Dec 26, 2023 · 2 comments

wslgqq277g commented Dec 26, 2023

Hi yz:
I am currently following your dex-series work, nice job!
Following the instructions, I copied the dexpoint env under the dexart training root. The main training code is organized as shown below; some env setup code is omitted, and the policy is trained on the YCB dataset instead of ShapeNet:
```python
import os
from collections import OrderedDict

import torch
import torch.nn as nn

# PointNetImaginationExtractorGP, IMG_CONFIG, PPO and AllegroRelocateRLEnv come from
# the dexart/dexpoint code base; their imports (and some env setup code) are omitted here.


def get_3d_policy_kwargs(extractor_name):
    feature_extractor_class = PointNetImaginationExtractorGP
    feature_extractor_kwargs = {"pc_key": "relocate-point_cloud",
                                # "pc_key": "instance_1-point_cloud",
                                # "gt_key": "instance_1-seg_gt",
                                "extractor_name": extractor_name,
                                "imagination_keys": [f'imagination_{key}' for key in IMG_CONFIG['relocate_goal_robot'].keys()],
                                "state_key": "state"
                                }
    policy_kwargs = {
        "features_extractor_class": feature_extractor_class,
        "features_extractor_kwargs": feature_extractor_kwargs,
        # "net_arch": [dict(pi=[64, 64], vf=[64, 64])],
        "activation_fn": nn.ReLU,
    }
    return policy_kwargs


def training():
    env_params = dict(object_name=object_name, rotation_reward_weight=rotation_reward_weight,
                      randomness_scale=1, use_visual_obs=use_visual_obs, use_gui=False,
                      # no_rgb=True
                      )
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        env_params["device"] = "cuda"
    # This instance is the same `env` that is passed to PPO below
    env = AllegroRelocateRLEnv(**env_params)
    model = PPO("PointCloudPolicy", env, verbose=1,
                n_epochs=args.ep,
                n_steps=(args.n // args.workers) * horizon,
                learning_rate=args.lr,
                batch_size=args.bs,
                seed=seed,
                policy_kwargs=get_3d_policy_kwargs(extractor_name=extractor_name),
                min_lr=args.lr,
                max_lr=args.lr,
                adaptive_kl=0.02,
                target_kl=0.2,
                )
    obs = env.reset()
    if pretrain_path is not None:
        # Load pretrained point-cloud extractor weights into the policy backbone
        state_dict: OrderedDict = torch.load(pretrain_path)
        model.policy.features_extractor.extractor.load_state_dict(state_dict, strict=False)
        print("load pretrained model: ", pretrain_path)
    rollout = int(model.num_timesteps / (horizon * args.n))
    if args.freeze:
        # Freeze the pretrained extractor so only the policy and value heads are updated
        model.policy.features_extractor.extractor.eval()
        for param in model.policy.features_extractor.extractor.parameters():
            param.requires_grad = False
        print("freeze model!")
    model.learn(
        total_timesteps=int(env_iter),
        reset_num_timesteps=False,
        iter_start=rollout,
        callback=None
    )
```
And the experiment result looks like this:
[screenshot of the training result]

Is the training code organized correctly? Or could you give some advice on how to reproduce the experiment result?

yzqin (Owner) commented Dec 28, 2023

If you are working on PointCloud RL without imagined Point Cloud, you can check the following script for PPO training:

```python
import argparse
import os
from pathlib import Path

import torch.nn as nn

from stable_baselines3.common.torch_layers import PointNetStateExtractor
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines3.ppo import PPO

# LabArmAllegroRelocateRLEnv, task_setting and add_default_scene_light are imported
# from the project's environment package (those imports are omitted in this snippet).

def create_relocate_env(object_name, use_visual_obs, object_category="YCB", use_gui=False, is_eval=False,
                        randomness_scale=1, pc_noise=True):
    if object_name == "mustard_bottle":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_front"
    elif object_name in ["tomato_soup_can", "potted_meat_can"]:
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    elif object_category == "egad":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    elif object_category.isnumeric() and object_category == "02876657":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_front"
    elif object_category.isnumeric() and object_category == "02946921":
        robot_name = "allegro_hand_xarm6_wrist_mounted_face_down"
    else:
        print(object_name)
        raise NotImplementedError
    rotation_reward_weight = 1
    frame_skip = 10
    env_params = dict(object_name=object_name, robot_name=robot_name, rotation_reward_weight=rotation_reward_weight,
                      randomness_scale=randomness_scale, use_visual_obs=use_visual_obs, use_gui=use_gui, no_rgb=True,
                      object_category=object_category, frame_skip=frame_skip)
    if is_eval:
        env_params["no_rgb"] = False
        env_params["need_offscreen_render"] = True

    # Specify rendering device if the computing device is given
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        env_params["device"] = "cuda"
    env = LabArmAllegroRelocateRLEnv(**env_params)

    if use_visual_obs:
        # Create camera and setup visual modality
        env.setup_camera_from_config(task_setting.CAMERA_CONFIG["relocate"])
        if pc_noise:
            env.setup_visual_obs_config(task_setting.OBS_CONFIG["relocate_noise"])
        else:
            env.setup_visual_obs_config(task_setting.OBS_CONFIG["relocate"])

    if is_eval:
        env.setup_camera_from_config(task_setting.CAMERA_CONFIG["viz_only"])
        add_default_scene_light(env.scene, env.renderer)

    return env

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--n', type=int, default=100)
    parser.add_argument('--workers', type=int, default=10)
    parser.add_argument('--lr', type=float, default=3e-4)
    parser.add_argument('--ep', type=int, default=10)
    parser.add_argument('--bs', type=int, default=1000)
    parser.add_argument('--seed', type=int, default=100)
    parser.add_argument('--iter', type=int, default=1000)
    parser.add_argument('--randomness', type=float, default=1.0)
    parser.add_argument('--exp', type=str)
    parser.add_argument('--object_name', type=str)
    parser.add_argument('--object_cat', default="YCB", type=str)
    parser.add_argument('--use_bn', type=bool, default=True)
    parser.add_argument('--noise_pc', type=bool, default=True)

    args = parser.parse_args()
    object_name = args.object_name
    object_cat = args.object_cat
    randomness = args.randomness
    exp_keywords = ["ppo_pc", object_name, args.exp, str(args.seed)]
    horizon = 200
    env_iter = args.iter * horizon * args.n

    config = {
        'n_env_horizon': args.n,
        'object_name': object_name,
        'object_category': object_cat,
        'update_iteration': args.iter,
        'total_step': env_iter,
        'randomness': randomness,
    }

    exp_name = "-".join(exp_keywords)
    result_path = Path("./results") / exp_name
    result_path.mkdir(exist_ok=True, parents=True)

    def create_env_fn():
        environment = create_relocate_env(object_name, use_visual_obs=True, object_category=object_cat,
                                          randomness_scale=randomness, pc_noise=args.noise_pc)
        return environment


    def create_eval_env_fn():
        environment = create_relocate_env(object_name, use_visual_obs=True, is_eval=True, object_category=object_cat,
                                          randomness_scale=randomness, pc_noise=args.noise_pc)
        return environment


    env = SubprocVecEnv([create_env_fn] * args.workers, "spawn")

    print(env.observation_space, env.action_space)

    feature_extractor_class = PointNetStateExtractor
    feature_extractor_kwargs = {
        "pc_key": "relocate-point_cloud",
        "local_channels": (64, 128, 256),
        "global_channels": (256,),
        "use_bn": args.use_bn,
        "state_mlp_size": (64, 64),
    }
    policy_kwargs = {
        "features_extractor_class": feature_extractor_class,
        "features_extractor_kwargs": feature_extractor_kwargs,
        "net_arch": [dict(pi=[64, 64], vf=[64, 64])],
        "activation_fn": nn.ReLU,
    }

    config = {'n_env_horizon': args.n, 'object_name': args.object_name, 'update_iteration': args.iter,
              'total_step': env_iter, "use_bn": args.use_bn, "policy_kwargs": policy_kwargs}

    model = PPO("PointCloudPolicy", env, verbose=1,
                n_epochs=args.ep,
                n_steps=(args.n // args.workers) * horizon,
                learning_rate=args.lr,
                batch_size=args.bs,
                seed=args.seed,
                policy_kwargs=policy_kwargs,
                min_lr=args.lr,
                max_lr=args.lr,
                adaptive_kl=0.02,
                target_kl=0.2,
                )

    model.learn(
        total_timesteps=int(env_iter),
    )
```
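(A note on the rollout sizing in this script: with the argparse defaults above, n=100, workers=10, horizon=200 and iter=1000, each PPO update collects (n // workers) * horizon = 2000 steps per worker, i.e. n * horizon = 20000 environment steps per update, and env_iter = iter * horizon * n = 20,000,000 total steps. The snippet below is only an illustrative sanity check of that arithmetic, not part of the original script.)

```python
# Rollout sizing implied by the script's argparse defaults (illustrative check only).
n, workers, horizon, iterations = 100, 10, 200, 1000

n_steps_per_worker = (n // workers) * horizon      # 2000 steps collected by each env worker per update
steps_per_update = n_steps_per_worker * workers    # 20000 environment steps per PPO update
env_iter = iterations * horizon * n                # 20,000,000 total training steps

assert steps_per_update == n * horizon
print(n_steps_per_worker, steps_per_update, env_iter)
```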

wslgqq277g (Author)

Thank you!
