omniisaacgymenvs/tasks/shadow_hand.py

# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from omniisaacgymenvs.tasks.base.rl_task import RLTask
from omniisaacgymenvs.tasks.shared.in_hand_manipulation import InHandManipulationTask
from omniisaacgymenvs.robots.articulations.shadow_hand import ShadowHand
from omniisaacgymenvs.robots.articulations.views.shadow_hand_view import ShadowHandView

from omni.isaac.core.utils.prims import get_prim_at_path
from omni.isaac.core.utils.torch import *

import numpy as np
import torch
import math


class ShadowHandTask(InHandManipulationTask):
    def __init__(
        self,
        name,
        sim_config,
        env,
        offset=None
    ) -> None:
        self._sim_config = sim_config
        self._cfg = sim_config.config
        self._task_cfg = sim_config.task_config

        self.object_type = self._task_cfg["env"]["objectType"]
        assert self.object_type in ["block"]

        self.obs_type = self._task_cfg["env"]["observationType"]
        if not (self.obs_type in ["openai", "full_no_vel", "full", "full_state"]):
            raise Exception(
                "Unknown type of observations!\nobservationType should be one of: [openai, full_no_vel, full, full_state]")
        print("Obs type:", self.obs_type)
        self.num_obs_dict = {
            "openai": 42,
            "full_no_vel": 77,
            "full": 157,
            "full_state": 187,
        }

        self.asymmetric_obs = self._task_cfg["env"]["asymmetric_observations"]
        self.use_vel_obs = False

        self.fingertip_obs = True
        self.fingertips = ["robot0:ffdistal", "robot0:mfdistal", "robot0:rfdistal", "robot0:lfdistal", "robot0:thdistal"]
        self.num_fingertips = len(self.fingertips)

        self.object_scale = torch.tensor([1.0, 1.0, 1.0])
        self.force_torque_obs_scale = 10.0

        num_states = 0
        if self.asymmetric_obs:
            num_states = 187

        self._num_observations = self.num_obs_dict[self.obs_type]
        self._num_actions = 20
        self._num_states = num_states

        InHandManipulationTask.__init__(self, name=name, env=env)
        return

    def get_hand(self):
        hand_start_translation = torch.tensor([0.0, 0.0, 0.5], device=self.device)
        hand_start_orientation = torch.tensor([0.0, 0.0, -0.70711, 0.70711], device=self.device)

        shadow_hand = ShadowHand(
            prim_path=self.default_zero_env_path + "/shadow_hand", 
            name="shadow_hand",
            translation=hand_start_translation, 
            orientation=hand_start_orientation,
        )
        self._sim_config.apply_articulation_settings(
            "shadow_hand", 
            get_prim_at_path(shadow_hand.prim_path), 
            self._sim_config.parse_actor_config("shadow_hand"),
        )
        shadow_hand.set_shadow_hand_properties(stage=self._stage, shadow_hand_prim=shadow_hand.prim)
        shadow_hand.set_motor_control_mode(stage=self._stage, shadow_hand_path=shadow_hand.prim_path)
        pose_dy, pose_dz = -0.39, 0.10
        return hand_start_translation, pose_dy, pose_dz
    
    def get_hand_view(self, scene):
        hand_view = ShadowHandView(prim_paths_expr="/World/envs/.*/shadow_hand", name="shadow_hand_view")
        scene.add(hand_view._fingers)
        return hand_view

    def get_observations(self):
        self.get_object_goal_observations()

        self.fingertip_pos, self.fingertip_rot = self._hands._fingers.get_world_poses(clone=False)
        self.fingertip_pos -= self._env_pos.repeat((1, self.num_fingertips)).reshape(self.num_envs * self.num_fingertips, 3)
        self.fingertip_velocities = self._hands._fingers.get_velocities(clone=False)

        self.hand_dof_pos = self._hands.get_joint_positions(clone=False)
        self.hand_dof_vel = self._hands.get_joint_velocities(clone=False)

        if self.obs_type == "full_state" or self.asymmetric_obs:
            self.vec_sensor_tensor = self._hands._physics_view.get_force_sensor_forces().reshape(self.num_envs, 6*self.num_fingertips)

        if self.obs_type == "openai":
            self.compute_fingertip_observations(True)
        elif self.obs_type == "full_no_vel":
            self.compute_full_observations(True)
        elif self.obs_type == "full":
            self.compute_full_observations()
        elif self.obs_type == "full_state":
            self.compute_full_state(False)
        else:
            print("Unkown observations type!")
        
        if self.asymmetric_obs:
            self.compute_full_state(True)

        observations = {
            self._hands.name: {
                "obs_buf": self.obs_buf
            }
        }
        return observations
    

    def compute_fingertip_observations(self, no_vel=False):
        if no_vel:
            # Per https://arxiv.org/pdf/1808.00177.pdf Table 2
            #   Fingertip positions
            #   Object Position, but not orientation
            #   Relative target orientation

            # 3*self.num_fingertips = 15
            self.obs_buf[:, 0:15] = self.fingertip_pos.reshape(self.num_envs, 15)
            self.obs_buf[:, 15:18] = self.object_pos
            self.obs_buf[:, 18:22] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
            self.obs_buf[:, 22:42] = self.actions
        else:
            # 13*self.num_fingertips = 65
            self.obs_buf[:, 0:65] = self.fingertip_state.reshape(self.num_envs, 65)

            self.obs_buf[:, 0:15] = self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)
            self.obs_buf[:, 15:35] = self.fingertip_rot.reshape(self.num_envs, 4*self.num_fingertips)
            self.obs_buf[:, 35:65] = self.fingertip_velocities.reshape(self.num_envs, 6*self.num_fingertips)

            self.obs_buf[:, 65:68] = self.object_pos
            self.obs_buf[:, 68:72] = self.object_rot
            self.obs_buf[:, 72:75] = self.object_linvel
            self.obs_buf[:, 75:78] = self.vel_obs_scale * self.object_angvel

            self.obs_buf[:, 78:81] = self.goal_pos
            self.obs_buf[:, 81:85] = self.goal_rot
            self.obs_buf[:, 85:89] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
            self.obs_buf[:, 89:109] = self.actions


    def compute_full_observations(self, no_vel=False):
        if no_vel:
            self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.hand_dof_pos,
                self.hand_dof_lower_limits, self.hand_dof_upper_limits)
            
            self.obs_buf[:, 24:37] = self.object_pos
            self.obs_buf[:, 27:31] = self.object_rot
            self.obs_buf[:, 31:34] = self.goal_pos
            self.obs_buf[:, 34:38] = self.goal_rot
            self.obs_buf[:, 38:42] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
            self.obs_buf[:, 42:57] = self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)
            self.obs_buf[:, 57:77] = self.actions
        else:
            self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.hand_dof_pos,
                self.hand_dof_lower_limits, self.hand_dof_upper_limits)
            self.obs_buf[:, self.num_hand_dofs:2*self.num_hand_dofs] = self.vel_obs_scale * self.hand_dof_vel
            
            self.obs_buf[:, 48:51] = self.object_pos
            self.obs_buf[:, 51:55] = self.object_rot
            self.obs_buf[:, 55:58] = self.object_linvel
            self.obs_buf[:, 58:61] = self.vel_obs_scale * self.object_angvel

            self.obs_buf[:, 61:64] = self.goal_pos
            self.obs_buf[:, 64:68] = self.goal_rot
            self.obs_buf[:, 68:72] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))

            # (7+6)*self.num_fingertips = 65
            self.obs_buf[:, 72:87] = self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)
            self.obs_buf[:, 87:107] = self.fingertip_rot.reshape(self.num_envs, 4*self.num_fingertips)
            self.obs_buf[:, 107:137] = self.fingertip_velocities.reshape(self.num_envs, 6*self.num_fingertips)
           
            self.obs_buf[:, 137:157] = self.actions
    

    def compute_full_state(self, asymm_obs=False):
        if asymm_obs:
            self.states_buf[:, 0:self.num_hand_dofs] = unscale(self.hand_dof_pos,
                self.hand_dof_lower_limits, self.hand_dof_upper_limits)
            self.states_buf[:, self.num_hand_dofs:2*self.num_hand_dofs] = self.vel_obs_scale * self.hand_dof_vel
            # self.states_buf[:, 2*self.num_hand_dofs:3*self.num_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor

            obj_obs_start = 2*self.num_hand_dofs  # 48
            self.states_buf[:, obj_obs_start:obj_obs_start + 3] = self.object_pos
            self.states_buf[:, obj_obs_start + 3:obj_obs_start + 7] = self.object_rot
            self.states_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel
            self.states_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel

            goal_obs_start = obj_obs_start + 13  # 61
            self.states_buf[:, goal_obs_start:goal_obs_start + 3] = self.goal_pos
            self.states_buf[:, goal_obs_start + 3:goal_obs_start + 7] = self.goal_rot
            self.states_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))

            # fingertip observations, state(pose and vel) + force-torque sensors
            num_ft_states = 13 * self.num_fingertips  # 65
            num_ft_force_torques = 6 * self.num_fingertips  # 30

            fingertip_obs_start = goal_obs_start + 11  # 72
            self.states_buf[:, fingertip_obs_start:fingertip_obs_start + 3*self.num_fingertips] = self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)
            self.states_buf[:, fingertip_obs_start + 3*self.num_fingertips:fingertip_obs_start + 7*self.num_fingertips] = self.fingertip_rot.reshape(self.num_envs, 4*self.num_fingertips)
            self.states_buf[:, fingertip_obs_start + 7*self.num_fingertips:fingertip_obs_start + 13*self.num_fingertips] = self.fingertip_velocities.reshape(self.num_envs, 6*self.num_fingertips)

            self.states_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor

            # obs_end = 72 + 65 + 30 = 167
            # obs_total = obs_end + num_actions = 187
            obs_end = fingertip_obs_start + num_ft_states + num_ft_force_torques
            self.states_buf[:, obs_end:obs_end + self.num_actions] = self.actions
        else:
            self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.hand_dof_pos,
                self.hand_dof_lower_limits, self.hand_dof_upper_limits)
            self.obs_buf[:, self.num_hand_dofs:2*self.num_hand_dofs] = self.vel_obs_scale * self.hand_dof_vel
            self.obs_buf[:, 2*self.num_hand_dofs:3*self.num_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor

            obj_obs_start = 3*self.num_hand_dofs  # 48
            self.obs_buf[:, obj_obs_start:obj_obs_start + 3] = self.object_pos
            self.obs_buf[:, obj_obs_start + 3:obj_obs_start + 7] = self.object_rot
            self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel
            self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel

            goal_obs_start = obj_obs_start + 13  # 61
            self.obs_buf[:, goal_obs_start:goal_obs_start + 3] = self.goal_pos
            self.obs_buf[:, goal_obs_start + 3:goal_obs_start + 7] = self.goal_rot
            self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))

            # fingertip observations, state(pose and vel) + force-torque sensors
            num_ft_states = 13 * self.num_fingertips  # 65
            num_ft_force_torques = 6 * self.num_fingertips  # 30

            fingertip_obs_start = goal_obs_start + 11  # 72
            self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + 3*self.num_fingertips] = self.fingertip_pos.reshape(self.num_envs, 3*self.num_fingertips)
            self.obs_buf[:, fingertip_obs_start + 3*self.num_fingertips:fingertip_obs_start + 7*self.num_fingertips] = self.fingertip_rot.reshape(self.num_envs, 4*self.num_fingertips)
            self.obs_buf[:, fingertip_obs_start + 7*self.num_fingertips:fingertip_obs_start + 13*self.num_fingertips] = self.fingertip_velocities.reshape(self.num_envs, 6*self.num_fingertips)
            self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor

            # obs_end = 96 + 65 + 30 = 167
            # obs_total = obs_end + num_actions = 187
            obs_end = fingertip_obs_start + num_ft_states + num_ft_force_torques
            self.obs_buf[:, obs_end:obs_end + self.num_actions] = self.actions