"""Defines the `MjCambrianEye` class, which is used to define an eye for the cambrianenvironment. The eye is essentially a camera that is attached to a body in theenvironment. The eye can render images and provide observations to the agent."""fromtypingimportCallable,List,Optional,Self,Tuplefromxml.etree.ElementTreeimportElementimportmujocoasmjimportnumpyasnpimporttorchfromgymnasiumimportspacesfromhydra_configimportHydraContainerConfig,config_wrapperfromscipy.spatial.transformimportRotationasRfromcambrian.rendererimportMjCambrianRenderer,MjCambrianRendererConfigfromcambrian.renderer.overlaysimportMjCambrianCursor,MjCambrianViewerOverlayfromcambrian.renderer.render_utilsimportconvert_depth_distances,convert_depth_to_rgbfromcambrian.utilsimportMjCambrianGeometry,device,get_loggerfromcambrian.utils.cambrian_xmlimportMjCambrianXMLfromcambrian.utils.specimportMjCambrianSpecfromcambrian.utils.typesimportObsType@config_wrapper
class MjCambrianEyeConfig(HydraContainerConfig):
    """Configuration container describing a single eye. Used for type hinting.

    Attributes:
        instance (Callable[[Self, str], MjCambrianEye]): Factory used to build the
            eye. It is called with this config and the name of the eye.
        fov (Tuple[float, float]): Field of view in degrees. Independent of the
            `fovy` field in the MJCF xml and mutually exclusive with it. Used to
            calculate the sensorsize field; overrides `sensorsize` if that is set.
            If `focal` is unset, it is set to 1, 1. Fmt: fovy fovx.
        focal (Tuple[float, float]): The focal length of the camera.
            Fmt: focal_y focal_x.
        sensorsize (Tuple[float, float]): The size of the sensor.
            Fmt: height width.
        resolution (Tuple[int, int]): The size of the rendered image.
            Fmt: height width.
        coord (Tuple[float, float]): Placement of the eye on the agent, specified
            in degrees. This attr isn't consumed by the eye to decide *where* it
            lives; the agent uses it, since the eye has no knowledge of the
            geometry it is being placed on. Fmt: lat lon.
        orthographic (bool): Whether the camera is orthographic.
        renderer (MjCambrianRendererConfig): The renderer config to use for the
            underlying renderer.
    """

    # Factory + optics.
    instance: Callable[[Self, str], "MjCambrianEye"]
    fov: Tuple[float, float]
    focal: Tuple[float, float]
    sensorsize: Tuple[float, float]

    # Image size and placement.
    resolution: Tuple[int, int]
    coord: Tuple[float, float]

    # Projection model and rendering backend.
    orthographic: bool
    renderer: MjCambrianRendererConfig
class MjCambrianEye:
    """Defines an eye for the cambrian environment.

    The eye wraps a MuJoCo camera and provides helpers for generating the camera's
    XML and for rendering observations from it. The camera element is added under
    a parent body, so movement of the parent body moves the eye.

    Args:
        config (MjCambrianEyeConfig): The configuration for the eye.
        name (str): The name of the eye. Also used as the camera name in the XML.

    Keyword Args:
        disable_render (bool): Whether to disable rendering. Defaults to False.
            When True no renderer is created; this is useful for derived classes
            which don't intend to use the default rendering mechanism.
    """

    def __init__(
        self,
        config: MjCambrianEyeConfig,
        name: str,
        *,
        disable_render: bool = False,
    ):
        self._config = config
        self._name = name

        # Which outputs the configured renderer produces.
        self._renders_rgb = "rgb_array" in self._config.renderer.render_modes
        self._renders_depth = "depth_array" in self._config.renderer.render_modes
        assert (
            self._renders_rgb or self._renders_depth
        ), "Need at least one render mode."

        # The observation buffer itself is allocated in `reset`; only its shape is
        # captured here.
        self._prev_obs_shape = self.observation_space.shape
        self._prev_obs: Optional[torch.Tensor] = None
        self._fixedcamid = -1
        self._spec: Optional[MjCambrianSpec] = None

        self._renderer: Optional[MjCambrianRenderer] = None
        if not disable_render:
            self._renderer = MjCambrianRenderer(self._config.renderer)

    def generate_xml(
        self,
        parent_xml: MjCambrianXML,
        geom: MjCambrianGeometry,
        parent_body_name: Optional[str] = None,
        parent: Optional[List[Element] | Element] = None,
    ) -> MjCambrianXML:
        """Generate the xml for the eye.

        In order to combine the xml for an eye with the xml for the agent that it's
        attached to, we need to replicate the path with which we want to attach the
        eye. For instance, if the body with which we want to attach the eye to is
        at `mujoco/worldbody/torso`, then we need to replicate that path in the new
        xml. This is done with the `MjCambrianXML` helpers.

        Args:
            parent_xml (MjCambrianXML): The xml of the parent body. Used as a
                reference to extract the path of the parent body.
            geom (MjCambrianGeometry): The geometry of the parent body. Used to
                calculate the pos and quat of the eye.
            parent_body_name (Optional[str]): The name of the parent body. Will
                search for the body tag with this name, i.e.
                <body name="<parent_body_name>" ...>. Either this or `parent`
                must be set.
            parent (Optional[List[Element] | Element]): The parent element to
                attach the eye to. If set, `parent_body_name` will be ignored.
                Either this or `parent_body_name` must be set.

        Returns:
            MjCambrianXML: A new xml containing the (replicated) parent path and
            the camera element for this eye.
        """
        xml = MjCambrianXML.make_empty()

        if parent is None:
            # Get the parent body reference
            parent_body = parent_xml.find(".//body", name=parent_body_name)
            assert parent_body is not None, f"Could not find body '{parent_body_name}'."

            # Iterate through the path and add the parent elements to the new xml
            parent = None
            elements, _ = parent_xml.get_path(parent_body)
            for element in elements:
                if (
                    temp_parent := xml.find(f".//{element.tag}", **element.attrib)
                ) is not None:
                    # If the element already exists, then we'll use that as the
                    # parent
                    parent = temp_parent
                    continue
                parent = xml.add(parent, element.tag, **element.attrib)
            assert parent is not None, f"Could not find parent for '{parent_body_name}'"

        # Finally add the camera element at the end
        pos, quat = self._calculate_pos_quat(geom, self._config.coord)
        # Without a renderer there is no configured image size; fall back to 1x1.
        resolution = [1, 1]
        if self._renderer is not None:
            resolution = [self._renderer.config.width, self._renderer.config.height]
        xml.add(
            parent,
            "camera",
            name=self._name,
            mode="fixed",
            pos=" ".join(map(str, pos)),
            quat=" ".join(map(str, quat)),
            focal=" ".join(map(str, self._config.focal)),
            sensorsize=" ".join(map(str, self._config.sensorsize)),
            resolution=" ".join(map(str, resolution)),
            orthographic=str(self._config.orthographic).lower(),
        )

        return xml

    def _calculate_pos_quat(
        self, geom: MjCambrianGeometry, coord: Tuple[float, float]
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Calculates the position and quaternion of the eye based on the geometry
        of the parent body.

        The position is obtained by rotating the point `[-geom.rbound, 0, 0]` by
        the lat/lon given in `coord` (degrees) and offsetting it by `geom.pos`.
        The quaternion is built from the same angles so that, per the original
        author's description, the eye faces the center of the geometry.

        Todo:
            rotations are weird. fix this.
        """
        # NOTE(review): the values returned here come from scipy (`apply` /
        # `as_quat`), so they look like numpy arrays rather than the annotated
        # torch tensors -- confirm and adjust the annotation if so.
        lat, lon = torch.deg2rad(torch.tensor(coord))
        lon += torch.pi / 2

        default_rot = R.from_euler("z", torch.pi / 2)
        pos_rot = default_rot * R.from_euler("yz", [lat, lon])
        rot_rot = R.from_euler("z", lat) * R.from_euler("y", -lon) * default_rot

        pos = pos_rot.apply([-geom.rbound, 0, 0]) + geom.pos
        # NOTE(review): scipy's `as_quat()` is scalar-last (x, y, z, w) by default,
        # while MJCF `quat` attributes are documented as (w, x, y, z). Confirm the
        # component order written to the XML is the intended one.
        quat = rot_rot.as_quat()
        return pos, quat

    def reset(self, spec: MjCambrianSpec) -> ObsType:
        """Sets up the camera for rendering. This should be called before rendering
        the first time.

        Args:
            spec (MjCambrianSpec): The spec to render from; it is stored on the eye
                and used to look up this eye's camera id by name.

        Returns:
            ObsType: The first observation, as returned by `step()`.
        """
        self._spec = spec

        if self._renderer is None:
            # NOTE(review): in this base class `step()` asserts that a renderer
            # exists when no observation is passed, so this path presumably relies
            # on a derived class overriding `step` -- confirm.
            return self.step()

        resolution = [self._renderer.config.width, self._renderer.config.height]
        self._renderer.reset(spec, *resolution)

        # Point the renderer's viewer at this eye's fixed camera.
        self._fixedcamid = spec.get_camera_id(self._name)
        assert self._fixedcamid != -1, f"Camera '{self._name}' not found."
        self._renderer.viewer.camera.type = mj.mjtCamera.mjCAMERA_FIXED
        self._renderer.viewer.camera.fixedcamid = self._fixedcamid

        # Persistent buffer that `_update_obs` copies each new observation into.
        self._prev_obs = torch.zeros(
            self._prev_obs_shape,
            dtype=torch.float32,
            device=device,
        )

        obs = self.step()
        if obs.device != self._prev_obs.device:
            get_logger().warning(
                "Device mismatch. obs.device: "
                f"{obs.device}, self._prev_obs.device: {self._prev_obs.device}"
            )
        return obs

    def step(self, obs: Optional[ObsType] = None) -> ObsType:
        """Renders (unless an observation is supplied) and stores the observation.

        Args:
            obs (Optional[ObsType]): The observation to set. Defaults to None, in
                which case the renderer is used to produce one. This can be used by
                derived classes to set the observation directly.

        Returns:
            ObsType: The cached observation buffer after it has been updated.
        """
        if obs is None:
            assert self._renderer is not None, "Cannot step without a renderer."
            obs = self._renderer.render()
            if self._renders_rgb and self._renders_depth:
                # If both are rendered, then we only return the rgb
                get_logger().warning(
                    "Both rgb and depth are rendered. Using only rgb.",
                    extra={"once": True},
                )
                obs = obs[0]

        return self._update_obs(obs)

    def _update_obs(self, obs: ObsType) -> ObsType:
        """Copy `obs` into the cached previous-observation buffer and return that
        buffer. The buffer is updated in place rather than replaced."""
        self._prev_obs.copy_(obs, non_blocking=True)
        return self._prev_obs

    def render(self) -> List[MjCambrianViewerOverlay]:
        """Build the debug overlay for this eye from the last observation.

        This differs from `step` in that this is a debug method: nothing new is
        rendered here. The last observation is turned into an image overlay that
        is used to visualize the eye in the viewer. When the eye renders only
        depth, the observation is first converted to an rgb image.

        Returns:
            List[MjCambrianViewerOverlay]: A single image overlay anchored at the
            bottom left, on the back layer.
        """
        # Start from the cached observation and only convert it when the eye is
        # depth-only. (Previously the converted image was overwritten by the raw
        # observation, so the conversion never took effect.)
        image = self._prev_obs
        if self._renders_depth and not self._renders_rgb:
            image = convert_depth_to_rgb(
                convert_depth_distances(self._spec.model, image),
                znear=0,
                zfar=self._spec.model.stat.extent,
            )

        position = MjCambrianCursor.Position.BOTTOM_LEFT
        layer = MjCambrianCursor.Layer.BACK
        cursor = MjCambrianCursor(position=position, x=0, y=0, layer=layer)
        return [MjCambrianViewerOverlay.create_image_overlay(image, cursor=cursor)]

    @property
    def config(self) -> MjCambrianEyeConfig:
        """The config for the eye."""
        return self._config

    @property
    def name(self) -> str:
        """The name of the eye."""
        return self._name

    @property
    def observation_space(self) -> spaces.Box:
        """Constructs the observation space for the eye.

        The observation space is a `spaces.Box` in [0, 1] whose shape is the
        configured resolution, with a trailing channel dimension of 3 when the eye
        renders rgb."""
        shape = (
            (*self._config.resolution, 3)
            if self._renders_rgb
            else self._config.resolution
        )
        return spaces.Box(0.0, 1.0, shape=shape, dtype=np.float32)

    @property
    def prev_obs(self) -> torch.Tensor:
        """The cached observation most recently stored by `step()`. It is None
        until the buffer has been allocated in `reset()`."""
        return self._prev_obs