Source code for cambrian.renderer.render_utils

"""Rendering utilities."""

from typing import Dict

import mujoco as mj
import torch
import torch.nn.functional as F


def resize(images: torch.Tensor, height: int, width: int) -> torch.Tensor:
    """Resize the image to the specified height and width."""
    squeeze = False
    if images.ndim == 3:
        squeeze = True
        images = images.unsqueeze(0)

    resized_images = F.interpolate(
        images.permute(0, 3, 1, 2),
        size=(height, width),
        mode="nearest",
    ).permute(0, 2, 3, 1)

    return resized_images.squeeze(0) if squeeze else resized_images
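A minimal usage sketch (the shapes below are illustrative; images are expected in channels-last (H, W, C) or (N, H, W, C) layout):

    import torch

    from cambrian.renderer.render_utils import resize

    # A single channels-last RGB image (H, W, C)
    image = torch.rand(64, 48, 3)
    assert resize(image, 32, 32).shape == (32, 32, 3)

    # A batch of images (N, H, W, C) keeps its batch dimension
    batch = torch.rand(5, 64, 48, 3)
    assert resize(batch, 32, 32).shape == (5, 32, 32, 3)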
def resize_with_aspect_fill(
    images: torch.Tensor,
    height: int,
    width: int,
) -> torch.Tensor:
    """Resize the image while maintaining the aspect ratio and filling the
    rest with black."""
    squeeze = False
    if images.ndim == 3:
        squeeze = True
        images = images.unsqueeze(0)

    original_height, original_width = images.shape[1:3]
    ratio_original = original_width / original_height
    ratio_new = width / height

    transpose = False
    if ratio_original < ratio_new:
        # Transpose for taller images
        transpose = True
        images = images.permute(0, 2, 1, 3)
        height, width = width, height
        ratio_new = width / height
        ratio_original = original_height / original_width

    resize_height = max(1, round(width / ratio_original))
    resized_images = resize(images, resize_height, width)
    pad_top = (height - resize_height) // 2
    pad_bottom = height - resize_height - pad_top
    padded_images = F.pad(
        resized_images.permute(0, 3, 1, 2),
        (0, 0, pad_top, pad_bottom),
        mode="constant",
        value=0,
    ).permute(0, 2, 3, 1)

    if transpose:
        # Transpose back
        padded_images = padded_images.permute(0, 2, 1, 3)

    return padded_images.squeeze(0) if squeeze else padded_images
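A sketch of the letterboxing behavior (the concrete sizes are illustrative):

    import torch

    from cambrian.renderer.render_utils import resize_with_aspect_fill

    # A wide 30x90 image fit into a 60x60 square: the content is scaled to
    # 20x60 and the remaining rows are filled with black (zeros)
    image = torch.ones(30, 90, 3)
    fitted = resize_with_aspect_fill(image, 60, 60)
    assert fitted.shape == (60, 60, 3)
    assert fitted[0].sum() == 0  # top padding row is black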
def add_border(
    images: torch.Tensor,
    border_size: int,
    color: tuple = (0, 0, 0),
) -> torch.Tensor:
    """Add a colored border of border_size pixels around each image."""
    squeeze = False
    if images.ndim == 3:
        squeeze = True
        images = images.unsqueeze(0)

    color = torch.tensor(color, device=images.device).unsqueeze(-1).unsqueeze(-1)
    pad = (border_size, border_size, border_size, border_size)
    images = images.permute(0, 3, 1, 2)
    padded = F.pad(images, pad, value=0.0)
    # Overwrite the padded strip on each side with the border color
    padded[..., :border_size, :] = color
    padded[..., -border_size:, :] = color
    padded[..., :, :border_size] = color
    padded[..., :, -border_size:] = color

    return (
        padded.squeeze(0).permute(1, 2, 0)
        if squeeze
        else padded.permute(0, 2, 3, 1)
    )
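For reference, a small sketch of how the border grows the image (sizes are illustrative):

    import torch

    from cambrian.renderer.render_utils import add_border

    # Surround a 32x32 image with a 2-pixel white border, growing it to 36x36
    image = torch.zeros(32, 32, 3)
    bordered = add_border(image, 2, color=(1, 1, 1))
    assert bordered.shape == (36, 36, 3)
    assert bordered[0, 0].tolist() == [1.0, 1.0, 1.0]  # corner pixel is border color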
def generate_composite(images: Dict[float, Dict[float, torch.Tensor]]) -> torch.Tensor:
    """This is a debug method which renders the images as a composite image.
    Will appear as a compound eye. For example, if we have a 3x3 grid of eyes:

        TL T TR
        ML M MR
        BL B BR

    Each eye has a red border around it.

    Note:
        This assumes that the images have the same dimensions.
    """
    composite = torch.stack(
        [
            images[lat][lon]
            for lat in sorted(images.keys())
            for lon in sorted(images[lat].keys())[::-1]
        ]
    )

    # Ensure each eye image is at least 10 pixels along its longer side
    _, H, W, _ = composite.shape
    if H > W:
        h = max(H, 10)
        w = int(W * h / H)
    else:
        w = max(W, 10)
        h = int(H * w / W)
    composite = resize_with_aspect_fill(composite, h, w)
    composite = add_border(composite, 1, color=(1, 0, 0))

    # Tile the images into a grid while maintaining their spatial positions
    _, H, W, C = composite.shape
    nrows, ncols = len(images), len(next(iter(images.values())))
    composite = (
        composite.view(nrows, ncols, H, W, C)
        .permute(0, 2, 1, 3, 4)
        .reshape(nrows * H, ncols * W, C)
    )

    return composite
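A minimal sketch of the expected input, a nested dict keyed by latitude and then longitude (the key values and eye sizes here are hypothetical):

    import torch

    from cambrian.renderer.render_utils import generate_composite

    # A 2x2 "compound eye": outer keys are latitudes, inner keys longitudes
    images = {
        -10.0: {-10.0: torch.rand(8, 8, 3), 10.0: torch.rand(8, 8, 3)},
        10.0: {-10.0: torch.rand(8, 8, 3), 10.0: torch.rand(8, 8, 3)},
    }
    composite = generate_composite(images)
    # Each 8x8 eye is upscaled to at least 10 pixels and gains a 1-pixel red
    # border, so the composite is (2 * 12) x (2 * 12) x 3
    assert composite.shape == (24, 24, 3)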
def convert_depth_distances(model: mj.MjModel, depth: torch.Tensor) -> torch.Tensor:
    """Converts depth values from OpenGL to metric depth values using PyTorch.

    Args:
        model (mj.MjModel): The model.
        depth (torch.Tensor): The depth values to convert.

    Returns:
        torch.Tensor: The converted depth values.

    Note:
        This function is based on
        [this code](https://github.com/google-deepmind/mujoco/blob/main/python/mujoco/renderer.py).
        It is adapted to use PyTorch instead of NumPy.
    """
    # Get the distances to the near and far clipping planes.
    extent = model.stat.extent
    near = model.vis.map.znear * extent
    far = model.vis.map.zfar * extent

    # Calculate OpenGL perspective matrix values in float32 precision
    # so they are close to what glFrustum returns
    # https://registry.khronos.org/OpenGL-Refpages/gl2.1/xhtml/glFrustum.xml
    zfar = torch.tensor(far, dtype=torch.float32)
    znear = torch.tensor(near, dtype=torch.float32)
    c_coef = -(zfar + znear) / (zfar - znear)
    d_coef = -(torch.tensor(2.0, dtype=torch.float32) * zfar * znear) / (zfar - znear)

    # In reversed-Z mode the perspective matrix is transformed by the following
    c_coef = torch.tensor(-0.5, dtype=torch.float32) * c_coef - torch.tensor(
        0.5, dtype=torch.float32
    )
    d_coef = torch.tensor(-0.5, dtype=torch.float32) * d_coef

    # We need 64 bits to convert Z from ndc to metric depth without noticeable
    # losses in precision
    out_64 = depth.to(dtype=torch.float64)

    # Undo OpenGL projection
    # Note: We do not need to take action to convert from window coordinates
    # to normalized device coordinates because in reversed-Z mode the mapping
    # is identity
    out_64 = d_coef / (out_64 + c_coef)

    # Cast result back to float32 for backwards compatibility
    # This has a small accuracy cost
    return out_64.to(dtype=torch.float32)
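A minimal sketch, assuming the depth values come from a reversed-Z OpenGL buffer in [0, 1] as produced by MuJoCo's renderer (the XML and buffer size are hypothetical):

    import mujoco as mj
    import torch

    from cambrian.renderer.render_utils import convert_depth_distances

    # Hypothetical minimal model; any MjModel provides stat.extent and
    # vis.map.znear/zfar, which define the clipping planes
    model = mj.MjModel.from_xml_string("<mujoco><worldbody/></mujoco>")

    # Raw reversed-Z depth buffer for a 480x640 frame
    raw_depth = torch.rand(480, 640)
    metric_depth = convert_depth_distances(model, raw_depth)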
def convert_depth_to_rgb(
    depth: torch.Tensor, znear: float | None = None, zfar: float | None = None
) -> torch.Tensor:
    """Converts depth values to RGB values.

    Args:
        depth (torch.Tensor): The depth values.
        znear (float | None): The near clipping distance used for
            normalization. Defaults to the minimum depth value.
        zfar (float | None): The far clipping distance used for
            normalization. Defaults to the maximum depth value.

    Returns:
        torch.Tensor: The RGB values.
    """
    # Explicit None checks so a znear/zfar of 0.0 is not silently replaced
    znear = depth.min() if znear is None else znear
    zfar = depth.max() if zfar is None else zfar
    if znear != zfar:
        depth = (depth - znear) / (zfar - znear)
    depth = 1 - torch.clamp(depth, 0.0, 1.0)
    depth = depth.repeat(3, 1, 1).permute(1, 2, 0)
    return depth
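A short sketch (the depth range is illustrative); passing a fixed znear/zfar keeps the coloring consistent across frames instead of rescaling per frame:

    import torch

    from cambrian.renderer.render_utils import convert_depth_to_rgb

    # Metric depth in meters for a 480x640 frame
    depth = torch.rand(480, 640) * 10.0

    # Nearer pixels map to brighter values
    rgb = convert_depth_to_rgb(depth, znear=0.0, zfar=10.0)
    assert rgb.shape == (480, 640, 3)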
def add_text(
    image: torch.Tensor,
    text: str,
    position: tuple[int, int] = (0, 0),
    size: int | None = None,
    **kwargs,
) -> torch.Tensor:
    """Add text to an image.

    Note:
        This is slow, so use it sparingly.

    Args:
        image (torch.Tensor): The image to add text to.
        text (str): The text to add.
        position (tuple[int, int]): The position to add the text.
        size (int | None): The font size. Defaults to None, which uses the
            default font size.

    Keyword Args:
        **kwargs: Additional arguments forwarded to PIL's ImageDraw.Draw.text,
            e.g. fill to set the text color.

    Returns:
        torch.Tensor: The image with the text added.
    """
    import numpy as np
    from PIL import Image, ImageDraw, ImageFont

    device = image.device
    # The image is stored bottom-up, so flip it before drawing and flip back
    # after converting the result back to a tensor
    image = Image.fromarray((torch.flipud(image) * 255).cpu().numpy().astype("uint8"))
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default(size)
    draw.text(position, text, font=font, **kwargs)
    image = torch.tensor(np.array(image), device=device) / 255
    image = torch.flipud(image)
    return image
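A usage sketch, assuming Pillow 10.1+ (where ImageFont.load_default accepts a size argument); the image size and label are illustrative:

    import torch

    from cambrian.renderer.render_utils import add_text

    image = torch.zeros(64, 128, 3)
    # fill is forwarded to PIL's ImageDraw.Draw.text to set the text color
    labeled = add_text(image, "eye 0", position=(2, 2), size=10, fill=(255, 0, 0))
    assert labeled.shape == (64, 128, 3)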