Source code for agentlego.tools.image_pose.image_to_pose

from agentlego.types import Annotated, ImageIO, Info
from agentlego.utils import load_or_build_object, require
from ..base import BaseTool


[docs]class HumanBodyPose(BaseTool):
    """A tool to extract human body keypoints from an image.

    Args:
        model (str): The model name used to inference. Which can be found
            in the ``MMPose`` repository.
            Defaults to `human`.
        device (str): The device to load the model. Defaults to 'cuda'.
        toolmeta (None | dict | ToolMeta): The additional info of the tool.
            Defaults to None.
    """

    default_desc = ('This tool can estimate the pose or keypoints of '
                    'human in an image and draw the human pose image.')

    @require('mmpose')
    def __init__(self, model: str = 'human', device: str = 'cuda', toolmeta=None):
        super().__init__(toolmeta=toolmeta)
        self.model_name = model
        self.device = device

    def setup(self):
        from mmpose.apis import MMPoseInferencer
        self._inferencer = load_or_build_object(
            MMPoseInferencer, pose2d=self.model_name, device=self.device)

    def apply(self, image: ImageIO
              ) -> Annotated[ImageIO, Info('The human pose keypoints image.')]:
        image = image.to_array()[:, :, ::-1]
        vis_params = self.adaptive_vis_params(*image.shape[:2])
        results = next(
            self._inferencer(
                inputs=image,
                skeleton_style='openpose',
                black_background=True,
                return_vis=True,
                **vis_params,
            ))
        skeleton_image = results['visualization'][0][:, :, ::-1]
        return ImageIO(skeleton_image)

    @staticmethod
    def adaptive_vis_params(width, height) -> dict:
        scale = (width * height)**0.5

        radius = max(round((3 / 256) * scale), 3)
        thickness = max(round((1 / 256) * scale), 3)

        return dict(radius=int(radius), thickness=int(thickness))