# Source code for pose_format.utils.openpose

import json
import math
import os
import re
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from numpy import ma

from ..numpy.pose_body import NumPyPoseBody
from ..pose import Pose
from ..pose_header import PoseHeader, PoseHeaderComponent, PoseHeaderDimensions

# Names of the 25 body keypoints; a name's position in this list is its keypoint index.
BODY_POINTS = [
    "Nose", "Neck", "RShoulder", "RElbow", "RWrist",  # 0-4
    "LShoulder", "LElbow", "LWrist", "MidHip", "RHip",  # 5-9
    "RKnee", "RAnkle", "LHip", "LKnee", "LAnkle",  # 10-14
    "REye", "LEye", "REar", "LEar", "LBigToe",  # 15-19
    "LSmallToe", "LHeel", "RBigToe", "RSmallToe", "RHeel",  # 20-24
]

# Based on https://github.com/CMU-Perceptual-Computing-Lab/openpose/raw/master/.github/media/keypoints_pose_25.png
# Everything sprouts out of the neck
# Pairs of BODY_POINTS names that are connected by a limb, one chain per row.
BODY_LIMBS = [
    # Body: right arm, left arm, spine
    ("Neck", "RShoulder"), ("RShoulder", "RElbow"), ("RElbow", "RWrist"),
    ("Neck", "LShoulder"), ("LShoulder", "LElbow"), ("LElbow", "LWrist"),
    ("Neck", "MidHip"),
    # Face: eyes and ears hang off the nose, the nose off the neck
    ("Nose", "LEye"), ("Nose", "REye"), ("Nose", "LEar"), ("Nose", "REar"),
    ("Neck", "Nose"),
    # Legs: right, then left
    ("MidHip", "RHip"), ("RHip", "RKnee"), ("RKnee", "RAnkle"),
    ("MidHip", "LHip"), ("LHip", "LKnee"), ("LKnee", "LAnkle"),
    # Feet: right, then left
    ("RAnkle", "RHeel"), ("RAnkle", "RBigToe"), ("RBigToe", "RSmallToe"),
    ("LAnkle", "LHeel"), ("LAnkle", "LBigToe"), ("LBigToe", "LSmallToe"),
]

#        8   12  16  20
#        |   |   |   |
#        7   11  15  19
#    4   |   |   |   |
#    |   6   10  14  18
#    3   |   |   |   |
#    |   5---9---13--17
#    2    \         /
#     \    \       /
#      1    \     /
#       \    \   /
#        ------0-

# Anatomy guide https://www.assh.org/handcare/blog/anatomy-101-finger-joints
# Names of the 21 hand keypoints: the base point followed by four joints per finger.
# A name's position in this list is its keypoint index.
HAND_POINTS = [
    "BASE",
    "T_STT", "T_BCMC", "T_MCP", "T_IP",  # Thumb
    "I_CMC", "I_MCP", "I_PIP", "I_DIP",  # Index
    "M_CMC", "M_MCP", "M_PIP", "M_DIP",  # Middle
    "R_CMC", "R_MCP", "R_PIP", "R_DIP",  # Ring
    "P_CMC", "P_MCP", "P_PIP", "P_DIP",  # Pinky
]

# Based on https://github.com/CMU-Perceptual-Computing-Lab/openpose/raw/master/.github/media/keypoints_hand.png
# Everything sprouts out of the base
# Pairs of HAND_POINTS names that are connected by a limb.
HAND_LIMBS = [
    # Base to the first joint of every finger
    ("BASE", "T_STT"), ("BASE", "I_CMC"), ("BASE", "M_CMC"), ("BASE", "R_CMC"), ("BASE", "P_CMC"),
    # One chain of three limbs per finger
    ("T_STT", "T_BCMC"), ("T_BCMC", "T_MCP"), ("T_MCP", "T_IP"),  # Thumb
    ("I_CMC", "I_MCP"), ("I_MCP", "I_PIP"), ("I_PIP", "I_DIP"),  # Index
    ("M_CMC", "M_MCP"), ("M_MCP", "M_PIP"), ("M_PIP", "M_DIP"),  # Middle
    ("R_CMC", "R_MCP"), ("R_MCP", "R_PIP"), ("R_PIP", "R_DIP"),  # Ring
    ("P_CMC", "P_MCP"), ("P_MCP", "P_PIP"), ("P_PIP", "P_DIP"),  # Pinky
]

# Based on https://github.com/CMU-Perceptual-Computing-Lab/openpose/raw/master/.github/media/keypoints_face.png
# Every face point name is a group prefix followed by the point's index in FACE_POINTS.

# Border (points 0-16): both limb chains start at FB_8 and walk toward one end of the border
FACE_BORDER_POINTS = [f"FB_{i}" for i in range(17)]
FACE_BORDER_LIMBS_LEFT = [(f"FB_{i}", f"FB_{i - 1}") for i in range(8, 0, -1)]
FACE_BORDER_LIMBS_RIGHT = [(f"FB_{i}", f"FB_{i + 1}") for i in range(8, 16)]

# Lips: outer (points 48-59) and inner (points 60-67), each closed into a loop by its last limb
FACE_OUTER_LIPS_POINTS = [f"FLO_{i}" for i in range(48, 60)]
FACE_OUTER_LIPS_LIMBS = [(f"FLO_{i}", f"FLO_{i + 1}") for i in range(48, 59)] + [("FLO_59", "FLO_48")]
FACE_INNER_LIPS_POINTS = [f"FLI_{i}" for i in range(60, 68)]
FACE_INNER_LIPS_LIMBS = [(f"FLI_{i}", f"FLI_{i + 1}") for i in range(60, 67)] + [("FLI_67", "FLI_60")]

# Nose (points 27-35): a bridge chain, a horizontal chain, and one limb joining the two
FACE_NOSE_POINTS = [f"FN_{i}" for i in range(27, 36)]
FACE_NOSE_BRIDGE_LIMBS = [(f"FN_{i}", f"FN_{i + 1}") for i in range(27, 31)]
FACE_NOSE_HORIZONTAL_LIMBS = [(f"FN_{i}", f"FN_{i + 1}") for i in range(31, 35)]
FACE_NOSE_LIMBS = FACE_NOSE_BRIDGE_LIMBS + FACE_NOSE_HORIZONTAL_LIMBS + [("FN_30", "FN_33")]

# Eyes (points 36-47), each closed into a loop by its last limb, and pupils (points 68-69)
FACE_EYE_POINTS = [f"FE_{i}" for i in range(36, 48)]
FACE_EYE_LEFT_LIMBS = [(f"FE_{i}", f"FE_{i + 1}") for i in range(36, 41)] + [("FE_41", "FE_36")]
FACE_EYE_RIGHT_LIMBS = [(f"FE_{i}", f"FE_{i + 1}") for i in range(42, 47)] + [("FE_47", "FE_42")]
FACE_PUPILS_POINTS = ["FP_68", "FP_69"]

# Eyebrows (points 17-26): two open chains
FACE_EYEBROWS_POINTS = [f"FEB_{i}" for i in range(17, 27)]
FACE_EYEBROW_LEFT_LIMBS = [(f"FEB_{i}", f"FEB_{i + 1}") for i in range(17, 21)]
FACE_EYEBROW_RIGHT_LIMBS = [(f"FEB_{i}", f"FEB_{i + 1}") for i in range(22, 26)]

# Face points, in index order (0-69)
FACE_POINTS = (
    FACE_BORDER_POINTS
    + FACE_EYEBROWS_POINTS
    + FACE_NOSE_POINTS
    + FACE_EYE_POINTS
    + FACE_OUTER_LIPS_POINTS
    + FACE_INNER_LIPS_POINTS
    + FACE_PUPILS_POINTS
)
FACE_LIMBS: List[Tuple[str, str]] = (
    FACE_BORDER_LIMBS_LEFT
    + FACE_BORDER_LIMBS_RIGHT
    + FACE_OUTER_LIPS_LIMBS
    + FACE_INNER_LIPS_LIMBS
    + FACE_NOSE_LIMBS
    + FACE_EYEBROW_LEFT_LIMBS
    + FACE_EYEBROW_RIGHT_LIMBS
    + FACE_EYE_LEFT_LIMBS
    + FACE_EYE_RIGHT_LIMBS
)

# Base RGB colors for the hand points. hand_colors reads row `i // 4` for i = -1 .. 19, so the
# first five rows are used for the four joints of each finger in HAND_POINTS order and the
# final gray row is reached through index -1 for the first point.
HAND_POINTS_COLOR = [
    [192, 0, 0],
    [192, 192, 0],
    [0, 192, 0],
    [0, 192, 192],
    [0, 0, 192],
    [127, 127, 127],
]

# Regex that captures the frame ID: the run of digits directly before "_keypoints.json",
# preceded by the start of the name or a non-digit character.
# Raw string so that \D and \d reach the regex engine without invalid-escape warnings;
# the dot is escaped so that it only matches a literal ".".
OPENPOSE_FRAME_PATTERN = r"(?:^|\D)(\d+)_keypoints\.json"

# Definition of OpenPose Components


def limbs_index(limbs: List[Tuple[str, str]], points: List[str]) -> List[Tuple[int, int]]:
    """
    Convert limb names to indices based on a list of points.

    Parameters
    ----------
    limbs : list of tuple of str
        Limbs defined by pairs of point names.
    points : list of str
        Point names; a name's position in this list is its index.

    Returns
    -------
    list of tuple of int
        Limbs defined by point indices, in the same order as `limbs`.

    Raises
    ------
    ValueError
        If a limb refers to a name that does not appear in `points`.
    """
    # Resolve every name once instead of scanning `points` twice per limb.
    # setdefault keeps the first occurrence of a name, which is what list.index() returns.
    position = {}
    for index, name in enumerate(points):
        position.setdefault(name, index)

    try:
        return [(position[start], position[end]) for start, end in limbs]
    except KeyError as error:
        # Keep raising the exception type that list.index() raised for unknown names.
        raise ValueError(f"{error.args[0]!r} is not in points") from None
# One color per hand point. Index -1 selects the last (gray) row of HAND_POINTS_COLOR for the
# first point; every following group of four points shares one row, and each channel is
# raised by 35 per step within the group.
hand_colors = [
    tuple([math.floor(x + 35 * (i % 4)) for x in HAND_POINTS_COLOR[i // 4]])
    for i in range(-1, len(HAND_POINTS) - 1)
]


def OpenPose_Hand_Component(name: str) -> PoseHeaderComponent:
    """
    Create the header component of one hand.

    Parameters
    ----------
    name : str
        Name of the component, e.g. "hand_left_keypoints_2d".

    Returns
    -------
    PoseHeaderComponent
        Component built from `name` and the shared hand constants for points, limbs,
        colors, and the "XYC" point format.
    """
    return PoseHeaderComponent(name=name,
                               points=HAND_POINTS,
                               limbs=limbs_index(HAND_LIMBS, HAND_POINTS),
                               colors=hand_colors,
                               point_format="XYC")


# The components of an OpenPose header: body, face, left hand, right hand.
# load_openpose reads each person's keypoints from the key equal to the component name
# and lays the components out in this order.
OpenPose_Components = [
    PoseHeaderComponent(name="pose_keypoints_2d",
                        points=BODY_POINTS,
                        limbs=limbs_index(BODY_LIMBS, BODY_POINTS),
                        colors=[(255, 0, 0)],
                        point_format="XYC"),
    PoseHeaderComponent(name="face_keypoints_2d",
                        points=FACE_POINTS,
                        limbs=limbs_index(FACE_LIMBS, FACE_POINTS),
                        colors=[(128, 0, 0)],
                        point_format="XYC"),
    OpenPose_Hand_Component("hand_left_keypoints_2d"),
    OpenPose_Hand_Component("hand_right_keypoints_2d"),
]

# A single parsed OpenPose JSON frame, and a mapping from frame ID to frame.
OpenPoseFrame = Dict[str, Any]
OpenPoseFrames = Dict[int, OpenPoseFrame]
def load_openpose(frames: OpenPoseFrames,
                  fps: float = 24,
                  width: int = 1000,
                  height: int = 1000,
                  depth: int = 0,
                  num_frames: Optional[int] = None) -> Pose:
    """
    Loads a dictionary of OpenPose frames into a Pose object.

    Parameters
    ----------
    frames : dict
        Dictionary where keys are frame IDs, and values are individual frames.
        Each individual frame is also a dictionary with a "people" list.
    fps : float, optional
        Framerate, default is 24. The value is converted with `int()` before it is stored.
    width : int, optional
        Width of pose space, default is 1000.
    height : int, optional
        Height of pose space, default is 1000.
    depth : int, optional
        Depth of pose space, default is 0.
    num_frames : int, optional
        Number of frames when it's known and cannot be derived from OpenPose files.
        That is the case if the last frame(s) of a video are missing from the OpenPose output.
        Default is None, in which case the largest frame ID plus one is used.

    Returns
    -------
    Pose
        Pose object with a header specific to OpenPose and a body that contains a single array.

    Raises
    ------
    ValueError
        If `frames` is empty, or if a keypoint list cannot be split into whole points.
    """
    if not frames:
        raise ValueError("Cannot load OpenPose data from an empty dictionary of frames")

    dimensions = PoseHeaderDimensions(width=width, height=height, depth=depth)
    header: PoseHeader = PoseHeader(version=0.1, dimensions=dimensions, components=OpenPose_Components)
    total_points = header.total_points()

    if num_frames is None:
        # take the maximum of all frame IDs because some frames could be missing
        num_frames = max(frames) + 1

    # array dimensions: (frames, person, points, dimensions)
    people = max(len(frame["people"]) for frame in frames.values())
    data = np.zeros(shape=(num_frames, people, total_points, 2), dtype=np.float32)
    confidence = np.zeros(shape=(num_frames, people, total_points), dtype=np.float32)

    for frame_id, frame in frames.items():
        for person_id, person in enumerate(frame["people"]):
            component_start = 0
            for component in header.components:
                num_points = len(component.points)
                # Flat list of values, len(component.format) values per point
                values = np.asarray(person[component.name], dtype=np.float32)
                values = values.reshape(-1, len(component.format))[:num_points]

                component_end = component_start + len(values)
                data[frame_id, person_id, component_start:component_end] = values[:, :2]
                confidence[frame_id, person_id, component_start:component_end] = values[:, 2]

                # Always advance by the component's full size, so that a component with an
                # empty keypoint list leaves its slots at zero (masked below) instead of
                # shifting the following components into them.
                component_start += num_points

    # Mask every point that has zero confidence (True means masked)
    mask = confidence == 0
    stacked_mask = np.stack([mask, mask], axis=3)
    masked_data = ma.masked_array(data, mask=stacked_mask)

    # NOTE: int() truncates fractional frame rates (e.g. 29.97 becomes 29)
    body = NumPyPoseBody(fps=int(fps), data=masked_data, confidence=confidence)

    return Pose(header, body)
def get_frame_id(filename: str, pattern: Optional[str] = None) -> int:
    """
    Parses a filename to find the ID of a frame.

    Example file name for frame with ID 17: `CAM2_000000000017_keypoints.json`.

    Parameters
    ----------
    filename : str
        Name of the openpose frame file.
    pattern : str, optional
        Regex pattern with a single group that captures the frame ID.
        Default is None, which selects OPENPOSE_FRAME_PATTERN.

    Returns
    -------
    int
        Frame ID as an integer. If the pattern matches more than once, the last match is used.

    Raises
    ------
    IndexError
        If `pattern` does not match `filename`.
    """
    if pattern is None:
        pattern = OPENPOSE_FRAME_PATTERN

    matches = re.findall(pattern, filename)
    if not matches:
        # Same exception type as indexing the empty match list, but with a usable message
        raise IndexError(f"Could not find a frame ID in {filename!r} using pattern {pattern!r}")

    return int(matches[-1])
def load_frames_directory_dict(directory: str, pattern: Optional[str] = None) -> OpenPoseFrames:
    """
    Load a pose directory where each frame's pose data is stored in a separate file.

    With the default pattern, filenames must adhere to the format
    `[ARBITRARY CHARACTERS]_[FRAME_ID]_keypoints.json`.
    Example: For a frame with ID 17, the filename would be `CAM2_000000000017_keypoints.json`.

    Sub-directories and files whose name does not match `pattern` are skipped.

    Parameters
    ----------
    directory : str
        Path to the folder containing pose files.
    pattern : str, optional
        Regular expression pattern to identify and parse frame filenames.
        Default is None, which selects OPENPOSE_FRAME_PATTERN.

    Returns
    -------
    OpenPoseFrames
        Dictionary where keys are frame IDs (int) and values are the corresponding frames (dict).

    Examples
    --------
    >>> frames = load_frames_directory_dict("path/to/frames")
    >>> print(frames[17])
    {...}  # content of CAM2_000000000017_keypoints.json
    """
    if pattern is None:
        pattern = OPENPOSE_FRAME_PATTERN

    frames = {}  # type: OpenPoseFrames

    with os.scandir(directory) as entry_iterator:
        for entry in entry_iterator:  # type: os.DirEntry
            # Only regular files that look like frame files are parsed; anything else in
            # the directory would otherwise fail in open(), json.load() or get_frame_id()
            if not entry.is_file() or re.search(pattern, entry.name) is None:
                continue

            # Parse the name before opening the file
            frame_id = get_frame_id(entry.name, pattern=pattern)
            with open(entry.path, "r", encoding="utf-8") as f:
                frames[frame_id] = json.load(f)

    return frames
def load_openpose_directory(directory: str,
                            fps: float = 24,
                            width: int = 1000,
                            height: int = 1000,
                            depth: int = 0,
                            num_frames: Optional[int] = None) -> Pose:
    """
    Loads pose data from a directory containing OpenPose files and returns a `Pose` object.

    Frame files are located and parsed with OPENPOSE_FRAME_PATTERN.

    Parameters
    ----------
    directory : str
        Path to the folder that contains pose files.
    fps : float, optional
        Framerate, default is 24.
    width : int, optional
        Width of pose space, default is 1000.
    height : int, optional
        Height of pose space, default is 1000.
    depth : int, optional
        Depth of pose space, default is 0.
    num_frames : int, optional
        Number of frames when known and cannot be derived from OpenPose files.
        This can be the case if the last frame(s) of a video are missing from the OpenPose output.
        Default is None.

    Returns
    -------
    Pose
        Pose object with a header specific to OpenPose and a body containing a single array.

    Examples
    --------
    >>> pose = load_openpose_directory("path/to/frames")
    >>> print(pose.header)
    PoseHeader(...)
    """
    frames = load_frames_directory_dict(directory=directory, pattern=OPENPOSE_FRAME_PATTERN)

    return load_openpose(frames, fps=fps, width=width, height=height, depth=depth, num_frames=num_frames)