diff --git a/.gitignore b/.gitignore
index 9ea4435..c31410a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,5 +41,6 @@
 __pycache__/
 *.pyc
 # App/Reconstruction folder
+dataset/
 fragments/
 scene/
\ No newline at end of file
diff --git a/app/camera_manager.py b/app/camera_manager.py
new file mode 100644
index 0000000..3334528
--- /dev/null
+++ b/app/camera_manager.py
@@ -0,0 +1,220 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import time
+import numpy as np
+import pyrealsense2 as rs
+from enum import IntEnum
+
+
+class RealSenseResolution(IntEnum):
+    Low = 0  # 480X270
+    Medium = 1  # 640X480
+    High = 2  # 848X480
+    Max = 3  # 1280X720
+
+
+class RealSenseFPS(IntEnum):
+    Low = 0  # 6
+    Medium = 1  # 15
+    High = 2  # 30
+    Max = 3  # 60
+
+
+class RealSenseManager:
+
+    def __init__(self,
+                 resolution=RealSenseResolution.High,
+                 fps=RealSenseFPS.High):
+        self.align = rs.align(rs.stream.color)
+        self.config = rs.config()
+
+        self.resolution = None
+        if resolution == RealSenseResolution.Low:
+            self.resolution = (480, 270)
+        elif resolution == RealSenseResolution.Medium:
+            self.resolution = (640, 480)
+        elif resolution == RealSenseResolution.High:
+            self.resolution = (848, 480)
+        else:
+            self.resolution = (1280, 720)
+
+        fps_ = None
+        if fps == RealSenseFPS.Low:
+            fps_ = 6
+        elif fps == RealSenseFPS.Medium:
+            fps_ = 15
+        elif fps == RealSenseFPS.High:
+            fps_ = 30
+        else:
+            fps_ = 60
+
+        self.config.enable_stream(rs.stream.depth, self.resolution[0],
+                                  self.resolution[1], rs.format.z16, fps_)
+        # the color stream supports at most 30 fps
+        fps_color = 30 if fps_ > 30 else fps_
+        self.config.enable_stream(rs.stream.color, self.resolution[0],
+                                  self.resolution[1], rs.format.rgb8, fps_color)
+        self.pipeline = rs.pipeline()
+
+        self.profile = None
+        self.depth_profile = None
+        self.color_profile = None
+        self.sensor = None
+
+    def get_intrinsics(self, type='color'):
+        """ Get intrinsics of the RGB camera or IR camera, which vary with resolution.
+
+        Args:
+            type ([string]): 'color' or 'depth'
+
+        Returns:
+            [tuple[List, List]]: K and dist
+        """
+        if self.sensor is None:
+            print('Sensor not opened!')
+            return None
+        intrin = None
+        if type == 'color':
+            intrin = self.profile.get_stream(
+                rs.stream.color).as_video_stream_profile().get_intrinsics()
+        else:
+            intrin = self.profile.get_stream(
+                rs.stream.depth).as_video_stream_profile().get_intrinsics()
+        K = [intrin.fx, intrin.fy, intrin.ppx, intrin.ppy]
+        dist = [
+            intrin.coeffs[0], intrin.coeffs[1], intrin.coeffs[2],
+            intrin.coeffs[3], intrin.coeffs[4]
+        ]
+        return (K, dist)
+
+    def get_extrinsics(self):
+        """ Get extrinsics from the IR camera to the RGB camera.
+
+        Returns:
+            [np.ndarray(4 X 4)]:
+        """
+        if self.sensor is None:
+            print('Sensor not opened!')
+            return None
+
+        res = self.depth_profile.get_extrinsics_to(self.color_profile)
+        rotation = np.array(res.rotation).reshape((3, 3))
+        translation = np.array(res.translation)
+        T = np.identity(4)
+        T[:3, :3] = rotation
+        T[:3, 3] = translation
+
+        return T
+
+    def get_resolution(self):
+        """ Get resolution of the camera.
+
+        Returns:
+            tuple: (width, height)
+        """
+        return self.resolution
+
+    def set_laser_power(self, power):
+        """ Set laser power within the range [10, 360].
+
+        Args:
+            power ([int]): laser power
+        """
+        power = max(10, min(360, power))
+        if self.sensor is None:
+            print('Sensor not opened!')
+            return
+        self.sensor.set_option(rs.option.laser_power, power)
+
+    def get_data(self, hole_fill=False):
+        """ Get data from the camera.
+
+        Args:
+            hole_fill ([bool]): whether to fill holes in the depth frame
+
+        Returns:
+            [tuple[np.ndarray, np.ndarray]]: color (RGB) and depth images
+        """
+        if self.sensor is None:
+            print('Sensor not opened!')
+            return None
+
+        frames = self.pipeline.wait_for_frames()
+        aligned_frames = self.align.process(frames)
+        color_frame = aligned_frames.get_color_frame()
+        depth_frame = aligned_frames.get_depth_frame()
+        if not color_frame or not depth_frame:
+            print('Cannot get data from RealSense!')
+            return None
+        # fill = 2 uses the 4-pixel neighborhood for hole filling
+        fill = 2 if hole_fill else 0
+        depth_frame = rs.spatial_filter(0.5, 20, 2, fill).process(depth_frame)
+
+        # convert to numpy array
+        color_image = np.asanyarray(color_frame.get_data())
+        depth_image = np.asanyarray(depth_frame.get_data())
+
+        return (color_image, depth_image)
+
+    def open(self):
+        """ Open the camera.
+
+        Returns:
+            [bool]:
+        """
+        self.profile = self.pipeline.start(self.config)
+        self.depth_profile = rs.video_stream_profile(
+            self.profile.get_stream(rs.stream.depth))
+        self.color_profile = rs.video_stream_profile(
+            self.profile.get_stream(rs.stream.color))
+        device = self.profile.get_device()
+        if len(device.query_sensors()) == 0:
+            print('Cannot find RealSense sensor!')
+            return False
+        else:
+            self.sensor = device.query_sensors()[0]
+            # set default laser power
+            self.sensor.set_option(rs.option.laser_power, 200)
+            # set to high density mode
+            self.sensor.set_option(rs.option.visual_preset, 4)
+            return True
+
+    def close(self):
+        self.pipeline.stop()
+        self.sensor = None
+        self.profile = None
+        self.depth_profile = None
+        self.color_profile = None
+
+
+if __name__ == '__main__':
+    import cv2
+
+    camera = RealSenseManager(resolution=RealSenseResolution.High,
+                              fps=RealSenseFPS.High)
+    camera.open()
+    try:
+        while True:
+            t0 = time.time()
+            # get_data() returns numpy arrays (RGB color image and uint16 depth image)
+            color_image, depth_image = camera.get_data(hole_fill=False)
+            # colorize the depth image for display (same approach as record_data.py)
+            depth_color_image = cv2.applyColorMap(
+                cv2.convertScaleAbs(depth_image, alpha=0.09), cv2.COLORMAP_JET)
+            color_image_bgr = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
+            t1 = time.time()
+            fps = 'FPS: ' + str(int(1 / (t1 - t0)))
+            cv2.putText(color_image_bgr, fps, (50, 50),
+                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
+                        cv2.LINE_AA)
+            cv2.namedWindow('color image', cv2.WINDOW_AUTOSIZE)
+            cv2.imshow('color image', color_image_bgr)
+            cv2.namedWindow('depth image', cv2.WINDOW_AUTOSIZE)
+            cv2.imshow('depth image', depth_color_image)
+            cv2.waitKey(1)
+    except KeyboardInterrupt:
+        camera.close()
+        cv2.destroyAllWindows()
+    finally:
+        print('Exit')
\ No newline at end of file
diff --git a/app/label_maker/README.md b/app/label_maker/README.md
new file mode 100644
index 0000000..f04f081
--- /dev/null
+++ b/app/label_maker/README.md
@@ -0,0 +1,138 @@
+# Label Maker for 6D Pose Estimation
+This is a simple offline label maker for **instance-level** 6D pose estimation ground-truth data generation, formed by several Python scripts. The whole pipeline is as follows:
+- Collect RGBD data from the sensor.
+- Run RGBD dense reconstruction.
+- Pose initialization.
+- Mask rendering and label generation.
+
+## Requirements
+- `opencv-python`
+- `open3d`
+- `misc3d` (with `reconstruction` module enabled)
+
+## Pipeline
+### Step 1: Collect RGBD data
+We provide a simple script (`label_maker/record_data.py`) to collect RGBD data from a RealSense RGBD camera. There are some arguments you can set in the script to control the record mode and frame rate.
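+
+For example, to record continuously and save every 5th frame: `python3 record_data.py --dataset dataset --record_mode c --frame_interval 5`. In the default discrete mode (`--record_mode d`), press `s` in the preview window to save the current frame; recording stops once `--maximum_frame` frames have been saved.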
+
+You can also collect the data yourself, but it is recommended to use the following structure:
+```
+dataset/
+    - color/
+        - 000000.png
+        - 000001.png
+        - ...
+    - depth/
+        - 000000.png
+        - 000001.png
+        - ...
+```
+
+**Note:**
+You should collect no more than 300 pairs of RGBD data for each dataset, otherwise the scene reconstruction in the next step will be very slow. You can record your data multiple times with different scenes (different models, background, etc.).
+
+### Step 2: Run RGBD dense reconstruction
+Run `python3 reconstruction.py config.json` to get the reconstructed scene. You must change the `camera` parameters in the `config.json` file to match your camera specification.
+
+You can use `draw_geometry.py` to visualize the reconstructed scene as a triangle mesh or point cloud.
+
+### Step 3: Pose initialization
+1. Prepare your model files with the following structure:
+    ```
+    model/
+        - 0.ply
+        - 1.ply
+        - 2.ply
+        - ...
+    ```
+    **You must use a number as the model name; it determines the mask value of each instance in the rendered masks.**
+2. Run `python3 init_obj_pose.py --model_path <model_path> --data_path <data_path>`, and follow the instructions printed in the terminal.
+
+    After finishing the process, you will find `init_poses.json` in your data path.
+
+### Step 4: Render masks and generate labels
+Run `python3 generate_labels.py --model_path <model_path> --data_path <data_path>` (add `--vis` to visualize the rendered instance masks).
+
+The results are 16-bit masks stored in `dataset/mask` and a `labels.json` file which contains the 6D pose, bbox, object id and instance id for every frame. A minimal example with only one frame is shown below:
+```json
+{
+    "000000": [
+        {
+            "obj_id": 0,
+            "instance_id": 0,
+            "cam_R_m2c": [
+                [0.9014091842603533, 0.43197344750891603, -0.029332970840869017],
+                [0.22044418653117434, -0.5162045410566827, -0.8276093477100617],
+                [-0.3726470758716603, 0.7395483841100113, -0.560537549504563]
+            ],
+            "cam_t_m2c": [0.29020161109027537, 0.2501192190463131, 0.6792205163170392],
+            "bbox": [498, 398, 142, 82]
+        },
+        {
+            "obj_id": 0,
+            "instance_id": 1,
+            "cam_R_m2c": [
+                [0.816165164729526, 0.5773665735205735, -0.022853088700338718],
+                [0.30813881463069986, -0.4683609651098311, -0.828063087741133],
+                [-0.4887994423073907, 0.6687943227499061, -0.5601689558137602]
+            ],
+            "cam_t_m2c": [0.12174972304257478, 0.18991541206314635, 0.7773315438193125],
+            "bbox": [350, 337, 144, 133]
+        }
+    ]
+}
+```
+
+The value of each instance object in the mask is equal to `v = obj_id * 1000 + instance_id`. You can decode each instance into an 8-bit mask image containing a single object with the following code:
+```python
+# you already have mask, obj_id and instance_id
+mask_new = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
+v = obj_id * 1000 + instance_id
+mask_new[mask == v] = 255
+```
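+
+As a minimal sketch of consuming the generated data (paths follow the `dataset` layout described above), the labels and mask of one frame can be loaded like this:
+```python
+import json
+import cv2
+
+with open('dataset/labels.json', 'r') as f:
+    labels = json.load(f)
+
+mask = cv2.imread('dataset/mask/000000.png', cv2.IMREAD_UNCHANGED)  # 16-bit mask
+for label in labels['000000']:
+    v = label['obj_id'] * 1000 + label['instance_id']
+    instance_mask = (mask == v).astype('uint8') * 255
+    x, y, w, h = label['bbox']
+```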
+
+## Reference:
+LabelFusion
+- [paper](https://ieeexplore.ieee.org/abstract/document/8460950)
+- [code](https://github.com/RobotLocomotion/LabelFusion)
\ No newline at end of file
diff --git a/app/label_maker/config.json b/app/label_maker/config.json
new file mode 100644
index 0000000..01956ed
--- /dev/null
+++ b/app/label_maker/config.json
@@ -0,0 +1,29 @@
+{
+    "data_path": "dataset",
+    "camera": {
+        "width": 640,
+        "height": 480,
+        "fx": 598.7568,
+        "fy": 598.7569,
+        "cx": 326.3443,
+        "cy": 250.2449,
+        "depth_scale": 1000.0
+    },
+    "make_fragments": {
+        "descriptor_type": "orb",
+        "feature_num": 100,
+        "n_frame_per_fragment": 30,
+        "keyframe_ratio": 0.2
+    },
+    "local_refine": "color",
+    "global_registration": "teaser",
+    "optimization_param": {
+        "preference_loop_closure_odometry": 0.1,
+        "preference_loop_closure_registration": 5.0
+    },
+    "max_depth": 3.0,
+    "max_depth_diff": 0.05,
+    "voxel_size": 0.01,
+    "integration_voxel_size": 0.006,
+    "enable_slac": false
+}
\ No newline at end of file
diff --git a/app/label_maker/draw_geometry.py b/app/label_maker/draw_geometry.py
new file mode 100644
index 0000000..9df1561
--- /dev/null
+++ b/app/label_maker/draw_geometry.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import open3d as o3d
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('geometry', help='path to geometry file (.ply)')
+    parser.add_argument("--pcd", action='store_true',
+                        help="visualize as point cloud")
+    args = parser.parse_args()
+
+    path_to_geometry = args.geometry
+    if args.pcd:
+        scene = o3d.io.read_point_cloud(path_to_geometry)
+    else:
+        scene = o3d.io.read_triangle_mesh(path_to_geometry)
+        # fall back to a point cloud if the file contains no triangles
+        if not scene.has_triangles():
+            scene = o3d.io.read_point_cloud(path_to_geometry)
+
+    # create axis in world frame
+    axis = o3d.geometry.TriangleMesh.create_coordinate_frame(
+        size=0.2, origin=[0, 0, 0])
+
+    geometries = [scene, axis]
+    o3d.visualization.draw_geometries(geometries)
diff --git a/app/label_maker/generate_labels.py b/app/label_maker/generate_labels.py
new file mode 100644
index 0000000..43af83c
--- /dev/null
+++ b/app/label_maker/generate_labels.py
@@ -0,0 +1,200 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import time
+import argparse
+import os
+import cv2
+import shutil
+import numpy as np
+import open3d as o3d
+import misc3d as m3d
+import json
+import sys
+sys.path.append('../')
+from utils import Colors
+from utils import mask_to_bbox, rgbd_to_pointcloud
+
+
+def remove_and_create_dir(dir_path):
+    mask_path = os.path.join(dir_path, 'mask')
+    if os.path.exists(mask_path):
+        shutil.rmtree(mask_path)
+    os.makedirs(mask_path)
+
+
+def read_model_and_init_poses(model_path, data_path):
+    with open(os.path.join(data_path, 'init_poses.json'), 'r') as f:
+        init_poses = json.load(f)
+
+    models = {}
+    for file_name in os.listdir(model_path):
+        if file_name.endswith('.ply'):
+            name = os.path.splitext(file_name)[0]
+            if name in init_poses:
+                mesh = o3d.io.read_triangle_mesh(
+                    os.path.join(model_path, file_name))
+                models[name] = mesh
+
+    return (models, init_poses)
+
+
+def read_rgbd_and_name(path):
+    rgbds = []
+    names = []
+
+    # sort the file lists so that color/depth pairs and the odometry order match
+    color_files = sorted(os.listdir(os.path.join(path, 'color')))
+    depth_files = sorted(os.listdir(os.path.join(path, 'depth')))
+    for i in range(len(color_files)):
+        color = cv2.imread(os.path.join(path, 'color', color_files[i]))
+        depth = cv2.imread(os.path.join(
+            path, 'depth', depth_files[i]), cv2.IMREAD_UNCHANGED)
+        rgbds.append((color, depth))
+        names.append(os.path.splitext(color_files[i])[0])
+
+    return rgbds, names
+
+
+def read_camera_intrinsic(path):
+    f = open(os.path.join(path, 'camera_intrinsic.json'), 'r')
+    data = json.load(f)
+    camera = o3d.camera.PinholeCameraIntrinsic(
+        data['width'], data['height'], data['fx'], data['fy'], data['cx'],
+        data['cy'])
+    return camera
+
+
+def read_odometry(path):
+    odometrys = []
+    f = open(os.path.join(path, 'scene/trajectory.json'), 'r')
+    data = json.load(f)
+
+    for key, value in data.items():
+        if key == 'class_name':
+            continue
+        odometrys.append(np.array(value).reshape((4, 4)))
+
+    return odometrys
+
+
+def refine_local_pose(model, color, depth, camera, init_pose, threshold=0.005):
+    intrin = (camera.intrinsic_matrix[0, 0], camera.intrinsic_matrix[1, 1],
+              camera.intrinsic_matrix[0, 2], camera.intrinsic_matrix[1, 2])
+    scene = rgbd_to_pointcloud(color, depth, intrin, 1000.0, 3.0, True)
+    scene = scene.voxel_down_sample(voxel_size=0.01)
+
+    # crop the scene around the initial pose of the model before running ICP
+    model = o3d.geometry.PointCloud(model.vertices)
+    bbox = model.get_oriented_bounding_box()
+    bbox.rotate(init_pose[:3, :3], bbox.center)
+    bbox.translate(init_pose[:3, 3])
+    bbox.scale(1.2, bbox.center)
+    scene = scene.crop(bbox)
+
+    result = o3d.pipelines.registration.registration_icp(
+        model, scene, threshold, init_pose,
+        o3d.pipelines.registration.TransformationEstimationPointToPoint())
+    pose = result.transformation
+
+    return pose
+
+
+def generate_label_and_save_mask(data_path, instance_map, init_poses, pose_list, name):
+    # encode every instance as v = obj_id * 1000 + instance_id and save the 16-bit mask
+    instance_mask = np.zeros(instance_map.shape, dtype=np.uint16)
+    labels = []
+
+    instance = 0
+    for key, value in init_poses.items():
+        for i in range(len(value)):
+            label = {}
+            label['obj_id'] = int(key)
+            label['instance_id'] = instance
+            label['cam_R_m2c'] = pose_list[instance][:3, :3].tolist()
+            label['cam_t_m2c'] = pose_list[instance][:3, 3].tolist()
+            mask_255 = np.zeros(instance_map.shape, dtype=np.uint8)
+            mask_255[instance_map == instance] = 255
+            bbox = mask_to_bbox(mask_255)
+            label['bbox'] = bbox
+
+            instance_value = int(key) * 1000 + instance
+            instance_mask[instance_map == instance] = instance_value
+            instance += 1
+            labels.append(label)
+
+    cv2.imwrite(os.path.join(data_path, 'mask', name + '.png'), instance_mask)
+    return labels
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_path', help='path to CAD model')
+    parser.add_argument("--data_path", default='dataset',
+                        help="path to RGBD data set")
+    parser.add_argument("--local_refine", action='store_true',
+                        help="use ICP to refine the model pose against the scene")
+    parser.add_argument("--vis", action='store_true',
+                        help="visualize the rendering results")
+    args = parser.parse_args()
+
+    remove_and_create_dir(args.data_path)
+
+    models, init_poses = read_model_and_init_poses(
+        args.model_path, args.data_path)
+    rgbds, file_names = read_rgbd_and_name(args.data_path)
+    camera = read_camera_intrinsic(args.data_path)
+    odometrys = read_odometry(args.data_path)
+
+    render = m3d.pose_estimation.RayCastRenderer(camera)
+
+    t0 = time.time()
+    data_labels = {}
+    for i in range(len(rgbds)):
+        render_mesh = []
+        mesh_pose = []
+        odom = odometrys[i]
+
+        for key, value in init_poses.items():
+            for arr in value:
+                pose = np.array(arr).reshape((4, 4))
+                render_mesh.append(models[key])
+
+                # transform the pose from the world (scene) frame into the current camera frame
+                pose = np.linalg.inv(odom) @ pose
+                if args.local_refine:
+                    pose = refine_local_pose(
+                        models[key], rgbds[i][0], rgbds[i][1], camera, pose)
+
+                mesh_pose.append(pose)
+
+        ret = render.cast_rays(render_mesh, mesh_pose)
+
+        # rendering instance map
+        instance_map = render.get_instance_map().numpy()
+
+        label = generate_label_and_save_mask(
+            args.data_path, instance_map, init_poses, mesh_pose, file_names[i])
+        data_labels[file_names[i]] = label
+
+        # visualization
+        if args.vis:
+            mask = np.zeros(
+                (instance_map.shape[0], instance_map.shape[1], 3), dtype=np.uint8)
+            index = np.zeros(
+                (instance_map.shape[0], instance_map.shape[1]), dtype=np.bool_)
+            color = rgbds[i][0]
+
+            for j in range(len(render_mesh)):
+                mask[instance_map == j] = Colors()(j, True)
+                index[instance_map == j] = True
+            color[index] = cv2.addWeighted(color, 0.5, mask, 0.5, 0)[index]
+
+            cv2.namedWindow('Instance Mask Rendering', cv2.WINDOW_AUTOSIZE)
+            cv2.imshow('Instance Mask Rendering', color)
+            cv2.imwrite(os.path.join(args.data_path, 'mask',
+                                     file_names[i] + '_vis.png'), color)
+            key = cv2.waitKey(0)
+
+    print('Time:', time.time() - t0)
+    # save results inside data path
+    with open(os.path.join(args.data_path, 'labels.json'), 'w') as f:
+        json.dump(data_labels, f, indent=4)
diff --git a/app/label_maker/init_obj_pose.py b/app/label_maker/init_obj_pose.py
new file mode 100644
index 0000000..845444e
--- /dev/null
+++ b/app/label_maker/init_obj_pose.py
@@ -0,0 +1,99 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import os
+import json
+import open3d as o3d
+import misc3d as m3d
+
+
+def pick_points(pcd):
+    print("")
+    print(
+        "1) Please pick at least three correspondences using [shift + left click]"
+    )
+    print("   Press [shift + right click] to undo point picking")
+    print("2) After picking points, press 'Q' to close the window")
+    vis = o3d.visualization.VisualizerWithEditing()
+    vis.create_window()
+    vis.add_geometry(pcd)
+    vis.run()  # user picks points
+    vis.destroy_window()
+    print("")
+    return vis.get_picked_points()
+
+
+def read_model(path):
+    try:
+        pcd = o3d.io.read_point_cloud(path)
+    except Exception:
+        print("Error: cannot read file: {}".format(path))
+        return False, None
+
+    if not pcd.has_points():
+        return False, None
+
+    return True, pcd
+
+
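+# RegistrationBySVD below computes a rigid transform from the picked correspondence
+# pairs and then refines it with point-to-point ICP. The closed-form step provided by
+# misc3d (compute_transformation_svd) is presumably a least-squares fit in the style of
+# Kabsch/Umeyama; a minimal numpy sketch of that idea (assuming numpy is imported as np
+# and src, dst are N x 3 arrays of corresponding points) looks like:
+#
+#   c_src, c_dst = src.mean(axis=0), dst.mean(axis=0)
+#   U, _, Vt = np.linalg.svd((src - c_src).T @ (dst - c_dst))
+#   R = Vt.T @ U.T
+#   if np.linalg.det(R) < 0:   # avoid a reflection
+#       Vt[2, :] *= -1
+#       R = Vt.T @ U.T
+#   t = c_dst - R @ c_src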
+def RegistrationBySVD(model, scene, index1, index2, threshold=0.02):
+    src = model.select_by_index(index1)
+    dst = scene.select_by_index(index2)
+    pose = m3d.registration.compute_transformation_svd(src, dst)
+
+    # icp refine
+    result = o3d.pipelines.registration.registration_icp(
+        model, scene, threshold, pose,
+        o3d.pipelines.registration.TransformationEstimationPointToPoint())
+    pose = result.transformation
+    return pose
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_path', help='path to CAD model')
+    parser.add_argument("--data_path", default='dataset',
+                        help="path to RGBD data set")
+    args = parser.parse_args()
+
+    # read integrated scene
+    scene = o3d.io.read_point_cloud(
+        os.path.join(args.data_path, 'scene/integrated.ply'))
+
+    init_poses = {}
+    while True:
+        file_name = input('Please enter the name of the model CAD file: ')
+        ret, model = read_model(os.path.join(
+            args.model_path, file_name + '.ply'))
+        if ret is False:
+            print('Failed to read the model, please enter the correct file name.')
+            continue
+
+        model_index = pick_points(model)
+        scene_index = pick_points(scene)
+
+        if len(model_index) < 3 or len(scene_index) < 3 or len(model_index) != len(scene_index):
+            print('Please pick at least three correspondences and the same number of points on both geometries.')
+            continue
+
+        pose = RegistrationBySVD(model, scene, model_index, scene_index)
+        model.transform(pose)
+        model.paint_uniform_color([0, 1, 0])
+
+        o3d.visualization.draw_geometries([model, scene])
+
+        save = input('Save pose? (y/n): ')
+        if save == 'y':
+            if file_name not in init_poses:
+                init_poses[file_name] = [pose.flatten().tolist()]
+            else:
+                init_poses[file_name].append(pose.flatten().tolist())
+
+        end = input('Continue with another model? (y/n): ')
+        if end == 'n':
+            break
+
+    # save results inside data path
+    with open(os.path.join(args.data_path, 'init_poses.json'), 'w') as f:
+        json.dump(init_poses, f, indent=4)
diff --git a/app/label_maker/reconstruction.py b/app/label_maker/reconstruction.py
new file mode 100644
index 0000000..1157227
--- /dev/null
+++ b/app/label_maker/reconstruction.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import misc3d as m3d
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('config', help='path to pipeline config file')
+    args = parser.parse_args()
+
+    pipeline = m3d.reconstruction.ReconstructionPipeline(args.config)
+    pipeline.run_system()
diff --git a/app/label_maker/record_data.py b/app/label_maker/record_data.py
new file mode 100644
index 0000000..2c576b9
--- /dev/null
+++ b/app/label_maker/record_data.py
@@ -0,0 +1,127 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import shutil
+import argparse
+import numpy as np
+import os
+import time
+import json
+import cv2
+
+import sys
+sys.path.append('../')
+from camera_manager import RealSenseManager, RealSenseFPS, RealSenseResolution
+
+
+def remove_and_create_dir(dir_path):
+    if os.path.exists(dir_path):
+        shutil.rmtree(dir_path)
+    os.makedirs(dir_path)
+    os.makedirs(os.path.join(dir_path, 'depth'))
+    os.makedirs(os.path.join(dir_path, 'color'))
+
+
+def save_color_and_depth(color, depth, dir_path):
+    global save_count
+    cv2.imwrite(os.path.join(dir_path, 'color', '%06d.png' % save_count), color)
+    cv2.imwrite(os.path.join(dir_path, 'depth', '%06d.png' % save_count), depth)
+
+
+def save_camera_intrinsic(camera_intrinsic, resolution, dir_path):
+    with open(os.path.join(dir_path, 'camera_intrinsic.json'), 'w') as outfile:
+        json.dump(
+            {
+                'width': resolution[0],
+                'height': resolution[1],
+                'fx': round(camera_intrinsic[0], 4),
+                'fy': round(camera_intrinsic[1], 4),
+                'cx': round(camera_intrinsic[2], 4),
+                'cy': round(camera_intrinsic[3], 4),
+                'depth_scale': 1000.0
+            },
+            outfile,
+            indent=4)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="RGBD camera recorder. Please select one of the optional arguments")
+    parser.add_argument("--dataset",
+                        default='dataset',
+                        type=str,
+                        help="name of dataset folder.")
+    parser.add_argument("--record_mode",
+                        default='d',
+                        type=str,
+                        help="mode of data capture: discrete (d) or continuous (c).")
+    parser.add_argument("--frame_interval",
+                        default=5,
+                        type=int,
+                        help="interval of frames to be recorded.")
+    parser.add_argument("--maximum_frame",
+                        default=200,
+                        type=int,
+                        help="the maximum number of frames to be recorded.")
+    args = parser.parse_args()
+
+    # create dataset folder
+    remove_and_create_dir(args.dataset)
+
+    camera = RealSenseManager(
+        resolution=RealSenseResolution.Medium, fps=RealSenseFPS.High)
+    camera.open()
+
+    # save camera intrinsic
+    intrinsic, _ = camera.get_intrinsics()
+    resolution = camera.get_resolution()
+    save_camera_intrinsic(intrinsic, resolution, args.dataset)
+
+    count = 0
+    save_count = 0
+    try:
+        while True:
+            t0 = time.time()
+
+            color, depth = camera.get_data()
+            color = cv2.cvtColor(color, cv2.COLOR_RGB2BGR)
+
+            # gray out pixels without valid depth in the preview
+            color_cv = color.copy()
+            depth_3d = np.dstack((depth, depth, depth))
+            bg_removed = np.where((depth_3d <= 0), 127, color_cv)
+            depth_colormap = cv2.applyColorMap(
+                cv2.convertScaleAbs(depth, alpha=0.09), cv2.COLORMAP_JET)
+
+            fps = 'FPS: ' + str(int(1 / (time.time() - t0)))
+            cv2.putText(bg_removed, fps, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
+                        (0, 255, 0), 2, cv2.LINE_AA)
+            images = np.hstack((bg_removed, depth_colormap))
+            cv2.namedWindow('RGBD Recorder', cv2.WINDOW_AUTOSIZE)
+            cv2.imshow('RGBD Recorder', images)
+            key = cv2.waitKey(1)
+
+            count += 1
+            if args.record_mode == 'd':
+                if key == ord('s'):
+                    save_color_and_depth(color, depth, args.dataset)
+                    save_count += 1
+            else:
+                if count % args.frame_interval == 0:
+                    save_color_and_depth(color, depth, args.dataset)
+                    save_count += 1
+
+            # if 'esc' button pressed, escape loop and exit program
+            if key == 27:
+                cv2.destroyAllWindows()
+                break
+
+            if save_count >= args.maximum_frame:
+                print('Maximum number of frames reached.')
+                break
+
+    except KeyboardInterrupt:
+        camera.close()
+    finally:
+        print('Exit')
diff --git a/app/reconstruction/README.md b/app/reconstruction/README.md
index 55aa492..47cf8ee 100644
--- a/app/reconstruction/README.md
+++ b/app/reconstruction/README.md
@@ -44,6 +44,7 @@ Create a `json` file with the following elements:
 These are the whole parameters of the reconstruction pipeline that can be tuned. If you do not specify part of these parameters, the default value will be used.
 
 #### Parameters Description
+- `camera`: Specify the width, height, fx, fy, cx, cy and depth_scale of the RGBD camera.
 - `make_fragments`:
   1. `descriptor_type`: The type of feature descriptor. It can be `orb` or `sift`.
   2. `feature_num`: The number of features extracted from each color image.
diff --git a/app/utils.py b/app/utils.py
new file mode 100644
index 0000000..917cbc3
--- /dev/null
+++ b/app/utils.py
@@ -0,0 +1,204 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import cv2
+import open3d as o3d
+
+from scipy.spatial.transform import Rotation
+
+
+def mat_to_euler(m_list, mode='xyz', unit='m'):
+    if isinstance(m_list, list) is False:
+        m_list = [m_list]
+    out = []
+    for m in m_list:
+        r = m[:3, :3]
+        euler = Rotation.from_matrix(r).as_euler(mode, degrees=True)
+
+        if unit == 'mm':
+            pose = np.r_[m[:3, 3] / 1000, euler].tolist()
+        else:
+            pose = np.r_[m[:3, 3], euler].tolist()
+        out.extend(pose)
+
+    return out
+
+
+def mask_to_bbox(mask):
+    """ Convert a binary mask to a bounding box (x, y, w, h).
+
+    Args:
+        mask (np.ndarray): mask with values 0 and 255.
+
+    Returns:
+        List: [x, y, w, h]
+    """
+    mask = mask.astype(np.uint8)
+    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+
+    # keep the bounding rectangle of the largest contour
+    x = 0
+    y = 0
+    w = 0
+    h = 0
+    for contour in contours:
+        tmp_x, tmp_y, tmp_w, tmp_h = cv2.boundingRect(contour)
+        if tmp_w * tmp_h > w * h:
+            x = tmp_x
+            y = tmp_y
+            w = tmp_w
+            h = tmp_h
+    return [x, y, w, h]
+
+
+def rgbd_to_pointcloud(rgb,
+                       depth,
+                       intrinsic,
+                       depth_scale=1000.0,
+                       depth_trunc=3.0,
+                       project_valid_depth=False):
+    """ Convert RGBD images to a point cloud.
+
+    Args:
+        rgb ([np.ndarray]): rgb image.
+        depth ([np.ndarray]): depth image.
+        intrinsic ([tuple]): (fx, fy, cx, cy).
+        depth_scale (float, optional): depth scale factor. Defaults to 1000.0.
+        depth_trunc (float, optional): depth truncation distance in meters. Defaults to 3.0.
+        project_valid_depth (bool, optional): only keep points with valid depth. Defaults to False.
+
+    Returns:
+        [open3d.geometry.PointCloud]: the colored point cloud.
+    """
+
+    shape = depth.shape
+    if shape[0] != rgb.shape[0] or shape[1] != rgb.shape[1]:
+        print('Shape of depth and rgb image do not match.')
+        return o3d.geometry.PointCloud()
+
+    depth = o3d.geometry.Image(depth)
+    color = o3d.geometry.Image(rgb)
+
+    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        color, depth, depth_scale, depth_trunc, convert_rgb_to_intensity=False)
+
+    # PinholeCameraIntrinsic expects (width, height), i.e. (shape[1], shape[0])
+    pinhole_camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
+        shape[1], shape[0], intrinsic[0], intrinsic[1], intrinsic[2],
+        intrinsic[3])
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd,
+        pinhole_camera_intrinsic,
+        project_valid_depth_only=project_valid_depth)
+
+    return pcd
+
+
+def depth_to_pointcloud(depth,
+                        intrinsic,
+                        depth_scale=1000.0,
+                        depth_trunc=3.0,
+                        project_valid_depth=False):
+    """ Convert a depth image to a point cloud.
+
+    Args:
+        depth ([np.ndarray]): depth image.
+        intrinsic ([tuple]): (fx, fy, cx, cy).
+        depth_scale (float, optional): depth scale factor. Defaults to 1000.0.
+        depth_trunc (float, optional): depth truncation distance in meters. Defaults to 3.0.
+        project_valid_depth (bool, optional): only keep points with valid depth. Defaults to False.
+
+    Returns:
+        [open3d.geometry.PointCloud]: the point cloud.
+    """
+
+    shape = depth.shape
+    depth = o3d.geometry.Image(depth)
+
+    # PinholeCameraIntrinsic expects (width, height), i.e. (shape[1], shape[0])
+    pinhole_camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
+        shape[1], shape[0], intrinsic[0], intrinsic[1], intrinsic[2],
+        intrinsic[3])
+    pcd = o3d.geometry.PointCloud.create_from_depth_image(
+        depth,
+        pinhole_camera_intrinsic,
+        depth_scale=depth_scale,
+        depth_trunc=depth_trunc,
+        project_valid_depth_only=project_valid_depth)
+
+    return pcd
+
+
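+# Example (sketch): build a point cloud from one recorded depth frame, assuming the
+# dataset layout and camera_intrinsic.json written by label_maker/record_data.py:
+#
+#   import json
+#   with open('dataset/camera_intrinsic.json', 'r') as f:
+#       cam = json.load(f)
+#   depth = cv2.imread('dataset/depth/000000.png', cv2.IMREAD_UNCHANGED)
+#   pcd = depth_to_pointcloud(depth, (cam['fx'], cam['fy'], cam['cx'], cam['cy']),
+#                             depth_scale=cam['depth_scale'])
+
+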
+def try_except(func):
+    """
+    Decorator to catch exceptions.
+    """
+
+    # usage: @try_except above the function to be wrapped
+    def handler(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except Exception as e:
+            print(e)
+
+    return handler
+
+
+class Colors:
+    # Ultralytics color palette https://ultralytics.com/
+    def __init__(self):
+        # hex = matplotlib.colors.TABLEAU_COLORS.values()
+        hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
+               '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
+        self.palette = [self.hex2rgb('#' + c) for c in hex]
+        self.n = len(self.palette)
+
+    def __call__(self, i, bgr=False):
+        c = self.palette[int(i) % self.n]
+        return (c[2], c[1], c[0]) if bgr else c
+
+    @staticmethod
+    def hex2rgb(h):  # rgb order (PIL)
+        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
+
+
+def plot_bboxes(bboxes, img, map_class_name=None):
+    """ Plot bounding boxes on an image.
+
+    Args:
+        bboxes (np.ndarray): bounding boxes with confidence score and class id
+        img (np.ndarray): an image object
+        map_class_name (dict): a map from class id to class name
+    """
+    if bboxes is None:
+        return
+    # generate color for unique class
+    color = Colors()
+
+    for bbox in bboxes:
+        class_id = int(bbox[5])
+        c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
+        cv2.rectangle(img,
+                      c1,
+                      c2,
+                      color(class_id, True),
+                      thickness=2,
+                      lineType=cv2.LINE_AA)
+        if map_class_name is not None:
+            class_name = map_class_name[class_id]
+        else:
+            class_name = str(class_id)
+        text = 'class: ' + class_name + ' conf: {:.2f}'.format(bbox[4])
+        t_size = cv2.getTextSize(text, 0, fontScale=0.5, thickness=1)[0]
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+        cv2.rectangle(img, c1, c2, color(class_id, True), -1, cv2.LINE_AA)
+        cv2.putText(
+            img,
+            text,
+            (c1[0], c1[1] - 2),
+            0,
+            0.5,
+            [225, 255, 255],
+            thickness=1,
+            lineType=cv2.LINE_AA,
+        )