diff --git a/lgmcts/algorithm/mcts.py b/lgmcts/algorithm/mcts.py index d471946..dd7b71e 100644 --- a/lgmcts/algorithm/mcts.py +++ b/lgmcts/algorithm/mcts.py @@ -20,12 +20,13 @@ class Sampler: """ manipulating_object, aligning_object, direction """ + def __init__(self, obj_name, origin_name, direction, region: Region2DSampler): self.obj_name = obj_name self.origin_name = origin_name self.direction = direction self.region = region - + class Node(object): """MCTS Node""" @@ -41,10 +42,10 @@ def __init__( updated_obj_id=None, UCB_scalar=1.0, num_sampling=1, - obj_support_tree:anytree.Node = None, + obj_support_tree: anytree.Node = None, prior_dict={}, verbose=False, - rng = None + rng=None ) -> None: self.node_id = node_id @@ -67,7 +68,7 @@ def __init__( self.verbose = verbose self.rng = rng - self.segmentation = None # segmentation of the workspace, will be generated only once when needed + self.segmentation = None # segmentation of the workspace, will be generated only once when needed def generate_actions(self): """ @@ -75,8 +76,8 @@ def generate_actions(self): That is, what samplers can be sampled without breaking the pattern ordering """ - no_sample_objs = set() # objects that cannot be sampled because of ordering - for obj_id, sampler in self.sampler_dict.items(): # check ordering + no_sample_objs = set() # objects that cannot be sampled because of ordering + for obj_id, sampler in self.sampler_dict.items(): # check ordering ordered = sampler.sample_info.get("ordered", False) if ordered: prior_objs = sampler.obj_ids[:sampler.obj_ids.index(obj_id)] @@ -84,7 +85,7 @@ def generate_actions(self): if prior_obj in self.sampler_dict: no_sample_objs.add(obj_id) break - + return [obj_id for obj_id in self.sampler_dict.keys() if obj_id not in no_sample_objs] def UCB(self): @@ -133,7 +134,7 @@ def action_parametriczation(self, action): # check graspability found_node = anytree.search.find( self.obj_support_tree, lambda node: node.name == action[0] - ) + ) if found_node and len(found_node.children) > 0: # not graspable, move a leave on the subtree away # Search for all leaf nodes @@ -141,10 +142,10 @@ def action_parametriczation(self, action): moved_obj = self.rng.choice(leaf_nodes).name # add a sampler to move the obstacle away buffer_sampler = SampleData( - pattern="line", - obj_id = moved_obj, - obj_ids = [moved_obj], - obj_poses_pix = {}) + pattern="line", + obj_id=moved_obj, + obj_ids=[moved_obj], + obj_poses_pix={}) success, _, (moved_obj, new_position) = self.sampling_function( self.region_sampler, self.object_states, @@ -152,7 +153,7 @@ def action_parametriczation(self, action): ) solved_sampler_obj_id = float('inf') return action, moved_obj, new_position, solved_sampler_obj_id - + sampler = self.sampler_dict[action[0]] success, obs, (moved_obj, new_position) = self.sampling_function( self.region_sampler, @@ -160,7 +161,7 @@ def action_parametriczation(self, action): sampler, ) solved_sampler_obj_id, _ = action - if not success: # fails to complete the sampling, do + if not success: # fails to complete the sampling, do if obs is None: # fails but not because of collision (e.g., out of workspace) solved_sampler_obj_id = float('inf') @@ -168,10 +169,10 @@ def action_parametriczation(self, action): else: # add a sampler to move the obstacle away buffer_sampler = SampleData( - pattern="line", - obj_id = obs, - obj_ids = [obs], - obj_poses_pix = {}) + pattern="line", + obj_id=obs, + obj_ids=[obs], + obj_poses_pix={}) success, _, (moved_obj, new_position) = self.sampling_function( self.region_sampler, self.object_states, @@ -181,12 +182,12 @@ def action_parametriczation(self, action): return action, moved_obj, new_position, solved_sampler_obj_id def sampling_function( - self, - region: Region2DSampler, - object_states: dict, - sample_data: SampleData, - verbose: bool = False, - ): + self, + region: Region2DSampler, + object_states: dict, + sample_data: SampleData, + verbose: bool = False, + ): """ sampling function If sampling succeeded, return True, None, (moved_obj_id, new_pose) @@ -197,32 +198,32 @@ def sampling_function( success, obs_name, action:(obj_name, new_pos) """ obj_id = sample_data.obj_id - + # update region region.set_object_poses(obj_states=object_states) # region.visualize() # keep track of sampled object poses - sampled_obj_poses_pix = {} + sampled_obj_poses_pix = {} pattern_objs = sample_data.obj_ids # objects involved in the sampling pattern objs_away_from_goal = list(self.sampler_dict.keys()) # pattern objects away from goal objs_at_goal = [ - pattern_obj for pattern_obj in pattern_objs - if (pattern_obj != obj_id) and (pattern_obj not in objs_away_from_goal) - ] # pattern objects at goal - #FIXME: this could be a problem here, because there is an offset + pattern_obj for pattern_obj in pattern_objs + if (pattern_obj != obj_id) and (pattern_obj not in objs_away_from_goal) + ] # pattern objects at goal + # FIXME: this could be a problem here, because there is an offset sampled_obj_poses_pix = { - obj:region._world2pix(object_states[obj][:3] + region.objects[obj].pos_offset) + obj: region._world2pix(object_states[obj][:3] + region.objects[obj].pos_offset) for obj in objs_at_goal} # update prior if sample_data.pattern in self.prior_dict: prior, pattern_info = self.prior_dict[sample_data.pattern].gen_prior( - region.grid_size, region.rng, - obj_id=sample_data.obj_id, + region.grid_size, region.rng, + obj_id=sample_data.obj_id, obj_ids=sample_data.obj_ids, obj_poses_pix=sampled_obj_poses_pix, - sample_info = sample_data.sample_info - ) + sample_info=sample_data.sample_info + ) # cv2.imshow("prior", prior) # cv2.waitKey(0) # the prior object is too close to the boundary so that no sampling is possible @@ -230,18 +231,18 @@ def sampling_function( obs = self.rng.choice([obj for obj in sample_data.obj_ids if obj != obj_id]) return False, obs, (obj_id, None) # sample - valid_pose, _, samples_status, _ = region.sample(sample_data.obj_id, 1, prior, allow_outside=False) + valid_pose, _, samples_status, _ = region.sample(sample_data.obj_id, 1, prior, allow_outside=False, pattern_info=pattern_info) if valid_pose.shape[0] > 0: valid_pose = valid_pose.reshape(-1) else: raise NotImplementedError - + success = samples_status == SampleStatus.SUCCESS # test # print(f"sample status: {samples_status.name}, valid_pose: {valid_pose}") - if not success: # find an obstacle + if not success: # find an obstacle if self.segmentation is None: self.segmentation = self.semantic_segmentation(region) leaf_nodes = self.obj_support_tree.leaves @@ -250,7 +251,7 @@ def sampling_function( while (counter > 0): counter -= 1 sample_pix, sample_probs = sample_distribution(prob=prior, rng=region.rng, n_samples=1) # (N, 2) - obs_id = self.segmentation[sample_pix[0][0], sample_pix[0][1], 0] + obs_id = self.segmentation[sample_pix[0][0], sample_pix[0][1], 0] if (obs_id not in [-1, obj_id]) and (obs_id in leaf_objs): break if counter <= 0: @@ -261,8 +262,8 @@ def sampling_function( return success, obs_id, action - def semantic_segmentation(self, region:Region2DSampler): - #TODO: Merge this part into sampler + def semantic_segmentation(self, region: Region2DSampler): + # TODO: Merge this part into sampler # semetic segmentation of the workspace segmentation = -1.0 * np.ones((region.grid_size[0], region.grid_size[1], 3), dtype=np.float32) # objects @@ -270,7 +271,8 @@ def semantic_segmentation(self, region:Region2DSampler): region._put_mask( mask=obj_data.mask, pos=obj_data.pos, - occupancy_map=segmentation, + rot=obj_data.rot, + region_map=segmentation, value=float(obj_id), ) return segmentation @@ -292,22 +294,22 @@ def __init__( region_sampler: Region2DSampler, L: List[SampleData], UCB_scalar=1.0, - obj_support_tree:anytree.Node = None, + obj_support_tree: anytree.Node = None, prior_dict={}, - n_samples = 1, + n_samples=1, verbose: bool = False, - seed = 0 + seed=0 ) -> None: self.rng = np.random.default_rng(seed=seed) self.settings = { "UCB_scalar": UCB_scalar, "prior_dict": prior_dict, "rng": self.rng, - "num_sampling" : n_samples + "num_sampling": n_samples } self.region_sampler = region_sampler self.sampler_dict = {s.obj_id: s for s in L} - self.obj_support_tree = obj_support_tree # initial object support tree + self.obj_support_tree = obj_support_tree # initial object support tree self.start_state = region_sampler.get_object_poses() # intialize MCTS tree @@ -355,14 +357,14 @@ def search(self, max_iter: int = 10000, log_step: int = 1000) -> bool: while num_iter < max_iter: if (num_iter % log_step) == 0: - print(num_iter) + print(f"Searched {num_iter}/{max_iter} iterations") num_iter += 1 current_node = self.selection() - # an action in MCTS is represented by (sampler_id, trail_id), + # an action in MCTS is represented by (sampler_id, trail_id), # the index is according to L and the num_sample children list - #TODO: do K sampling at the same time @KAI + # TODO: do K sampling at the same time @KAI action, moved_obj, new_position, solved_sampler_obj_id = current_node.expansion() - if (new_position.shape[0] > 0): # go to a new state + if (new_position.shape[0] > 0): # go to a new state new_node = self.move( num_iter, action, @@ -371,7 +373,7 @@ def search(self, max_iter: int = 10000, log_step: int = 1000) -> bool: solved_sampler_obj_id, current_node, ) - else: # stay in the same state + else: # stay in the same state new_node = Node( num_iter, region_sampler=self.region_sampler, @@ -391,7 +393,7 @@ def search(self, max_iter: int = 10000, log_step: int = 1000) -> bool: current_node.children[action[0]].append(new_node) # update reward - #TODO: new reward function @KAI + # TODO: new reward function @KAI reward = self.reward_detection(new_node) self.back_propagation(new_node, reward) if reward == len(self.sampler_dict): @@ -425,9 +427,9 @@ def move( } # print(f"id: {node_id}, obj_states: {new_object_states}, target: {target}") - new_sampler_dict = {obj_id:sampler for obj_id, sampler in current_node.sampler_dict.items() if obj_id != solved_sampler_obj_id} - - # If we are moving an obstacle, the moved object may be an object moved to goal, + new_sampler_dict = {obj_id: sampler for obj_id, sampler in current_node.sampler_dict.items() if obj_id != solved_sampler_obj_id} + + # If we are moving an obstacle, the moved object may be an object moved to goal, # we need to retrive the sampler to indicate that this sampler needs to be solved again if solved_sampler_obj_id == float("inf"): backtracked_node = current_node @@ -484,29 +486,25 @@ def construct_plan(self, node: Node): moved_object = current_node.updated_obj_id # current_node.show_arrangement() if moved_object is not None: - old_pose = np.concatenate( - [parent_node.object_states[moved_object][:3], np.array([0, 0, 0, 1])], - axis=0).reshape(-1).astype(np.float32) - new_pose = np.concatenate( - [current_node.object_states[moved_object][:3], np.array([0, 0, 0, 1])], - axis=0).reshape(-1).astype(np.float32) + old_pose = parent_node.object_states[moved_object].reshape(-1).astype(np.float32) + new_pose = current_node.object_states[moved_object].reshape(-1).astype(np.float32) self.action_list.append( { - "obj_id": moved_object, - "old_pose": old_pose, - "new_pose": new_pose, - } + "obj_id": moved_object, + "old_pose": old_pose, + "new_pose": new_pose, + } ) current_node = parent_node self.action_list.reverse() # copy anytree -def copy_tree(node:anytree.Node): +def copy_tree(node: anytree.Node): copied_node = anytree.Node(copy.deepcopy(node.name)) - + for child in node.children: child_copy = copy_tree(child) child_copy.parent = copied_node - return copied_node \ No newline at end of file + return copied_node diff --git a/lgmcts/algorithm/region_sampler.py b/lgmcts/algorithm/region_sampler.py index aeaed29..36dad31 100644 --- a/lgmcts/algorithm/region_sampler.py +++ b/lgmcts/algorithm/region_sampler.py @@ -67,14 +67,15 @@ class ObjectData: color: color of object. """ name: str - pos: np.ndarray # center position - pos_offset: np.ndarray + pos: np.ndarray # [x, y, z] of mask center + pos_offset: np.ndarray # position offset of mask center and object center mask: np.ndarray # mask height: float # height points: np.ndarray color: Tuple[int, int, int] - rot: np.ndarray = np.array([0.0, 0.0, 0.0, 1.0], dtype=np.float32) # TODO: currently, rot is not implemented + rot: np.ndarray = np.array([0.0, 0.0, 0.0], dtype=np.float32) # rx, ry, rz collision_mask: np.ndarray = None # TODO: currently, collision mask is not implemented + x_axis: np.ndarray = np.array([1.0, 0.0]) # xy-axis @dataclass @@ -160,73 +161,53 @@ def add_object( ): """Add object to scene, create mask from points Args: - mask_mode: "sphere", "raw_mask", "convex_hull". "sphere" is to provide clearance. + mask_mode: "raw_mask", "convex_hull". "sphere" is to provide clearance. """ assert points is not None, "points should not be None" if pos_ref is None: pos_ref = (points.max(axis=0) + points.min(axis=0)) / 2.0 # project points to region plane points_pix = self._world2pix(points) + # get x-axis + x_axis = self._get_object_xy_axis(points_pix) lb_pix = np.array( [points_pix[:, 0].min(), points_pix[:, 1].min(), points_pix[:, 2].min()] ) # lb, lower bottom - if mask_mode == "sphere": - mask_height = points_pix[:, 0].max() - points_pix[:, 0].min() + 1 - mask_width = points_pix[:, 1].max() - points_pix[:, 1].min() + 1 - mask_size = math.ceil(math.sqrt(mask_height ** 2 + mask_width ** 2)) - # pad size to odd - if mask_size % 2 == 0: - mask_size += 1 - mask = np.zeros((mask_size, mask_size), dtype=np.uint8) - # draw a filled circle - cv2.circle(mask, (mask_size // 2, mask_size // 2), mask_size // 2, 1, thickness=-1) - elif mask_mode == "convex_hull": - mask_height = points_pix[:, 0].max() - points_pix[:, 0].min() + 1 - mask_width = points_pix[:, 1].max() - points_pix[:, 1].min() + 1 - # pad size to odd - if mask_width % 2 == 0: - mask_width += 1 - if mask_height % 2 == 0: - mask_height += 1 - mask = np.zeros((mask_height, mask_width), dtype=np.uint8) - points_convex_hull = ConvexHull(points_pix[:, :2]) - pixels = (points_convex_hull.points[points_convex_hull.vertices]).astype(np.int32) - lb_pix[:2] + mask_height = points_pix[:, 0].max() - points_pix[:, 0].min() + 1 + mask_width = points_pix[:, 1].max() - points_pix[:, 1].min() + 1 + mask_size = math.ceil(math.sqrt(mask_height ** 2 + mask_width ** 2)) + mask_size = mask_size if mask_size % 2 == 1 else mask_size + 1 # make sure it is odd + x_offset = (mask_size - mask_height) // 2 + y_offset = (mask_size - mask_width) // 2 + mask = np.zeros((mask_size, mask_size), dtype=np.uint8) + pixels = points_pix[:, :2].astype(np.int32) - lb_pix[:2] + pixels[:, 0] += x_offset + pixels[:, 1] += y_offset + if mask_mode == "convex_hull": + points_convex_hull = ConvexHull(pixels[:, [1, 0]]) + pixels = (points_convex_hull.points[points_convex_hull.vertices]).astype(np.int32) cv2.fillConvexPoly(mask, pixels, 1,) - # DEBUG start here - # contour = draw_convex_contour(mask, pixels) - # cv2.imshow("contour", mask * 255) - # cv2.waitKey(0) - # DEBUG end here elif mask_mode == "raw_mask": - mask_height = points_pix[:, 0].max() - points_pix[:, 0].min() + 1 - mask_width = points_pix[:, 1].max() - points_pix[:, 1].min() + 1 - # pad size to odd - if mask_width % 2 == 0: - mask_width += 1 - if mask_height % 2 == 0: - mask_height += 1 - mask = np.zeros((mask_height, mask_width), dtype=np.uint8) - pixels = points_pix[:, :2].astype(np.int32) - lb_pix[:2] mask[pixels[:, 0], pixels[:, 1]] = 1 - # ##DEBUG: check mask - # # resize the height to 500 - # mask_vis = cv2.resize(mask, (mask.shape[1] * 500 // mask.shape[0], 500), interpolation=cv2.INTER_NEAREST) - # cv2.imshow("mask", mask_vis * 255) - # cv2.waitKey(0) height = points_pix[:, 2].max() - points_pix[:, 2].min() - # compute offset compared with pos_ref (reference position) - mask_center = np.array([mask.shape[0] // 2, mask.shape[1] // 2, 0]) + lb_pix - mask_center_world = self._pix2world(mask_center) - pos_offset = mask_center_world - pos_ref + mask_center = self._world2pix(pos_ref) # use center of pcd as mask center + pos_offset = np.zeros(3, np.float32) name = name if name is not None else f"obj_{obj_id}" # apply a safety padding to mask mask = cv2.copyMakeBorder(mask, self.pix_padding, self.pix_padding, self.pix_padding, self.pix_padding, cv2.BORDER_CONSTANT, value=0) mask = cv2.dilate(mask, np.ones((self.pix_padding, self.pix_padding), dtype=np.uint8), iterations=1) - # cv2.imshow("mask", mask * 255) - # cv2.waitKey(0) self.objects[obj_id] = ObjectData(name=name, pos=mask_center, mask=mask, - height=height, color=color, points=points, pos_offset=pos_offset) + height=height, color=color, points=points, pos_offset=pos_offset, x_axis=x_axis) + + def _get_object_xy_axis(self, points_pix: np.ndarray): + """Compute xy-axis from points projection in x-y plane""" + # compute the eigen vector + cov = np.cov(points_pix[:, :2].T) + eig_val, eig_vec = np.linalg.eig(cov) + # get the largest eigen vector + x_axis = eig_vec[:, np.argmax(eig_val)] + return x_axis def get_object_pose(self, obj_id: int) -> np.ndarray: """Get object position""" @@ -247,14 +228,19 @@ def get_object_poses(self) -> Dict[str, np.ndarray]: return obj_poses def set_object_pose( - self, obj_id: int, obj_pos: np.ndarray, enable_vis: bool = False + self, obj_id: int, obj_pose: np.ndarray, enable_vis: bool = False ) -> None: """Update object in scene""" assert obj_id in self.objects, "Object not found" - mask_center_world = obj_pos[:3] + self.objects[obj_id].pos_offset # position - # mask_center_world = obj_pos[:3] + mask_center_world = obj_pose[:3] + self.objects[obj_id].pos_offset # position if obj_id in self.objects: self.objects[obj_id].pos = self._world2pix(mask_center_world) + if obj_pose.shape[0] == 6: + self.objects[obj_id].rot = obj_pose[3:] + elif obj_pose.shape[0] == 3: + self.objects[obj_id].rot = np.zeros(3, dtype=np.float32) + else: + raise ValueError("obj_pose should be of shape (3,) or (6,)") self.moving_marker = obj_id if enable_vis: self.visualize() @@ -266,17 +252,29 @@ def set_object_poses( for obj_id, obj_pose in obj_states.items(): self.set_object_pose(obj_id, obj_pose, enable_vis) + def _rotate_mask(self, mask: np.ndarray, angle: float) -> np.ndarray: + """Rotate mask by angle w.r.t to center""" + angle_degree = angle * 180 / math.pi + rows, cols = mask.shape + center = (cols / 2, rows / 2) + rotation_matrix = cv2.getRotationMatrix2D(center, angle_degree, 1) + rotated_mask = cv2.warpAffine(mask, rotation_matrix, (cols, rows)) + return rotated_mask + def _put_mask( self, mask: np.ndarray, pos: np.ndarray, - occupancy_map: np.ndarray, + rot: np.ndarray, + region_map: np.ndarray, **kwargs, ) -> bool: """Put mask to the occupancy grid, pos is at left bottom corner of mask""" - height, width = occupancy_map.shape[:2] - mask_x = mask.shape[0] - mask_y = mask.shape[1] + # rotate the mask + mask_rotated = self._rotate_mask(mask, rot[2]) # rot[2] is the rotation along z-axis + height, width = region_map.shape[:2] + mask_x = mask_rotated.shape[0] + mask_y = mask_rotated.shape[1] mask_half_x = (mask_x - 1) // 2 mask_half_y = (mask_y - 1) // 2 @@ -289,13 +287,21 @@ def _put_mask( mask_max_y = min(mask_y, width - pos[1] + mask_half_y) if mask_max_x <= mask_min_x or mask_max_y <= mask_min_y: return False # no mask in region - mask_in_region = mask[mask_min_x:mask_max_x, mask_min_y:mask_max_y] - assert len(occupancy_map.shape) == 3, "Only support 3D occupancy map" - occupancy_map[ - pos[0] - mask_half_x + mask_min_x: pos[0] - mask_half_x + mask_max_x, - pos[1] - mask_half_y + mask_min_y: pos[1] - mask_half_y + mask_max_y, - :, - ][mask_in_region == 1] = value + mask_in_region = mask_rotated[mask_min_x:mask_max_x, mask_min_y:mask_max_y] + assert len(region_map.shape) == 3, "Only support 3D occupancy map" + mode = kwargs.get("mode", "replace") + if mode == "replace": + region_map[ + pos[0] - mask_half_x + mask_min_x: pos[0] - mask_half_x + mask_max_x, + pos[1] - mask_half_y + mask_min_y: pos[1] - mask_half_y + mask_max_y, + :, + ][mask_in_region == 1] = value + elif mode == "add": + region_map[ + pos[0] - mask_half_x + mask_min_x: pos[0] - mask_half_x + mask_max_x, + pos[1] - mask_half_y + mask_min_y: pos[1] - mask_half_y + mask_max_y, + :, + ][mask_in_region == 1] += value # # DEBUG # cv2.circle(occupancy_map, (pos[1], pos[0]), 1, (255, 0, 0), thickness=-1) # cv2.rectangle(occupancy_map, (pos[1] - mask_half_y, pos[0] - mask_half_x), @@ -313,15 +319,16 @@ def get_occupancy(self, obj_list: list[int] | None = None) -> bool: self._put_mask( mask=obj_data.mask, pos=obj_data.pos, - occupancy_map=occupancy_map, + rot=obj_data.rot, + region_map=occupancy_map, value=0.0, ) return occupancy_map - def get_free_space(self, obj_id: int, allow_outside: bool = True) -> np.ndarray: + def get_free_space(self, obj_id: int, angle: float = 0.0, allow_outside: bool = True, mode: str = "raw") -> np.ndarray: """Get the free space of the object using cv2.erosion""" obj = self.objects[obj_id] - mask = obj.mask + mask = self._rotate_mask(obj.mask, angle) obj_list = [id for id in self.objects if id != obj_id] occupancy_map = self.get_occupancy(obj_list) if not allow_outside: @@ -331,8 +338,13 @@ def get_free_space(self, obj_id: int, allow_outside: bool = True) -> np.ndarray: occupancy_map[:, 0, :] = 0 occupancy_map[:, -1, :] = 0 # get free space, free is 1, occupied is 0 - kernel_size = max(mask.shape[0], mask.shape[1]) - kernel = np.ones((kernel_size, kernel_size), np.uint8) + if mode == "bbox": + kernel_size = max(mask.shape[0], mask.shape[1]) + kernel = np.ones((kernel_size, kernel_size), np.uint8) + elif mode == "raw": + kernel = mask + else: + raise ValueError(f"Unknown mode {mode}") free_space = cv2.erode(occupancy_map, kernel, iterations=1) # ## DEBUG @@ -352,8 +364,26 @@ def get_free_space(self, obj_id: int, allow_outside: bool = True) -> np.ndarray: # cv2.waitKey(0) return free_space + def check_collision(self): + """Check collision""" + collision_map = np.zeros((self.grid_size[0], self.grid_size[1], 1), dtype=np.float32) + # objects + obj_list = list(self.objects.keys()) + for obj_id, obj_data in self.objects.items(): + if obj_id in obj_list: + self._put_mask( + mask=obj_data.mask, + pos=obj_data.pos, + rot=obj_data.rot, + region_map=collision_map, + value=1.0, + mode="add", + ) + return (collision_map > 1).any() + def sample( - self, obj_id: int, n_samples: int, prior: np.array | None = None, allow_outside: bool = True + self, obj_id: int, n_samples: int, prior: np.array | None = None, allow_outside: bool = True, + pattern_info: dict[str, any] = {}, **kwargs ) -> Tuple[List[np.ndarray], List[np.ndarray], SampleStatus, Dict[str, any]]: """General sampling method Args: @@ -366,7 +396,13 @@ def sample( - free_volume: free volume of the region - sample_probs: probability of each sample """ - free_space = self.get_free_space(obj_id, allow_outside).astype(np.float32) # free is 1, occupied is 0 + angle_desire = pattern_info.get("angle", 0.0) + # angle here is the angle disired angle for x-axis + raw_x_axis = self.objects[obj_id].x_axis + raw_x_axis_angle = math.atan2(raw_x_axis[1], raw_x_axis[0]) + angle_rot = angle_desire - raw_x_axis_angle + collision_mode = kwargs.get("collision_mode", "raw") + free_space = self.get_free_space(obj_id, angle_rot, allow_outside, mode=collision_mode).astype(np.float32) # free is 1, occupied is 0 if prior is not None: assert prior.shape[:2] == free_space.shape[:2], "prior shape must be the same as free shape" free_space = np.multiply(free_space, prior) @@ -376,10 +412,9 @@ def sample( samples_pix = np.concatenate([samples_pix, np.zeros((samples_pix.shape[0], 1))], axis=1) # (N, 3) samples_wd = self._pix2world(samples_pix) # (N, 3) samples_wd = samples_wd - self.objects[obj_id].pos_offset.reshape(1, 3) - # samples_wd = np.clip(samples_wd, a_max=self.pose_boundary[:, 1], a_min=self.pose_boundary[:, 0]) - # FIXME: currently we don't support sample in rotation, so we set it to identity - rots = np.tile(np.array([0.0, 0.0, 0.0, 1.0-(1e-6)], dtype=np.float32), (samples_pix.shape[0], 1)) + # rotation is along z-axis + rots = np.tile(np.array([0.0, 0.0, angle_rot], dtype=np.float32), (samples_pix.shape[0], 1)) samples_wd = np.hstack([samples_wd, rots]) # Assemble sample info sample_info = { @@ -415,10 +450,21 @@ def visualize(self, **kwargs): self._put_mask( mask=obj_data.mask, pos=obj_data.pos, - offset=None, - occupancy_map=img, + rot=obj_data.rot, + region_map=img, value=obj_color_np, ) + # show axis + x_axis = obj_data.x_axis + x_axis_rot = [x_axis[0] * math.cos(obj_data.rot[2]) - x_axis[1] * math.sin(obj_data.rot[2]), + x_axis[0] * math.sin(obj_data.rot[2]) + x_axis[1] * math.cos(obj_data.rot[2])] + x_axis_rot = [int(x_axis_rot[0] * 10), int(x_axis_rot[1] * 10)] + cv2.arrowedLine(img, (obj_data.pos[1], obj_data.pos[0]), + (obj_data.pos[1] + x_axis_rot[1], obj_data.pos[0] + x_axis_rot[0]), + (0, 0, 255), thickness=2) # red, x-axis + cv2.arrowedLine(img, (obj_data.pos[1], obj_data.pos[0]), + (obj_data.pos[1] + x_axis_rot[0], obj_data.pos[0] - x_axis_rot[1]), + (0, 255, 0), thickness=2) # green, y-axis # put the object id font_size = 0.002 / self.resolution * 0.8 cv2.putText(img, str(obj_id), (obj_data.pos[1], obj_data.pos[0]), cv2.FONT_HERSHEY_SIMPLEX, font_size, @@ -432,7 +478,7 @@ def visualize(self, **kwargs): cv2.rectangle(img, (obj_data.pos[1] - obj_data.mask.shape[1] // 2, obj_data.pos[0] - obj_data.mask.shape[0] // 2), (obj_data.pos[1] + obj_data.mask.shape[1] // 2, - obj_data.pos[0] + obj_data.mask.shape[0] // 2), (0, 255, 0), thickness=3) + obj_data.pos[0] + obj_data.mask.shape[0] // 2), (255, 0, 0), thickness=3) # concat with scene image if self.scene_pcd is not None: scene_image = self.project_pcd(self.scene_pcd) @@ -567,7 +613,7 @@ def load_from_pcds(self, pcd_list: list, name_ids: list, mask_mode: str, **kwarg obj_color = obj_color.mean(axis=0) * 255.0 # add object to region sampler self.add_object(obj_id=id, points=obj_pcd, pos_ref=pos_ref, name=name, color=obj_color, mask_mode=mask_mode) - self.set_object_pose(obj_id=id, obj_pos=obj_pcd_center) + self.set_object_pose(obj_id=id, obj_pose=obj_pcd_center) now_pose = self.get_object_pose(id) # analysing support tree @@ -581,7 +627,6 @@ def bulid_support_tree(self, iou_threshold: float = 0.5): self.obj_support_tree = Node(-1) for obj_id in obj_ids: self._append_obj_to_support_tree(obj_id, iou_threshold) - print(RenderTree(self.obj_support_tree)) def _append_obj_to_support_tree(self, obj_id, iou_threshold: float = 0.5): # traverse the tree in reverse order diff --git a/lgmcts/components/patterns.py b/lgmcts/components/patterns.py index 282c474..3dbac1b 100644 --- a/lgmcts/components/patterns.py +++ b/lgmcts/components/patterns.py @@ -20,7 +20,7 @@ "circle": { "radius": { "L": [0.4, 0.5], - "M": [0.2, 0.4], + "M": [0.1, 0.4], "S": [0.1, 0.2] } }, @@ -48,14 +48,6 @@ def gen_prior(cls, size, rng, **kwargs): """ raise NotImplementedError - @abstractclassmethod - def gen_ordered_prior(cls, size, rng, **kwargs): - """Generate a fixed pattern prior: - Args: - rng: random generator - """ - raise NotImplementedError - @abstractclassmethod def check(cls, obj_poses: dict[int, np.ndarray], **kwargs): """Check if the object states meet the pattern requirement @@ -106,16 +98,17 @@ def gen_prior(cls, img_size, rng, **kwargs): if rng.random() > 0.5: # horizontal line cv2.line(prior, (0, y0), (x0 + width, y0), 1.0, thickness) + angle = 0.0 else: # vertical line cv2.line(prior, (x0, 0), (x0, height), 1.0, thickness) - angle = 0.0 + angle = np.pi / 2.0 else: x0 = 0 y0 = 0 # no points are provided prior[:, :] = 1.0 - angle = 0.0 + angle = rng.integers(0, 1) * np.pi / 2.0 # randomly select a horizontal or vertical line elif len(rel_obj_ids) == 1: # given one pix x0 = rel_obj_poses_pix[0][1] @@ -144,6 +137,7 @@ def gen_prior(cls, img_size, rng, **kwargs): # Pattern info pattern_info = {} pattern_info["type"] = "pattern:line" + pattern_info["angle"] = angle pattern_info["min_length"] = scale_max pattern_info["max_length"] = scale_min pattern_info["length"] = scale @@ -151,39 +145,6 @@ def gen_prior(cls, img_size, rng, **kwargs): pattern_info["rotation"] = [0.0, 0.0, angle] return prior, pattern_info - @classmethod - def gen_ordered_prior(cls, img_size, rng, **kwargs): - obj_id = kwargs.get("obj_id", -1) - obj_ids = kwargs.get("obj_ids", []) - thickness = kwargs.get("thickness", 1) - assert len(obj_ids) == 0 or (len(obj_ids) >= cls._num_limit[0] and len(obj_ids) - <= cls._num_limit[1]), "Number of objects should be within the limit!" - - # extract relative obj & poses - obj_idx_in_list = obj_ids.index(obj_id) - assert obj_idx_in_list >= 0, "Object id not found!" - # some constants - scale = kwargs.get("scale", 0.1) - - position = kwargs.get("position", [0.0, 0.0]) - angle = kwargs.get("angle", 0.0) - - height, width = img_size[0], img_size[1] - prior = np.zeros([height, width], dtype=np.float32) - - x0 = int((position[0] + scale * math.sin(angle) * obj_idx_in_list) * width) - y0 = int((position[1] + scale * math.cos(angle) * obj_idx_in_list) * height) - cv2.circle(prior, (x0, y0), thickness, 1.0, -1) - pattern_info = { - "type": "pattern:line", - "min_length": scale, - "max_length": scale, - "length": scale, - "position": position.tolist() + [0.0], - "rotation": [0.0, 0.0, angle] - } - return prior, pattern_info - @classmethod def check(cls, obj_poses: dict[int, np.ndarray], **kwargs): """Check if obj poses meets a line pattern""" @@ -272,7 +233,6 @@ def gen_prior(cls, img_size, rng, **kwargs): obj_ids = kwargs.get("obj_ids", []) thickness = kwargs.get("thickness", 3) rel_size = kwargs.get("rel_size", "M") - segments = kwargs.get("segments", 6) assert len(obj_ids) == 0 or (len(obj_ids) >= cls._num_limit[0] and len(obj_ids) <= cls._num_limit[1]), "Number of objects should be within the limit!" @@ -288,67 +248,78 @@ def gen_prior(cls, img_size, rng, **kwargs): prior = np.zeros([height, width], dtype=np.float32) # some constants - # clearance = int(0.1 * min(height, width)) - clearance = 0 scale_max = PATTERN_CONSTANTS["circle"]["radius"][rel_size][0] scale_min = PATTERN_CONSTANTS["circle"]["radius"][rel_size][1] scale = rng.random() * (scale_max - scale_min) + scale_min radius = int(scale * (min(height, width))) - segments = len(obj_ids) + segments = len(obj_ids) if len(obj_ids) % 2 == 0 else len(obj_ids) + 1 block_vis = False if len(rel_obj_ids) == 0: if len(obj_ids) == 0: + # FIXME: Currently, this doesn't support generate proper angle # pure pattern center_x = rng.integers(radius, width - radius) center_y = rng.integers(radius, height - radius) - # cv2.circle(prior, (center_x, center_y), radius, 1.0, thickness) cls.draw_seg_circle(prior, (center_x, center_y), radius, 1.0, thickness, segments) + angle = 0.0 else: # no points are provided prior[radius:height - radius, radius:width - radius] = 1.0 + angle = np.pi / 2.0 block_vis = True elif len(rel_obj_ids) == 1: # given an pix, the next point is on the other side of circle # HACK: make sure the circle is within the region + angle = -np.pi / 2.0 x0, y0 = rel_obj_poses_pix[0][1], rel_obj_poses_pix[0][0] if cls.check_circle_in_region((x0 - radius, y0), radius, height, width): - cv2.circle(prior, (x0 - radius, y0), 1, 1.0, thickness) + cv2.circle(prior, (x0 - 2 * radius, y0), 1, 1.0, thickness) if cls.check_circle_in_region((x0 + radius, y0), radius, height, width): cv2.circle(prior, (x0 + 2 * radius, y0), 1, 1.0, thickness) - if cls.check_circle_in_region((x0, y0 - radius), radius, height, width): - cv2.circle(prior, (x0, y0 - 2 * radius), 1, 1.0, thickness) - if cls.check_circle_in_region((x0, y0 + radius), radius, height, width): - cv2.circle(prior, (x0, y0 + 2 * radius), 1, 1.0, thickness) - elif len(rel_obj_ids) == 2: - # given two pix, locate the third point - # HACK: assume the two points are on the same height; making sure the circle is within the region - x0, y0 = rel_obj_poses_pix[0][1], rel_obj_poses_pix[0][0] - x1, y1 = rel_obj_poses_pix[1][1], rel_obj_poses_pix[1][0] - center = [int((x0 + x1) / 2), int((y0 + y1) / 2)] - radius = int(np.linalg.norm(np.array([x0, y0]) - np.array([x1, y1])) / 2) - cls.draw_seg_circle(prior, center, radius, 1.0, thickness, segments) + center_xs = [x0 - radius, x0 + radius] + center_ys = [y0, y0] else: # if more than one object is sampled, we generate a circle based on the objects rel_obj_poses_pix = [pix[:2] for pix in rel_obj_poses_pix] points = np.array(rel_obj_poses_pix) points = points[:, [1, 0]] # swap x, y - # Find the minimum enclosing circle of first 3 points - (center_x, center_y), radius = cls.cercle_circonscrit(points[:3, :]) - center_x = int(center_x) - center_y = int(center_y) - radius = int(radius) - cls.draw_seg_circle(prior, (center_x, center_y), radius, 1.0, thickness, segments) + if len(rel_obj_ids) == 2: + # Find the minimum enclosing circle of first 3 points + center_x = (points[0, 0] + points[1, 0]) / 2.0 + center_y = (points[0, 1] + points[1, 1]) / 2.0 + radius = np.linalg.norm(points[0, :] - points[1, :]) / 2.0 + else: + # Find the minimum enclosing circle of first 3 points + center, radius = cls.cercle_circonscrit(points[:3, :]) + center_x, center_y = center[0], center[1] + # provides two candidates + angle_circle = (2.0 * np.pi / segments) * (len(rel_obj_ids) // 2) + cv2.circle(prior, (int(center_x + radius * math.cos(angle_circle)), + int(center_y + radius * math.sin(angle_circle))), thickness, 1.0, -1) + cv2.circle(prior, (int(center_x - radius * math.cos(angle_circle)), + int(center_y - radius * math.sin(angle_circle))), thickness, 1.0, -1) + angle = np.pi / 2.0 - angle_circle + center_xs = [center_x] + center_ys = [center_y] if not block_vis: - cv2.imshow("cricle", prior) + vis_prior = prior.copy() + for (center_x, center_y) in zip(center_xs, center_ys): + cv2.circle(vis_prior, (int(center_x), int(center_y)), int(radius), 1.0, 1) + # add existing + for id, rel_pix in zip(rel_obj_ids, rel_obj_poses_pix): + cv2.circle(vis_prior, (rel_pix[1], rel_pix[0]), 5, 1.0, 1) + cv2.putText(vis_prior, f"{id}", (rel_pix[1] - 30, rel_pix[0]), cv2.FONT_HERSHEY_SIMPLEX, 1, 0.8, 1) + # put text + cv2.putText(vis_prior, f"angle: {angle}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, 0.8, 1) + cv2.imshow("cricle", vis_prior) cv2.waitKey(1) # pass # Pattern info pattern_info = {} pattern_info["type"] = "pattern:circle" - # pattern_info["center_pixel"] = [center_x, center_y] - # pattern_info["radius"] = radius pattern_info["obj_ids"] = obj_ids + pattern_info["angle"] = angle return prior, pattern_info @classmethod @@ -866,7 +837,7 @@ def gen_prior(cls, img_size, rng, **kwargs): close_range = int(close_range * min(height, width)) cv2.circle(prior_close, (int(anchor[0]), int(anchor[1])), close_range, 1.0, -1) prior = prior * prior_close - + # cv2.imshow("prior", prior) # cv2.waitKey(0) diff --git a/lgmcts/scripts/data_generation/gen_strdiff.py b/lgmcts/scripts/data_generation/gen_strdiff.py index d2da87f..603f780 100644 --- a/lgmcts/scripts/data_generation/gen_strdiff.py +++ b/lgmcts/scripts/data_generation/gen_strdiff.py @@ -74,7 +74,7 @@ def _generate_data_for_one_task( obj_selector.reset() # generate goal - prompt_str, obs = task.gen_goal_config_ordered(env, prompt_generator, obj_selector, enable_distract=False, force_anchor_exclude=True) + prompt_str, obs = task.gen_goal_config(env, prompt_generator, obj_selector, enable_distract=False, force_anchor_exclude=True) obs_cache.append(obs) # generate start diff --git a/lgmcts/scripts/eval/eval_lgmcts_real.py b/lgmcts/scripts/eval/eval_lgmcts_real.py index 1e6d452..4c57ba3 100644 --- a/lgmcts/scripts/eval/eval_lgmcts_real.py +++ b/lgmcts/scripts/eval/eval_lgmcts_real.py @@ -17,10 +17,10 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s # Step 1. load the scene camera_pose = np.array([ [-9.99019040e-01, 4.42819236e-02, 2.62008166e-04, 2.40630148e-02], - [ 4.42787021e-02, 9.98990882e-01, -7.52417562e-03, -4.88996877e-01], + [4.42787021e-02, 9.98990882e-01, -7.52417562e-03, -4.88996877e-01], [-5.94928738e-04, -7.50519333e-03, -9.99971659e-01, 5.96053361e-01], - [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00] - ]) + [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00] + ]) intrinsics_matrix = np.array([[635.41156006, 0., 644.21557617], [0., 634.80944824, 368.45831299], [0., 0., 1.]]) @@ -45,12 +45,13 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s texture_mapping[mask_info["value"]] = "unknown" pcd_list = utils.get_pointcloud_list(color, depth, mask, name_ids, intrinsics_matrix, np.eye(4, dtype=np.float32)) + # init region_sampler resolution = 0.002 pix_padding = 1 # padding for clearance bounds = np.array([[-0.4, 0.4], [-0.5, 0.5], [0.0, 0.5]]) # (height, width, depth) region_sampler = Region2DSamplerLGMCTS(resolution, pix_padding, bounds) - region_sampler.load_from_pcds(pcd_list, name_ids, mask_mode="raw_mask") + region_sampler.load_from_pcds(pcd_list, name_ids, mask_mode="convex_hull") region_sampler.visualize() init_objects_poses = region_sampler.get_object_poses() obj_id_reverse_mapping = {} @@ -67,8 +68,8 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s # goals = prompt_goals[0] goals = [ - {"type": "pattern:rectangle", "obj_ids": [3, 4, 5, 6]}, - {"type": "pattern:line", "obj_ids": [4, 1, 2]}, + # {"type": "pattern:circle", "obj_ids": [3, 4, 1, 2, 5]}, + {"type": "pattern:line", "obj_ids": [4, 1, 2, 5]}, ] sampled_ids = [] L = [] @@ -113,10 +114,12 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s "pose1_rotation": step["new_pose"][3:].tolist(), } export_action_list.append(action) + print(f"Collision status: {region_sampler.check_collision()}.") # export to json with open(os.path.join(data_path, "action_list.json"), "w") as f: json.dump(export_action_list, f) + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--dataset_path", type=str, default=None, help="Path to the dataset") @@ -129,6 +132,6 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s args = parser.parse_args() root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..") - real_data_path = os.path.join(root_path, "test_data", "real_000000") + real_data_path = os.path.join(root_path, "test_data", "real_000001", "output") prompt_path = f"{root_path}/output/struct_rearrange" eval_real(real_data_path, prompt_path, args.method, args.mask_mode, args.n_samples, args.debug) diff --git a/lgmcts/tasks/struct_rearrange.py b/lgmcts/tasks/struct_rearrange.py index 5a29d84..22c7fc2 100644 --- a/lgmcts/tasks/struct_rearrange.py +++ b/lgmcts/tasks/struct_rearrange.py @@ -317,85 +317,6 @@ def gen_goal_config(self, env, promptor: PromptGenerator, obj_selector: ObjectSe obs, _, _, _, _ = env.step() return self.prompt, obs - def gen_goal_config_ordered(self, env, promptor: PromptGenerator, obj_selector: ObjectSelector, **kwargs): - """Generate goal config with fix prior""" - num_color = kwargs.get("num_color", 2) # Each scene only has X colors - force_anchor_exclude = kwargs.get("force_anchor_exclude", False) - num_added_objs = 0 - obj_list = self.rng.choice(self.obj_list, min(self.max_num_obj, len(self.obj_list)), replace=False) # current candidate - color_list = self.rng.choice(self.color_list, num_color, replace=False) - # Step 1: select object candidates - for i in range(max(self.max_num_pattern - 1, 1)): - if obj_list is None or len(obj_list) <= 2: - break # no more enough candidate to formulate pattern - selected_objs = obj_list - selected_colors = self.rng.choice(color_list, len(obj_list), replace=True) - obj_selector.reset() - obj_selector.set_objs(selected_objs, selected_colors) - selection = obj_selector.gen_anchor_obj_prompt(force_anchor_exclude=force_anchor_exclude) - if not selection: # no valid selection - continue - # Step 2: select pattern & add objects to scene - if selection["anchor_obj"] is not None: - [anchor_id], _ = self.add_objects_to_pattern( - env, - objs=[selection["anchor_obj"]], - colors=[selection["anchor_color"]], - pattern_prior=None, - use_existing=False, - stack_prob=0.0) # add anchor object - else: - anchor_id = -1 - # generate pattern - pattern_type = env.rng.choice(self.pattern_types) - max_try = 3 - rearrange_obj_ids = [] - pattern_info = {} - - for i in range(max_try): - # generate random position & rotation - scale = 0.2 - position = self.rng.uniform(scale, 1.0-scale, size=(2,)) - angle = np.pi / 3.0 * self.rng.random() - np.pi / 6.0 # [-pi/6, pi/6] - in_objs = selection["in_obj"] - for in_id, in_obj in enumerate(in_objs): - pattern_prior, pattern_info = PATTERN_DICT[pattern_type].gen_ordered_prior( - env.ws_map_size, env.rng, obj_id=in_id, obj_ids=list(range(len(in_objs))), position=position, angle=angle, scale=scale) - added_obj_ids, obj_status = self.add_objects_to_pattern( - env, - objs=[selection["in_obj"][in_id]], - colors=[selection["in_color"][in_id]], - pattern_prior=pattern_prior, - use_existing=False, - stack_prob=0.0) - if len(added_obj_ids) == 0: - break - rearrange_obj_ids += added_obj_ids - if len(rearrange_obj_ids) == 0: - continue - else: - break - - if anchor_id == -1: - anchor_id = rearrange_obj_ids[0] - # update goals - pattern_info["obj_ids"] = rearrange_obj_ids - pattern_info["anchor_id"] = anchor_id - self.goals.append(pattern_info) - # update prompt - promptor.gen_pattern_prompt(selection["prompt_str"], pattern_type) - # update obj - num_added_objs += len(rearrange_obj_ids) - obj_list = selection["out_obj"] - color_list = selection["out_color"] - - # gen prompt - promptor.gen_prompt() - self.prompt = promptor.prompt - # Env step forward - obs, _, _, _, _ = env.step() - return self.prompt, obs - def gen_start_config(self, env) -> dict: """Generate a random config using existing objects""" self.add_objects_to_random(env, self.max_num_obj, use_existing=True, stack_prob=self.stack_prob)