update: fix for real robot experiment

changhaonan · Sep 12, 2023 · cfd6093 · cfd6093
1 parent 5abe90d
commit cfd6093
Show file tree

Hide file tree

Showing 7 changed files with 107 additions and 55 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -9,6 +9,7 @@ output
 # test data
 examples/
 build/
+test_data/
 
 # intermediate data
 *.mtl

diff --git a/lgmcts/algorithm/planner.py b/lgmcts/algorithm/planner.py
@@ -81,6 +81,7 @@ def plan_mcts(self, goals: list[SampleData], **kwargs):
         """
         seed = kwargs.get("seed", 0)  # update seed
         prior_dict = kwargs.get("prior_dict", {})
+        max_iter = kwargs.get("max_iter", 10000)
         sampled_obj_poses_pix = {}  # keep track of sampled object poses
         action_list = []
         cur_obj_poses = self.sampler.get_object_poses()
@@ -95,5 +96,5 @@ def plan_mcts(self, goals: list[SampleData], **kwargs):
             seed=seed,
         )
 
-        sampler_planner.search()
+        sampler_planner.search(max_iter=max_iter)
         return sampler_planner.action_list
diff --git a/lgmcts/algorithm/region_sampler.py b/lgmcts/algorithm/region_sampler.py
@@ -422,7 +422,7 @@ def visualize(self, **kwargs):
                 # put the object id
                 font_size = 0.002 / self.resolution * 0.8
                 cv2.putText(img, str(obj_id), (obj_data.pos[1], obj_data.pos[0]), cv2.FONT_HERSHEY_SIMPLEX, font_size,
-                            (255, 255, 255), 1, cv2.LINE_AA)
+                            (255, 255, 255), 2, cv2.LINE_AA)
             except:
                 continue
         # circle out the moving object

diff --git a/lgmcts/components/patterns.py b/lgmcts/components/patterns.py
@@ -20,7 +20,7 @@
     "circle": {
         "radius": {
             "L": [0.4, 0.5],
-            "M": [0.3, 0.4],
+            "M": [0.2, 0.4],
             "S": [0.1, 0.2]
         }
     },
@@ -101,14 +101,14 @@ def gen_prior(cls, img_size, rng, **kwargs):
         if len(rel_obj_ids) == 0:
             if len(obj_ids) == 0:
                 # pure pattern
-                x0 = rng.integers(int(scale * width), int((1.0 - scale) * width))
-                y0 = rng.integers(int(scale * height), int((1.0 - scale) * height))
+                x0 = rng.integers(0, width)
+                y0 = rng.integers(0, height)
                 if rng.random() > 0.5:
                     # horizontal line
-                    cv2.line(prior, (x0 - int(scale * width), y0), (x0 + int(scale * width), y0), 1.0, thickness)
+                    cv2.line(prior, (0, y0), (x0 + width, y0), 1.0, thickness)
                 else:
                     # vertical line
-                    cv2.line(prior, (x0, y0 - int(scale * height)), (x0, y0 + int(scale * height)), 1.0, thickness)
+                    cv2.line(prior, (x0, 0), (x0, height), 1.0, thickness)
                 angle = 0.0
             else:
                 x0 = 0
@@ -123,11 +123,11 @@ def gen_prior(cls, img_size, rng, **kwargs):
             # random pixel
             if rng.random() > 0.5:
                 # horizontal line
-                cv2.line(prior, (x0 - int(scale * width), y0), (x0 + int(scale * width), y0), 1.0, thickness)
+                cv2.line(prior, (0, y0), (width, y0), 1.0, thickness)
                 angle = 0.0
             else:
                 # vertical line
-                cv2.line(prior, (x0, y0 - int(scale * height)), (x0, y0 + int(scale * height)), 1.0, thickness)
+                cv2.line(prior, (x0, 0), (x0, height), 1.0, thickness)
                 angle = np.pi / 2.0
         else:
             # if more than one object is sampled, we generate a line based on the objects
@@ -262,15 +262,15 @@ def dist_p2l(cls, p, o, k):
 class CirclePattern(Pattern):
     """Circle pattern, obj poses should formulate a circle"""
     name = "circle"
-    _num_limit = [3, 6]  # at least 3 points
+    _num_limit = [3, 100]  # at least 3 points
 
     @classmethod
     def gen_prior(cls, img_size, rng, **kwargs):
         """Generate circle prior"""
         obj_poses_pix = kwargs.get("obj_poses_pix", {})
         obj_id = kwargs.get("obj_id", -1)
         obj_ids = kwargs.get("obj_ids", [])
-        thickness = kwargs.get("thickness", 1)
+        thickness = kwargs.get("thickness", 3)
         rel_size = kwargs.get("rel_size", "M")
         segments = kwargs.get("segments", 6)
         assert len(obj_ids) == 0 or (len(obj_ids) >= cls._num_limit[0] and len(obj_ids)
@@ -288,32 +288,41 @@ def gen_prior(cls, img_size, rng, **kwargs):
         prior = np.zeros([height, width], dtype=np.float32)
 
         # some constants
-        clearance = int(0.1 * min(height, width))
+        # clearance = int(0.1 * min(height, width))
+        clearance = 0
         scale_max = PATTERN_CONSTANTS["circle"]["radius"][rel_size][0]
         scale_min = PATTERN_CONSTANTS["circle"]["radius"][rel_size][1]
         scale = rng.random() * (scale_max - scale_min) + scale_min
-        radius = int(scale * (min(height, width) - clearance))
+        radius = int(scale * (min(height, width)))
+        segments = len(obj_ids)
 
+        block_vis = False
         if len(rel_obj_ids) == 0:
             if len(obj_ids) == 0:
                 # pure pattern
-                center_x = rng.integers(radius + clearance, width - radius - clearance)
-                center_y = rng.integers(radius + clearance, height - radius - clearance)
+                center_x = rng.integers(radius, width - radius)
+                center_y = rng.integers(radius, height - radius)
                 # cv2.circle(prior, (center_x, center_y), radius, 1.0, thickness)
                 cls.draw_seg_circle(prior, (center_x, center_y), radius, 1.0, thickness, segments)
             else:
                 # no points are provided
-                prior[radius + clearance:height - radius - clearance, radius + clearance:width - radius - clearance] = 1.0
+                prior[radius:height - radius, radius:width - radius] = 1.0
+                block_vis = True
         elif len(rel_obj_ids) == 1:
             # given an pix, the next point is on the other side of circle
+            # HACK: make sure the circle is within the region
             x0, y0 = rel_obj_poses_pix[0][1], rel_obj_poses_pix[0][0]
-            cv2.circle(prior, (x0 - 2 * radius, y0), 1, 1.0, thickness)
-            cv2.circle(prior, (x0 + 2 * radius, y0), 1, 1.0, thickness)
-            cv2.circle(prior, (x0, y0 - 2 * radius), 1, 1.0, thickness)
-            cv2.circle(prior, (x0, y0 + 2 * radius), 1, 1.0, thickness)
+            if cls.check_circle_in_region((x0 - radius, y0), radius, height, width):
+                cv2.circle(prior, (x0 - radius, y0), 1, 1.0, thickness)
+            if cls.check_circle_in_region((x0 + radius, y0), radius, height, width):
+                cv2.circle(prior, (x0 + 2 * radius, y0), 1, 1.0, thickness)
+            if cls.check_circle_in_region((x0, y0 - radius), radius, height, width):
+                cv2.circle(prior, (x0, y0 - 2 * radius), 1, 1.0, thickness)
+            if cls.check_circle_in_region((x0, y0 + radius), radius, height, width):
+                cv2.circle(prior, (x0, y0 + 2 * radius), 1, 1.0, thickness)
         elif len(rel_obj_ids) == 2:
             # given two pix, locate the third point
-            # HACK: assume the two points are on the same height
+            # HACK: assume the two points are on the same height; making sure the circle is within the region
             x0, y0 = rel_obj_poses_pix[0][1], rel_obj_poses_pix[0][0]
             x1, y1 = rel_obj_poses_pix[1][1], rel_obj_poses_pix[1][0]
             center = [int((x0 + x1) / 2), int((y0 + y1) / 2)]
@@ -330,9 +339,10 @@ def gen_prior(cls, img_size, rng, **kwargs):
             center_y = int(center_y)
             radius = int(radius)
             cls.draw_seg_circle(prior, (center_x, center_y), radius, 1.0, thickness, segments)
-        cv2.imshow("cricle", prior)
-        cv2.waitKey(1)
-
+        if not block_vis:
+            cv2.imshow("cricle", prior)
+            cv2.waitKey(1)
+            # pass
         # Pattern info
         pattern_info = {}
         pattern_info["type"] = "pattern:circle"
@@ -383,6 +393,15 @@ def objective(params, *args):
         distances = circle_equation(result.x, point_list)
         return distances
 
+    @classmethod
+    def check_circle_in_region(cls, center, radius, height, width):  # center is indexed as (x, y): [0] is tested against width, [1] against height
+        """Check if the circle is within the region"""
+        if center[0] - radius < 0 or center[0] + radius > width:  # horizontal extent leaves [0, width]
+            return False
+        if center[1] - radius < 0 or center[1] + radius > height:  # vertical extent leaves [0, height]
+            return False
+        return True  # bounds are inclusive: a circle touching 0 or width/height still counts as inside
+
     @classmethod
     def cercle_circonscrit(cls, T):
         (x1, y1), (x2, y2), (x3, y3) = T
@@ -840,6 +859,17 @@ def gen_prior(cls, img_size, rng, **kwargs):
         else:
             raise NotImplementedError("Spatial label {} not implemented!".format(spatial_label))
 
+        force_close = kwargs.get("force_close", True)
+        if force_close:
+            prior_close = np.zeros([height, width], dtype=np.float32)
+            close_range = kwargs.get("close_range", 0.2)
+            close_range = int(close_range * min(height, width))
+            cv2.circle(prior_close, (int(anchor[0]), int(anchor[1])), close_range, 1.0, -1)
+            prior = prior * prior_close
+
+        # cv2.imshow("prior", prior)
+        # cv2.waitKey(0)
+
         # Pattern info
         pattern_info = {}
         pattern_info["type"] = "pattern:spatial"

diff --git a/lgmcts/scripts/eval/eval_lgmcts_real.py b/lgmcts/scripts/eval/eval_lgmcts_real.py
@@ -15,11 +15,12 @@
 
 def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_samples: int = 10, debug: bool = True):
     # Step 1. load the scene
-    camera_pose = np.array([[-9.98961852e-01,  4.55540366e-02, -2.20703533e-04,  2.41992141e-02],
-                            [4.55544520e-02, 9.98936424e-01, -7.12825762e-03, -4.90078981e-01],
-                            [-1.04252110e-04, -7.13091146e-03, -9.99974569e-01, 5.98174172e-01],
-                            [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00],]
-                           )
+    camera_pose = np.array([
+        [-9.99019040e-01,  4.42819236e-02,  2.62008166e-04,  2.40630148e-02],
+        [ 4.42787021e-02,  9.98990882e-01, -7.52417562e-03, -4.88996877e-01],
+        [-5.94928738e-04, -7.50519333e-03, -9.99971659e-01,  5.96053361e-01],
+        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  1.00000000e+00]
+        ])
     intrinsics_matrix = np.array([[635.41156006,   0., 644.21557617],
                                   [0.,  634.80944824, 368.45831299],
                                   [0.,    0.,   1.]])
@@ -32,44 +33,43 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s
     color = cv2.imread(os.path.join(data_path, "color_image.png"))
     color = cv2.cvtColor(color, cv2.COLOR_BGR2RGB)
     # load pointcloud
-    name_ids = [(mask_info["label"], mask_info["value"])
-                for mask_info in label["mask"] if mask_info["label"] != "background"]
+    name_ids = []
+    texture_mapping = {}
+    for mask_info in label["mask"]:
+        if mask_info["label"] == "background":
+            continue
+        name_ids.append((mask_info["label"].split(" ")[0], mask_info["value"]))
+        if "color" in mask_info:
+            texture_mapping[mask_info["value"]] = mask_info["color"]
+        else:
+            texture_mapping[mask_info["value"]] = "unknown"
     pcd_list = utils.get_pointcloud_list(color, depth, mask, name_ids,
                                          intrinsics_matrix, np.eye(4, dtype=np.float32))
     # init region_sampler
     resolution = 0.002
     pix_padding = 1  # padding for clearance
-    bounds = np.array([[-0.5, 0.5], [-0.5, 0.5], [0.0, 0.5]])  # (height, width, depth)
+    bounds = np.array([[-0.4, 0.4], [-0.5, 0.5], [0.0, 0.5]])  # (height, width, depth)
     region_sampler = Region2DSamplerLGMCTS(resolution, pix_padding, bounds)
     region_sampler.load_from_pcds(pcd_list, name_ids, mask_mode="raw_mask")
     region_sampler.visualize()
     init_objects_poses = region_sampler.get_object_poses()
-    # create an object id reverse mapping
-    texture_mapping = {
-        "toothpaste": "ruby blue",
-        "smartphone2": "pearl white",
-        "cube": "yellow",
-        "ketchup bottle": "red",
-        "bottle": "yellow",
-        "ranch bottle": "white green blend",
-        "dessert box2": "chocolate",
-        "smartphone1": "graphite black",
-        "dessert box1": "strawberry splash",
-        "box":  "yellow"
-    }
     obj_id_reverse_mapping = {}
     for name_id in name_ids:
-        obj_id_reverse_mapping[name_id[1]] = {"obj_name": name_id[0], "texture_name": texture_mapping[name_id[0]]}
+        obj_id_reverse_mapping[name_id[1]] = {"obj_name": name_id[0], "texture_name": texture_mapping[name_id[1]]}
     # Step 2. parse the goal using LLM
     # FIXME: manually set the goal for now
     use_llm = True
-    run_llm = False
-    encode_ids_to_llm = True
-    # Generate goals using llm and object selector
-    prompt_goals = gen_prompt_goal_from_llm(prompt_path, use_llm=use_llm,
-                                            run_llm=run_llm, encode_ids_to_llm=encode_ids_to_llm, obj_id_reverse_mappings=[obj_id_reverse_mapping], debug=debug)
+    run_llm = True
+    # encode_ids_to_llm = True
+    # # Generate goals using llm and object selector
+    # prompt_goals = gen_prompt_goal_from_llm(prompt_path, use_llm=use_llm,
+    #                                         run_llm=run_llm, encode_ids_to_llm=encode_ids_to_llm, obj_id_reverse_mappings=[obj_id_reverse_mapping], debug=debug)
 
-    goals = prompt_goals[0]
+    # goals = prompt_goals[0]
+    goals = [
+        {"type": "pattern:rectangle", "obj_ids": [3, 4, 5, 6]},
+        {"type": "pattern:line", "obj_ids": [4, 1, 2]},
+    ]
     sampled_ids = []
     L = []
     for goal in goals:
@@ -92,13 +92,30 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s
 
     # Step 3. generate & execute plan
     sampling_planner = SamplingPlanner(region_sampler, n_samples=n_samples)
-    action_list = sampling_planner.plan(L, algo=method, prior_dict=PATTERN_DICT, debug=debug)
+    action_list = sampling_planner.plan(L, algo=method, prior_dict=PATTERN_DICT, debug=debug, max_iter=20000, seed=1)
+    print("Plan finished!")
     region_sampler.set_object_poses(init_objects_poses)
     region_sampler.visualize()
+    export_action_list = []
     for step in action_list:
         region_sampler.set_object_pose(step["obj_id"], step["new_pose"])
         region_sampler.visualize()
-
+        # transform the old/new positions with camera_pose and record the action for export
+        pose0_position = camera_pose[:3, :3] @ step["old_pose"][:3] + camera_pose[:3, 3]
+        pose0_position[2] = 0.0
+        pose1_position = camera_pose[:3, :3] @ step["new_pose"][:3] + camera_pose[:3, 3]
+        pose1_position[2] = 0.05
+        action = {
+            "obj_id": int(step["obj_id"]),
+            "pose0_position": pose0_position.tolist(),
+            "pose0_rotation": step["old_pose"][3:].tolist(),
+            "pose1_position": pose1_position.tolist(),
+            "pose1_rotation": step["new_pose"][3:].tolist(),
+        }
+        export_action_list.append(action)
+    # export to json
+    with open(os.path.join(data_path, "action_list.json"), "w") as f:
+        json.dump(export_action_list, f)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -112,6 +129,6 @@ def eval_real(data_path: str, prompt_path: str, method: str, mask_mode: str, n_s
     args = parser.parse_args()
 
     root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..")
-    real_data_path = os.path.join(root_path, "test_data", "real_000000")
+    real_data_path = os.path.join(root_path, "test_data", "real_000005", "output")
     prompt_path = f"{root_path}/output/struct_rearrange"
     eval_real(real_data_path, prompt_path, args.method, args.mask_mode, args.n_samples, args.debug)
diff --git a/lgmcts/utils/misc_utils.py b/lgmcts/utils/misc_utils.py
@@ -1,5 +1,6 @@
 """Miscellaneous utilities."""
 import cv2
+import warnings
 import kornia
 import matplotlib
 import matplotlib.pyplot as plt
@@ -87,6 +88,8 @@ def get_pointcloud_list(color, depth, mask, mask_name_ids, intrinisc, extrinsic,
     for (mask_name, mask_id) in mask_name_ids:
         obj_mask = (mask == mask_id)[:, :, 0] & valid_mask
         obj_points = scene_points[obj_mask].reshape(-1, 3)
+        if obj_points.shape[0] == 0:
+            warnings.warn(f"Object {mask_name} has no points")
         # transform
         obj_points = (extrinsic[:3, :3] @ obj_points.T).T + (extrinsic[:3, 3])[None, :]
         obj_colors = color[obj_mask].reshape(-1, 3) / 255.0