Merge pull request #36 from matsuolab/feature/35_yolov8

yolov3 -> yolov8 にアップデート
matsuolab · Aug 4, 2023 · 5dd29c3 · 5dd29c3
2 parents 2654880 + 3f32618
commit 5dd29c3
Show file tree

Hide file tree

Showing 7 changed files with 74 additions and 960 deletions.
diff --git a/catkin_ws/src/three-dimensions_tutorial/scripts/detection_distance.py b/catkin_ws/src/three-dimensions_tutorial/scripts/detection_distance.py
@@ -1,14 +1,16 @@
 #!/usr/bin/env python3
 
-import rospy
+import copy
+from typing import List
+
+import cv2
 import message_filters
-from sensor_msgs.msg import Image
+import rospy
 from cv_bridge import CvBridge
-from pytorchyolo import detect, models
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import cv2
-import copy
+from sensor_msgs.msg import Image
+from ultralytics import YOLO
+from ultralytics.engine.results import Results
+
 
 class DetectionDistance:
     def __init__(self):
@@ -25,6 +27,8 @@ def __init__(self):
         self.bridge = CvBridge()
         self.rgb_image, self.depth_image = None, None
 
+        self.model = YOLO('yolov8n.pt')
+
     def callback_rgbd(self, data1, data2):
         cv_array = self.bridge.imgmsg_to_cv2(data1, 'bgr8')
         cv_array = cv2.cvtColor(cv_array, cv2.COLOR_BGR2RGB)
@@ -34,33 +38,28 @@ def callback_rgbd(self, data1, data2):
         self.depth_image = cv_array
 
     def process(self):
-        path = "/root/roomba_hack/catkin_ws/src/three-dimensions_tutorial/yolov3/"
-
-        # load category
-        with open(path+"data/coco.names") as f:
-            category = f.read().splitlines()
-
-        # prepare model
-        model = models.load_model(path+"config/yolov3.cfg", path+"weights/yolov3.weights")
-
         while not rospy.is_shutdown():
             if self.rgb_image is None:
                 continue
 
             # inference
             tmp_image = copy.copy(self.rgb_image)
-            boxes = detect.detect_image(model, tmp_image)
-            # [[x1, y1, x2, y2, confidence, class]]
+
+            results: List[Results] = self.model.predict(self.rgb_image, verbose=False)
 
             # plot bouding box
-            for box in boxes:
-                x1, y1, x2, y2 = map(int, box[:4])
-                cls_pred = int(box[5])
+            for result in results:
+                boxes = result.boxes.cpu().numpy()
+                names = result.names
+                if len(boxes.xyxy) == 0:
+                    continue
+                x1, y1, x2, y2 = map(int, boxes.xyxy[0][:4])
+                cls_pred = boxes.cls[0]
                 tmp_image = cv2.rectangle(tmp_image, (x1, y1), (x2, y2), (0, 255, 0), 3)
-                tmp_image = cv2.putText(tmp_image, category[cls_pred], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
+                tmp_image = cv2.putText(tmp_image, names[cls_pred], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
                 cx, cy = (x1+x2)//2, (y1+y2)//2
-                print(category[cls_pred], self.depth_image[cy][cx]/1000, "m")
-            
+                print(names[cls_pred], self.depth_image[cy][cx]/1000, "m")
+
             # publish image
             tmp_image = cv2.cvtColor(tmp_image, cv2.COLOR_RGB2BGR)
             detection_result = self.bridge.cv2_to_imgmsg(tmp_image, "bgr8")

diff --git a/catkin_ws/src/three-dimensions_tutorial/scripts/detection_mask.py b/catkin_ws/src/three-dimensions_tutorial/scripts/detection_mask.py
@@ -1,15 +1,16 @@
 #!/usr/bin/env python3
+import copy
+from typing import List
 
-import rospy
+import cv2
 import message_filters
-from sensor_msgs.msg import Image, CameraInfo
-from cv_bridge import CvBridge
-from pytorchyolo import detect, models
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
 import numpy as np
-import cv2
-import copy
+import rospy
+from cv_bridge import CvBridge
+from sensor_msgs.msg import CameraInfo, Image
+from ultralytics import YOLO
+from ultralytics.engine.results import Results
+
 
 class DetectionMask:
     def __init__(self):
@@ -29,9 +30,10 @@ def __init__(self):
         self.bridge = CvBridge()
         self.rgb_image, self.depth_image, self.camera_info = None, None, None
 
+        self.model = YOLO('yolov8n.pt')
+
     def callback_rgbd(self, data1, data2, data3):
         cv_array = self.bridge.imgmsg_to_cv2(data1, 'bgr8')
-        cv_array = cv2.cvtColor(cv_array, cv2.COLOR_BGR2RGB)
         self.rgb_image = cv_array
 
         cv_array = self.bridge.imgmsg_to_cv2(data2, 'passthrough')
@@ -40,38 +42,30 @@ def callback_rgbd(self, data1, data2, data3):
         self.camera_info = data3
 
     def process(self):
-        path = "/root/roomba_hack/catkin_ws/src/three-dimensions_tutorial/yolov3/"
-
-        # load category
-        with open(path+"data/coco.names") as f:
-            category = f.read().splitlines()
-
-        # prepare model
-        model = models.load_model(path+"config/yolov3.cfg", path+"weights/yolov3.weights")
-
         while not rospy.is_shutdown():
             if self.rgb_image is None:
                 continue
 
-            # inference
             tmp_rgb_image = copy.copy(self.rgb_image)
             tmp_depth_image = copy.copy(self.depth_image)
             tmp_camera_info = copy.copy(self.camera_info)
 
-            boxes = detect.detect_image(model, tmp_rgb_image)
-            # [[x1, y1, x2, y2, confidence, class]]
-
             depth_mask = np.zeros_like(tmp_depth_image)
 
-            # plot bouding box
-            for box in boxes:
-                x1, y1, x2, y2 = map(int, box[:4])
-                cls_pred = int(box[5])
+            results: List[Results] = self.model.predict(self.rgb_image)
+
+            for result in results:
+                boxes = result.boxes.cpu().numpy()
+                names = result.names
+                if len(boxes.xyxy) == 0:
+                    continue
+                x1, y1, x2, y2 = map(int, boxes.xyxy[0][:4])
+                cls_pred = boxes.cls[0]
                 tmp_rgb_image = cv2.rectangle(tmp_rgb_image, (x1, y1), (x2, y2), (0, 255, 0), 3)
-                tmp_rgb_image = cv2.putText(tmp_rgb_image, category[cls_pred], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
+                tmp_rgb_image = cv2.putText(tmp_rgb_image, names[cls_pred], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
                 depth_mask[y1:y2, x1:x2] = 1
-            
-            tmp_rgb_image = cv2.cvtColor(tmp_rgb_image, cv2.COLOR_RGB2BGR)
+
+            # publish image
             detection_result = self.bridge.cv2_to_imgmsg(tmp_rgb_image, "bgr8")
             self.detection_result_pub.publish(detection_result)
 
@@ -87,4 +81,4 @@ def process(self):
     try:
         dd.process()
     except rospy.ROSInitException:
-        pass
+        pass
diff --git a/catkin_ws/src/three-dimensions_tutorial/scripts/object_detection.py b/catkin_ws/src/three-dimensions_tutorial/scripts/object_detection.py
@@ -1,13 +1,15 @@
 #!/usr/bin/env python3
 
+import copy
+from typing import List
+
+import cv2
 import rospy
-from sensor_msgs.msg import Image
 from cv_bridge import CvBridge
-from pytorchyolo import detect, models
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import cv2
-import copy
+from sensor_msgs.msg import Image
+from ultralytics import YOLO
+from ultralytics.engine.results import Results
+
 
 class ObjectDetection:
     def __init__(self):
@@ -17,44 +19,38 @@ def __init__(self):
         self.detection_result_pub = rospy.Publisher('/detection_result', Image, queue_size=10)
 
         # Subscriber
-        rgb_sub = rospy.Subscriber('/camera/color/image_raw', Image, self.callback_rgb)
+        rospy.Subscriber('/camera/color/image_raw', Image, self.callback_rgb)
 
         self.bridge = CvBridge()
         self.rgb_image = None
 
+        self.model = YOLO('yolov8n.pt')
+
     def callback_rgb(self, data):
         cv_array = self.bridge.imgmsg_to_cv2(data, 'bgr8')
-        cv_array = cv2.cvtColor(cv_array, cv2.COLOR_BGR2RGB)
         self.rgb_image = cv_array
 
     def process(self):
-        path = "/root/roomba_hack/catkin_ws/src/three-dimensions_tutorial/yolov3/"
-
-        # load category
-        with open(path+"data/coco.names") as f:
-            category = f.read().splitlines()
-
-        # prepare model
-        model = models.load_model(path+"config/yolov3.cfg", path+"weights/yolov3.weights")
-
         while not rospy.is_shutdown():
             if self.rgb_image is None:
                 continue
 
-            # inference
-            tmp_image = copy.copy(self.rgb_image)
-            boxes = detect.detect_image(model, tmp_image)
-            # [[x1, y1, x2, y2, confidence, class]]
-
-            # plot bouding box
-            for box in boxes:
-                x1, y1, x2, y2 = map(int, box[:4])
-                cls_pred = int(box[5])
-                tmp_image = cv2.rectangle(tmp_image, (x1, y1), (x2, y2), (0, 255, 0), 3)
-                tmp_image = cv2.putText(tmp_image, category[cls_pred], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
+            results: List[Results] = self.model.predict(self.rgb_image)
+
+            # plot bounding box
+            tmp_image = copy.deepcopy(self.rgb_image)
+            for result in results:
+                boxes = result.boxes.cpu().numpy()
+                names = result.names
+                for xyxy, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
+                    if conf < 0.5:
+                        continue
+                    x1, y1, x2, y2 = map(int, xyxy[:4])
+                    cls_pred = cls
+                    tmp_image = cv2.rectangle(tmp_image, (x1, y1), (x2, y2), (0, 255, 0), 3)
+                    tmp_image = cv2.putText(tmp_image, names[cls_pred], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
 
             # publish image
-            tmp_image = cv2.cvtColor(tmp_image, cv2.COLOR_RGB2BGR)
             detection_result = self.bridge.cv2_to_imgmsg(tmp_image, "bgr8")
             self.detection_result_pub.publish(detection_result)