How to test on my own images #8

wyy-thu · 2024-05-09T08:21:25Z

I have some images, and I want to get cropped sub-images for each image.

aa-oo · 2024-07-29T07:01:38Z

Hello, I've encountered the same issue. Could you kindly let me know if you've managed to resolve it, and if so, how did you go about it?

dongdk · 2024-09-12T09:01:06Z

+1

dongdk · 2024-09-14T07:21:32Z

Hi @wyy-thu, I have finished the pipeline for testing on my own dataset. First, use faster-rcnn-vg to output the top-scoring bbox (please note that the bbox output format is yxyx). Second, use generate_bboxes to generate the predefined crops (again, please note that the bbox output format is yxyx). Finally, run test.py (you will need to modify the code to support your own dataset).

h3clikejava · 2024-12-04T07:08:45Z

Thank you for the insights provided by @dongdk

This is my final implementation:

import torch, os, sys, cv2, random, yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import models, transforms
from types import SimpleNamespace
from model.ssc import SSC

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1):
    """Detect objects in ``tf_image`` with torchvision's Faster R-CNN.

    A ``fasterrcnn_resnet50_fpn`` model with pretrained weights is built on
    every call, moved to the module-level ``device`` and run in eval mode
    without gradient tracking. Only the first entry of the model output is
    used.

    Args:
        tf_image: Input handed straight to the detector. The caller in this
            script passes a batched float tensor (presumably shaped
            (1, 3, H, W) -- TODO confirm).
        confidence_threshold: A detection is kept only when its score,
            rounded to two decimals, is strictly greater than this value.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries: integer coordinates
        (truncated by ``astype(int)``) followed by the rounded score.
    """
    detector = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
    detector.eval()
    with torch.no_grad():
        outputs = detector(tf_image)
    if not outputs:
        return []

    first = outputs[0]
    coords = first['boxes'].cpu().numpy().astype(int)
    # The threshold is applied to the rounded score, not the raw one.
    confidences = [round(value, 2) for value in first['scores'].tolist()]
    return [
        [int(x1), int(y1), int(x2), int(y2), confidence]
        for (x1, y1, x2, y2), confidence in zip(coords, confidences)
        if confidence > confidence_threshold
    ]

def generate_bboxes_1_1(image, step=12, max_steps=30, min_area_ratio=0.3):
    """Enumerate square (1:1) sliding-window crop candidates for ``image``.

    Square sizes are the multiples ``step * i`` for ``i`` in
    ``range(max_steps)``. A size is used only when it is strictly smaller
    than both image dimensions and its area exceeds ``min_area_ratio`` of
    the image area. Each accepted square is slid over the image in
    increments of ``step`` along both axes.

    Args:
        image: Any object exposing ``shape`` with height at index 0 and
            width at index 1 (e.g. a NumPy image array).
        step: Pixel increment used both for the square side and the sliding
            stride. Must be positive.
        max_steps: Number of side-length multiples to try (0 .. max_steps-1).
        min_area_ratio: Minimum crop area as a fraction of the image area
            (strict comparison).

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes whose bottom-right
        corner is inclusive (``x2 = x1 + side - 1``). Boxes are ordered by
        side length, then x offset, then y offset.
        NOTE(review): confirm that the consumer of these boxes expects
        x-first coordinate order.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    height, width = image.shape[0], image.shape[1]
    min_area = min_area_ratio * height * width
    annotations = []
    for multiple in range(max_steps):
        side = step * multiple
        if side >= height or side >= width:
            # Sides only grow with the multiple, so nothing larger can fit.
            break
        if side * side <= min_area:
            continue
        annotations.extend(
            [x_start, y_start, x_start + side - 1, y_start + side - 1]
            for x_start in range(0, width - side, step)
            for y_start in range(0, height - side, step)
        )
    return annotations

def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes,
              weights_path="./gaicv1_best.pth"):
    """Score every candidate crop with the SSC model and return the best one.

    Args:
        cfg_path: Path to the YAML config whose top-level keys become
            attributes of the config object passed to ``SSC``.
        tf_image: Image tensor fed to the network (moved to the chosen
            device here).
        bboxs: Non-empty sequence of candidate crop boxes; the first four
            entries of each box are used.
        fastrcnn_bboxes: Detector boxes; the first four entries of each box
            are used (a trailing score is ignored). May be empty.
        weights_path: Checkpoint file loaded with ``load_state_dict``.

    Returns:
        The highest-scoring entry of ``bboxs`` as a list of ints.

    Raises:
        ValueError: If ``bboxs`` is empty.
    """
    if len(bboxs) == 0:
        raise ValueError("inference() needs at least one candidate crop box")

    with open(cfg_path, 'r') as f:
        # NOTE(review): FullLoader is fine for a trusted local config; prefer
        # yaml.safe_load if the config could come from an untrusted source.
        config = yaml.load(f, Loader=yaml.FullLoader)
    cfg = SimpleNamespace(**config)

    use_cuda = torch.cuda.is_available()
    target = torch.device('cuda' if use_cuda else 'cpu')

    net = SSC(cfg)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    if use_cuda:
        net = torch.nn.DataParallel(net, device_ids=[0])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    net = net.to(target)
    # Inference only: switch layers such as dropout/batch-norm to eval mode.
    net.eval()

    # Each ROI row is (batch_index, c0, c1, c2, c3); batch index is always 0.
    # reshape(-1, 5) keeps a well-formed (0, 5) tensor when a list is empty.
    roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in bboxs],
        dtype=torch.float32,
    ).reshape(-1, 5).to(target)
    rcnn_roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in fastrcnn_bboxes],
        dtype=torch.float32,
    ).reshape(-1, 5).to(target)

    with torch.no_grad():
        pre_scores = net(tf_image.to(target), roi, rcnn_roi)
    pre_scores = pre_scores.cpu().numpy().reshape(-1)

    max_index = int(np.argmax(pre_scores))
    finally_rect = bboxs[max_index]
    return [int(value) for value in finally_rect]

def main():
    """Run the cropping demo on the image named by the first CLI argument.

    Steps: load the image, resize it so the shorter side is about
    ``image_size`` pixels with both sides snapped to multiples of 32,
    collect detector boxes and square crop candidates, let ``inference``
    pick the best candidate, and (when ``debug`` is on) display the result
    with matplotlib.

    Exits with an error message when no path is given or the image cannot
    be read.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]
    image_size = 256
    debug = True

    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports an unreadable/missing file by returning None.
        sys.exit(f"could not read image: {image_path}")
    cv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    scale = float(image_size) / float(min(cv_image.shape[:2]))
    h = round(cv_image.shape[0] * scale / 32.0) * 32
    w = round(cv_image.shape[1] * scale / 32.0) * 32
    # Dividing by 256.0 converts to float; 8-bit pixel values land in [0, 1).
    resized_image = cv2.resize(cv_image, (int(w), int(h))) / 256.0

    preprocess = transforms.Compose([transforms.ToTensor(), ])
    tf_image = preprocess(resized_image).unsqueeze(0).to(device).float()

    # generate fastrcnn bboxes
    fastrcnn_bboxes = get_fastrcnn_bbox(tf_image)

    # generate bboxes
    annotations = generate_bboxes_1_1(resized_image)

    # inference
    rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes)

    if debug:
        # Swap in e.g. annotations[:5] to preview several candidates at once.
        debug_bbox = [rect]
        print(debug_bbox)

        # Draw on a copy so the crop preview is cut from unmarked pixels.
        canvas = resized_image.copy()

        if len(debug_bbox) == 1:
            _, axs = plt.subplots(1, 2, figsize=(12, 6))
            axs[0].axis('off')

            box = debug_bbox[0]
            color = (random.random(), random.random(), random.random())
            cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)

            axs[0].imshow(canvas)
            axs[0].set_title("Original")

            # Candidate boxes from generate_bboxes_1_1 use an inclusive
            # bottom-right corner, hence the +1 on the slice ends.
            cropped_image = resized_image[int(box[1]):int(box[3]) + 1,
                                          int(box[0]):int(box[2]) + 1]
            axs[1].imshow(cropped_image)
            axs[1].set_title("Cropped")
            axs[1].axis('off')

            plt.tight_layout()
        else:
            for i, box in enumerate(debug_bbox):
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
                # Boxes with a fifth entry carry a score; show it in the label.
                label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i)
                cv2.putText(canvas, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            plt.imshow(canvas)
            plt.axis("off")
        plt.show()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

I'm a novice in machine learning. This is my implementation, based on my own understanding. I'm not even sure whether it is correct, but the results seem pretty good. If anyone finds any issues, please let me know. Thank you very much!

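Although it works well, it differs from the original pipeline: I couldn't get the suggested Faster R-CNN (faster-rcnn-vg) to run properly, so I used torchvision's pretrained detector instead (the helper is named `get_fastrcnn_bbox`, but the model it loads, `fasterrcnn_resnet50_fpn`, is itself a Faster R-CNN). If anyone has a simpler way, please let me know. Additionally, I plan to replace this detector with YOLO, but I haven't implemented it yet.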

aiXia121 · 2025-01-17T03:28:19Z

Thank you for the insights provided by @dongdk

This is my final implementation:

import torch, os, sys, cv2, random, yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import models, transforms
from types import SimpleNamespace
from model.ssc import SSC

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1):
    """Detect objects in ``tf_image`` with torchvision's Faster R-CNN.

    A ``fasterrcnn_resnet50_fpn`` model with pretrained weights is built on
    every call, moved to the module-level ``device`` and run in eval mode
    without gradient tracking. Only the first entry of the model output is
    used.

    Args:
        tf_image: Input handed straight to the detector. The caller in this
            script passes a batched float tensor (presumably shaped
            (1, 3, H, W) -- TODO confirm).
        confidence_threshold: A detection is kept only when its score,
            rounded to two decimals, is strictly greater than this value.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries: integer coordinates
        (truncated by ``astype(int)``) followed by the rounded score.
    """
    detector = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
    detector.eval()
    with torch.no_grad():
        outputs = detector(tf_image)
    if not outputs:
        return []

    first = outputs[0]
    coords = first['boxes'].cpu().numpy().astype(int)
    # The threshold is applied to the rounded score, not the raw one.
    confidences = [round(value, 2) for value in first['scores'].tolist()]
    return [
        [int(x1), int(y1), int(x2), int(y2), confidence]
        for (x1, y1, x2, y2), confidence in zip(coords, confidences)
        if confidence > confidence_threshold
    ]

def generate_bboxes_1_1(image, step=12, max_steps=30, min_area_ratio=0.3):
    """Enumerate square (1:1) sliding-window crop candidates for ``image``.

    Square sizes are the multiples ``step * i`` for ``i`` in
    ``range(max_steps)``. A size is used only when it is strictly smaller
    than both image dimensions and its area exceeds ``min_area_ratio`` of
    the image area. Each accepted square is slid over the image in
    increments of ``step`` along both axes.

    Args:
        image: Any object exposing ``shape`` with height at index 0 and
            width at index 1 (e.g. a NumPy image array).
        step: Pixel increment used both for the square side and the sliding
            stride. Must be positive.
        max_steps: Number of side-length multiples to try (0 .. max_steps-1).
        min_area_ratio: Minimum crop area as a fraction of the image area
            (strict comparison).

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes whose bottom-right
        corner is inclusive (``x2 = x1 + side - 1``). Boxes are ordered by
        side length, then x offset, then y offset.
        NOTE(review): confirm that the consumer of these boxes expects
        x-first coordinate order.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    height, width = image.shape[0], image.shape[1]
    min_area = min_area_ratio * height * width
    annotations = []
    for multiple in range(max_steps):
        side = step * multiple
        if side >= height or side >= width:
            # Sides only grow with the multiple, so nothing larger can fit.
            break
        if side * side <= min_area:
            continue
        annotations.extend(
            [x_start, y_start, x_start + side - 1, y_start + side - 1]
            for x_start in range(0, width - side, step)
            for y_start in range(0, height - side, step)
        )
    return annotations

def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes,
              weights_path="./gaicv1_best.pth"):
    """Score every candidate crop with the SSC model and return the best one.

    Args:
        cfg_path: Path to the YAML config whose top-level keys become
            attributes of the config object passed to ``SSC``.
        tf_image: Image tensor fed to the network (moved to the chosen
            device here).
        bboxs: Non-empty sequence of candidate crop boxes; the first four
            entries of each box are used.
        fastrcnn_bboxes: Detector boxes; the first four entries of each box
            are used (a trailing score is ignored). May be empty.
        weights_path: Checkpoint file loaded with ``load_state_dict``.

    Returns:
        The highest-scoring entry of ``bboxs`` as a list of ints.

    Raises:
        ValueError: If ``bboxs`` is empty.
    """
    if len(bboxs) == 0:
        raise ValueError("inference() needs at least one candidate crop box")

    with open(cfg_path, 'r') as f:
        # NOTE(review): FullLoader is fine for a trusted local config; prefer
        # yaml.safe_load if the config could come from an untrusted source.
        config = yaml.load(f, Loader=yaml.FullLoader)
    cfg = SimpleNamespace(**config)

    use_cuda = torch.cuda.is_available()
    target = torch.device('cuda' if use_cuda else 'cpu')

    net = SSC(cfg)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    if use_cuda:
        net = torch.nn.DataParallel(net, device_ids=[0])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    net = net.to(target)
    # Inference only: switch layers such as dropout/batch-norm to eval mode.
    net.eval()

    # Each ROI row is (batch_index, c0, c1, c2, c3); batch index is always 0.
    # reshape(-1, 5) keeps a well-formed (0, 5) tensor when a list is empty.
    roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in bboxs],
        dtype=torch.float32,
    ).reshape(-1, 5).to(target)
    rcnn_roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in fastrcnn_bboxes],
        dtype=torch.float32,
    ).reshape(-1, 5).to(target)

    with torch.no_grad():
        pre_scores = net(tf_image.to(target), roi, rcnn_roi)
    pre_scores = pre_scores.cpu().numpy().reshape(-1)

    max_index = int(np.argmax(pre_scores))
    finally_rect = bboxs[max_index]
    return [int(value) for value in finally_rect]

def main():
    """Run the cropping demo on the image named by the first CLI argument.

    Steps: load the image, resize it so the shorter side is about
    ``image_size`` pixels with both sides snapped to multiples of 32,
    collect detector boxes and square crop candidates, let ``inference``
    pick the best candidate, and (when ``debug`` is on) display the result
    with matplotlib.

    Exits with an error message when no path is given or the image cannot
    be read.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]
    image_size = 256
    debug = True

    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports an unreadable/missing file by returning None.
        sys.exit(f"could not read image: {image_path}")
    cv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    scale = float(image_size) / float(min(cv_image.shape[:2]))
    h = round(cv_image.shape[0] * scale / 32.0) * 32
    w = round(cv_image.shape[1] * scale / 32.0) * 32
    # Dividing by 256.0 converts to float; 8-bit pixel values land in [0, 1).
    resized_image = cv2.resize(cv_image, (int(w), int(h))) / 256.0

    preprocess = transforms.Compose([transforms.ToTensor(), ])
    tf_image = preprocess(resized_image).unsqueeze(0).to(device).float()

    # generate fastrcnn bboxes
    fastrcnn_bboxes = get_fastrcnn_bbox(tf_image)

    # generate bboxes
    annotations = generate_bboxes_1_1(resized_image)

    # inference
    rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes)

    if debug:
        # Swap in e.g. annotations[:5] to preview several candidates at once.
        debug_bbox = [rect]
        print(debug_bbox)

        # Draw on a copy so the crop preview is cut from unmarked pixels.
        canvas = resized_image.copy()

        if len(debug_bbox) == 1:
            _, axs = plt.subplots(1, 2, figsize=(12, 6))
            axs[0].axis('off')

            box = debug_bbox[0]
            color = (random.random(), random.random(), random.random())
            cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)

            axs[0].imshow(canvas)
            axs[0].set_title("Original")

            # Candidate boxes from generate_bboxes_1_1 use an inclusive
            # bottom-right corner, hence the +1 on the slice ends.
            cropped_image = resized_image[int(box[1]):int(box[3]) + 1,
                                          int(box[0]):int(box[2]) + 1]
            axs[1].imshow(cropped_image)
            axs[1].set_title("Cropped")
            axs[1].axis('off')

            plt.tight_layout()
        else:
            for i, box in enumerate(debug_bbox):
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
                # Boxes with a fifth entry carry a score; show it in the label.
                label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i)
                cv2.putText(canvas, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            plt.imshow(canvas)
            plt.axis("off")
        plt.show()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

I'm a novice in machine learning. This is my implementation, based on my own understanding. I'm not even sure whether it is correct, but the results seem pretty good. If anyone finds any issues, please let me know. Thank you very much!

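Although it works well, it differs from the original pipeline: I couldn't get the suggested Faster R-CNN (faster-rcnn-vg) to run properly, so I used torchvision's pretrained detector instead (the helper is named `get_fastrcnn_bbox`, but the model it loads, `fasterrcnn_resnet50_fpn`, is itself a Faster R-CNN). If anyone has a simpler way, please let me know. Additionally, I plan to replace this detector with YOLO, but I haven't implemented it yet.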

Have you done this with YOLO?

dongdk · 2025-01-17T03:38:01Z

It does not matter what kind of detector or segmentation method you use; providing the bboxes is enough. Good luck.

h3clikejava mentioned this issue Dec 4, 2024

If I want to use my own dataset, how can I generate bbox? #14

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

How to test on my own images #8

How to test on my own images #8

wyy-thu commented May 9, 2024

aa-oo commented Jul 29, 2024

dongdk commented Sep 12, 2024

dongdk commented Sep 14, 2024

h3clikejava commented Dec 4, 2024

aiXia121 commented Jan 17, 2025

dongdk commented Jan 17, 2025

How to test on my own images #8

How to test on my own images #8

Comments

wyy-thu commented May 9, 2024

aa-oo commented Jul 29, 2024

dongdk commented Sep 12, 2024

dongdk commented Sep 14, 2024

h3clikejava commented Dec 4, 2024

aiXia121 commented Jan 17, 2025

dongdk commented Jan 17, 2025