Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

How to test on my own images #8

Open
wyy-thu opened this issue May 9, 2024 · 6 comments
Open

How to test on my own images #8

wyy-thu opened this issue May 9, 2024 · 6 comments

Comments

@wyy-thu
Copy link

wyy-thu commented May 9, 2024

I have some images, and I want to get cropped sub-images for each image.

@aa-oo
Copy link

aa-oo commented Jul 29, 2024

Hello, I've encountered the same issue. Could you kindly let me know if you've managed to resolve it, and if so, how did you go about it?

@dongdk
Copy link

dongdk commented Sep 12, 2024

+1

@dongdk
Copy link

dongdk commented Sep 14, 2024

Hi @wyy-thu, I have finished the pipeline for testing my own dataset. First, use faster-rcnn-vg to output the top-scoring bbox (please note that the bbox output format is yxyx). Second, use generate_bboxes to generate the predefined crops (again, note that the bbox output format is yxyx). Finally, run test.py (you will need to modify the code to support your own dataset).

@h3clikejava
Copy link

Thank you for the insights provided by @dongdk

This is my final implementation:

import torch, os, sys, cv2, random, yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import models, transforms
from types import SimpleNamespace
from model.ssc import SSC

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1):
    """Run torchvision's ``fasterrcnn_resnet50_fpn`` detector on ``tf_image``.

    The detector is built with pretrained weights, moved to the module-level
    ``device`` and evaluated without gradient tracking. Only the predictions
    for the first entry of the batch are inspected.

    Args:
        tf_image: Input handed unchanged to the detector.
        confidence_threshold: A detection is kept only when its score,
            rounded to two decimals, is strictly greater than this value.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries (integer coordinates,
        rounded float score); empty when the detector returns nothing.
    """
    detector = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
    detector.eval()
    with torch.no_grad():
        outputs = detector(tf_image)
    if not outputs:
        return []

    first = outputs[0]
    kept = []
    for raw_box, raw_score in zip(first['boxes'], first['scores']):
        score = round(raw_score.item(), 2)
        if score > confidence_threshold:
            # Truncate the float coordinates to whole pixels.
            x1, y1, x2, y2 = raw_box.cpu().numpy().astype(int)
            kept.append([int(x1), int(y1), int(x2), int(y2), score])
    return kept

def generate_bboxes_1_1(image):
    """Enumerate square (1:1) candidate crops on a 12-pixel grid.

    Side lengths are multiples of 12 (``12 * k`` for ``k`` in ``0..29``). A
    side length is used only if it is strictly smaller than both image
    dimensions and the crop area exceeds 30% of the image area. Crops are
    slid across the image in 12-pixel steps.

    Args:
        image: Any object whose ``shape[0]`` is the height and ``shape[1]``
            the width.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes with inclusive
        bottom-right corners, ordered by side length, then x, then y.
    """
    height, width = image.shape[0], image.shape[1]
    stride = 12
    min_area = 0.3 * height * width

    crops = []
    for k in range(30):
        side = stride * k
        # Skip sizes that do not fit strictly inside the image or are too small.
        if side >= height or side >= width or side * side <= min_area:
            continue
        crops.extend(
            [int(x0), int(y0), int(x0 + side - 1), int(y0 + side - 1)]
            for x0 in range(0, width - side, stride)
            for y0 in range(0, height - side, stride)
        )
    return crops

def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes,
              weights_path="./gaicv1_best.pth"):
    """Score every candidate crop with the SSC network and return the best one.

    Args:
        cfg_path: Path to the YAML model configuration; its top-level keys
            become attributes of the config object passed to ``SSC``.
        tf_image: Batched image tensor fed to the network.
        bboxs: Candidate crop boxes; the first four values of each box are
            used as ROI coordinates.
        fastrcnn_bboxes: Detector boxes; the first four values of each box
            are used as ROI coordinates (any extra value such as a score is
            ignored).
        weights_path: Checkpoint holding the ``SSC`` state dict.

    Returns:
        The highest-scoring entry of ``bboxs`` with its values cast to ``int``.

    Raises:
        ValueError: If ``bboxs`` is empty (there is nothing to rank).
    """
    if not bboxs:
        raise ValueError("inference() needs at least one candidate crop box")

    with open(cfg_path, 'r') as f:
        # NOTE: FullLoader is kept from the original; only use trusted configs.
        config = yaml.load(f, Loader=yaml.FullLoader)
    cfg = SimpleNamespace(**config)

    use_cuda = torch.cuda.is_available()
    run_device = torch.device('cuda' if use_cuda else 'cpu')

    net = SSC(cfg)
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    net.load_state_dict(torch.load(weights_path, map_location=run_device))
    if use_cuda:
        net = torch.nn.DataParallel(net, device_ids=[0])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    net = net.to(run_device)
    # Inference mode: freeze BatchNorm statistics and disable dropout.
    net.eval()

    # Each ROI row is (batch_index, c0, c1, c2, c3); batch index is always 0.
    # reshape(-1, 5) keeps a well-formed (0, 5) tensor when a list is empty.
    roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in bboxs],
        dtype=torch.float32,
    ).reshape(-1, 5).to(run_device)
    rcnn_roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in fastrcnn_bboxes],
        dtype=torch.float32,
    ).reshape(-1, 5).to(run_device)
    image = tf_image.to(run_device)

    with torch.no_grad():
        pre_scores = net(image, roi, rcnn_roi)
    pre_scores = pre_scores.cpu().numpy().reshape(-1)

    max_index = int(np.argmax(pre_scores))
    finally_rect = bboxs[max_index]
    return [int(value) for value in finally_rect]

def main():
    """Crop the image named on the command line and display the result.

    Pipeline: load the image, resize it so the shorter side is about 256 px
    with both sides rounded to multiples of 32, run the object detector,
    generate square candidate crops, let the network pick the best crop, and
    (in debug mode) show the chosen box next to the cropped region.

    Exits with a message when no image path is given or the image cannot be
    read.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]
    image_size = 256
    debug = True

    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        sys.exit(f"could not read image: {image_path}")
    cv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    scale = float(image_size) / float(min(cv_image.shape[:2]))
    h = round(cv_image.shape[0] * scale / 32.0) * 32
    w = round(cv_image.shape[1] * scale / 32.0) * 32
    resized_image = cv2.resize(cv_image, (int(w), int(h))) / 256.0

    preprocess = transforms.Compose([transforms.ToTensor(), ])
    tf_image = preprocess(resized_image).unsqueeze(0).to(device).float()

    # generate fastrcnn bboxes
    fastrcnn_bboxes = get_fastrcnn_bbox(tf_image)

    # generate bboxes
    annotations = generate_bboxes_1_1(resized_image)

    # inference
    rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes)

    if debug:
        # Swap in e.g. annotations[:5] here to inspect several candidates.
        debug_bbox = [rect]
        print(debug_bbox)

        # Draw on a copy so the crop preview is not polluted by the overlay.
        canvas = resized_image.copy()

        if len(debug_bbox) == 1:
            fig, axs = plt.subplots(1, 2, figsize=(12, 6))
            axs[0].axis('off')

            for box in debug_bbox:
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)

            axs[0].imshow(canvas)
            axs[0].set_title("Original")

            box = debug_bbox[0]
            # Box corners are inclusive, hence the +1 on the slice ends.
            cropped_image = resized_image[int(box[1]):int(box[3]) + 1,
                                          int(box[0]):int(box[2]) + 1]
            axs[1].imshow(cropped_image)
            axs[1].set_title("Cropped")
            axs[1].axis('off')

            plt.tight_layout()
        else:
            for i, box in enumerate(debug_bbox):
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
                # A fifth value, when present, is shown as the box score.
                label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i)
                cv2.putText(canvas, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            plt.imshow(canvas)
            plt.axis("off")
        plt.show()

if __name__ == "__main__":
    main()

I'm a novice in machine learning. This is my implementation based on my understanding. I'm not even sure it is correct, but the results seem pretty good. If anyone finds any issues, please let me know. Thank you very much!
image
image

Although it works well, unlike the original, I used Fast R-CNN instead of Faster R-CNN because I couldn't run Faster R-CNN properly. If anyone has a simpler way, please let me know. Additionally, I plan to replace Fast R-CNN with YOLO, but I haven't implemented it yet.

@aiXia121
Copy link

Thank you for the insights provided by @dongdk

This is my final implementation: “这是我的最终实现:”

import torch, os, sys, cv2, random, yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import models, transforms
from types import SimpleNamespace
from model.ssc import SSC

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1):
    """Run torchvision's ``fasterrcnn_resnet50_fpn`` detector on ``tf_image``.

    The detector is built with pretrained weights, moved to the module-level
    ``device`` and evaluated without gradient tracking. Only the predictions
    for the first entry of the batch are inspected.

    Args:
        tf_image: Input handed unchanged to the detector.
        confidence_threshold: A detection is kept only when its score,
            rounded to two decimals, is strictly greater than this value.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries (integer coordinates,
        rounded float score); empty when the detector returns nothing.
    """
    detector = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
    detector.eval()
    with torch.no_grad():
        outputs = detector(tf_image)
    if not outputs:
        return []

    first = outputs[0]
    kept = []
    for raw_box, raw_score in zip(first['boxes'], first['scores']):
        score = round(raw_score.item(), 2)
        if score > confidence_threshold:
            # Truncate the float coordinates to whole pixels.
            x1, y1, x2, y2 = raw_box.cpu().numpy().astype(int)
            kept.append([int(x1), int(y1), int(x2), int(y2), score])
    return kept

def generate_bboxes_1_1(image):
    """Enumerate square (1:1) candidate crops on a 12-pixel grid.

    Side lengths are multiples of 12 (``12 * k`` for ``k`` in ``0..29``). A
    side length is used only if it is strictly smaller than both image
    dimensions and the crop area exceeds 30% of the image area. Crops are
    slid across the image in 12-pixel steps.

    Args:
        image: Any object whose ``shape[0]`` is the height and ``shape[1]``
            the width.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes with inclusive
        bottom-right corners, ordered by side length, then x, then y.
    """
    height, width = image.shape[0], image.shape[1]
    stride = 12
    min_area = 0.3 * height * width

    crops = []
    for k in range(30):
        side = stride * k
        # Skip sizes that do not fit strictly inside the image or are too small.
        if side >= height or side >= width or side * side <= min_area:
            continue
        crops.extend(
            [int(x0), int(y0), int(x0 + side - 1), int(y0 + side - 1)]
            for x0 in range(0, width - side, stride)
            for y0 in range(0, height - side, stride)
        )
    return crops

def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes,
              weights_path="./gaicv1_best.pth"):
    """Score every candidate crop with the SSC network and return the best one.

    Args:
        cfg_path: Path to the YAML model configuration; its top-level keys
            become attributes of the config object passed to ``SSC``.
        tf_image: Batched image tensor fed to the network.
        bboxs: Candidate crop boxes; the first four values of each box are
            used as ROI coordinates.
        fastrcnn_bboxes: Detector boxes; the first four values of each box
            are used as ROI coordinates (any extra value such as a score is
            ignored).
        weights_path: Checkpoint holding the ``SSC`` state dict.

    Returns:
        The highest-scoring entry of ``bboxs`` with its values cast to ``int``.

    Raises:
        ValueError: If ``bboxs`` is empty (there is nothing to rank).
    """
    if not bboxs:
        raise ValueError("inference() needs at least one candidate crop box")

    with open(cfg_path, 'r') as f:
        # NOTE: FullLoader is kept from the original; only use trusted configs.
        config = yaml.load(f, Loader=yaml.FullLoader)
    cfg = SimpleNamespace(**config)

    use_cuda = torch.cuda.is_available()
    run_device = torch.device('cuda' if use_cuda else 'cpu')

    net = SSC(cfg)
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    net.load_state_dict(torch.load(weights_path, map_location=run_device))
    if use_cuda:
        net = torch.nn.DataParallel(net, device_ids=[0])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    net = net.to(run_device)
    # Inference mode: freeze BatchNorm statistics and disable dropout.
    net.eval()

    # Each ROI row is (batch_index, c0, c1, c2, c3); batch index is always 0.
    # reshape(-1, 5) keeps a well-formed (0, 5) tensor when a list is empty.
    roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in bboxs],
        dtype=torch.float32,
    ).reshape(-1, 5).to(run_device)
    rcnn_roi = torch.tensor(
        [(0, box[0], box[1], box[2], box[3]) for box in fastrcnn_bboxes],
        dtype=torch.float32,
    ).reshape(-1, 5).to(run_device)
    image = tf_image.to(run_device)

    with torch.no_grad():
        pre_scores = net(image, roi, rcnn_roi)
    pre_scores = pre_scores.cpu().numpy().reshape(-1)

    max_index = int(np.argmax(pre_scores))
    finally_rect = bboxs[max_index]
    return [int(value) for value in finally_rect]

def main():
    """Crop the image named on the command line and display the result.

    Pipeline: load the image, resize it so the shorter side is about 256 px
    with both sides rounded to multiples of 32, run the object detector,
    generate square candidate crops, let the network pick the best crop, and
    (in debug mode) show the chosen box next to the cropped region.

    Exits with a message when no image path is given or the image cannot be
    read.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]
    image_size = 256
    debug = True

    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        sys.exit(f"could not read image: {image_path}")
    cv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    scale = float(image_size) / float(min(cv_image.shape[:2]))
    h = round(cv_image.shape[0] * scale / 32.0) * 32
    w = round(cv_image.shape[1] * scale / 32.0) * 32
    resized_image = cv2.resize(cv_image, (int(w), int(h))) / 256.0

    preprocess = transforms.Compose([transforms.ToTensor(), ])
    tf_image = preprocess(resized_image).unsqueeze(0).to(device).float()

    # generate fastrcnn bboxes
    fastrcnn_bboxes = get_fastrcnn_bbox(tf_image)

    # generate bboxes
    annotations = generate_bboxes_1_1(resized_image)

    # inference
    rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes)

    if debug:
        # Swap in e.g. annotations[:5] here to inspect several candidates.
        debug_bbox = [rect]
        print(debug_bbox)

        # Draw on a copy so the crop preview is not polluted by the overlay.
        canvas = resized_image.copy()

        if len(debug_bbox) == 1:
            fig, axs = plt.subplots(1, 2, figsize=(12, 6))
            axs[0].axis('off')

            for box in debug_bbox:
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)

            axs[0].imshow(canvas)
            axs[0].set_title("Original")

            box = debug_bbox[0]
            # Box corners are inclusive, hence the +1 on the slice ends.
            cropped_image = resized_image[int(box[1]):int(box[3]) + 1,
                                          int(box[0]):int(box[2]) + 1]
            axs[1].imshow(cropped_image)
            axs[1].set_title("Cropped")
            axs[1].axis('off')

            plt.tight_layout()
        else:
            for i, box in enumerate(debug_bbox):
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
                # A fifth value, when present, is shown as the box score.
                label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i)
                cv2.putText(canvas, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            plt.imshow(canvas)
            plt.axis("off")
        plt.show()

if __name__ == "__main__":
    main()

I'm a novice in machine learning. This is my implementation based on my understanding. I'm not even sure it is correct, but the results seem pretty good. If anyone finds any issues, please let me know. Thank you very much! image image

Although it works well, unlike the original, I used Fast R-CNN instead of Faster R-CNN because I couldn't run Faster R-CNN properly. If anyone has a simpler way, please let me know. Additionally, I plan to replace Fast R-CNN with YOLO, but I haven't implemented it yet.

have you done this by YOLO ?

@dongdk
Copy link

dongdk commented Jan 17, 2025

it does not matter what kind of detector or segmentation method u use. providing the bboxes is enough. good luck.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

5 participants