Skip to content

Commit

Permalink
Bump the version to 0.0.12
Browse files Browse the repository at this point in the history
* Add sapiens-seg model
  • Loading branch information
jamjamjon authored Aug 31, 2024
1 parent f25f5cf commit f6755a8
Show file tree
Hide file tree
Showing 23 changed files with 340 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "usls"
version = "0.0.11"
version = "0.0.12"
edition = "2021"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10)
- **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
- **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet)
- **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569)
- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World)

<details>
Expand Down Expand Up @@ -70,6 +70,7 @@
| [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) |||||
| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) |||||
| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) ||| | |
| [Sapiens](https://github.com/facebookresearch/sapiens/tree/main) | Body Part Segmentation | [demo](examples/sapiens) ||| | |

</details>

Expand Down
Binary file added assets/paul-george.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions benches/yolo.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use anyhow::Result;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

use usls::{coco, models::YOLO, DataLoader, Options, Vision, YOLOTask, YOLOVersion};
use usls::{models::YOLO, DataLoader, Options, Vision, YOLOTask, YOLOVersion, COCO_KEYPOINTS_17};

enum Stage {
Pre,
Expand Down Expand Up @@ -60,7 +60,7 @@ pub fn benchmark_cuda(c: &mut Criterion, h: isize, w: isize) -> Result<()> {
.with_i02((320, h, 1280).into())
.with_i03((320, w, 1280).into())
.with_confs(&[0.2, 0.15]) // class_0: 0.2, others: 0.15
.with_names2(&coco::KEYPOINTS_NAMES_17);
.with_names2(&COCO_KEYPOINTS_17);
let mut model = YOLO::new(options)?;

let xs = vec![DataLoader::try_read("./assets/bus.jpg")?];
Expand Down
2 changes: 1 addition & 1 deletion examples/depth-anything/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut model = DepthAnything::new(options)?;

// load
let x = vec![DataLoader::try_read("./assets/2.jpg")?];
let x = [DataLoader::try_read("./assets/2.jpg")?];

// run
let y = model.run(&x)?;
Expand Down
4 changes: 2 additions & 2 deletions examples/rtmo/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use usls::{coco, models::RTMO, Annotator, DataLoader, Options};
use usls::{models::RTMO, Annotator, DataLoader, Options, COCO_SKELETONS_16};

fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
Expand All @@ -19,7 +19,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// annotate
let annotator = Annotator::default()
.with_saveout("RTMO")
.with_skeletons(&coco::SKELETONS_16);
.with_skeletons(&COCO_SKELETONS_16);
annotator.annotate(&x, &y);

Ok(())
Expand Down
30 changes: 30 additions & 0 deletions examples/sapiens/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use usls::{
models::{Sapiens, SapiensTask},
Annotator, DataLoader, Options, BODY_PARTS_28,
};

/// Example entry point: runs the Sapiens segmentation model on one sample
/// image and hands the result to an `Annotator`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Configure and construct the model in one go.
    // NOTE(review): `(1, 1, 8)` presumably is the (min, opt, max) range of the
    // first input dimension — confirm against `MinOptMax`.
    let mut model = Sapiens::new(
        Options::default()
            .with_model("sapiens-seg-0.3b-dyn.onnx")?
            .with_sapiens_task(SapiensTask::Seg)
            .with_names(&BODY_PARTS_28)
            .with_profile(false)
            .with_i00((1, 1, 8).into()),
    )?;

    // Read the single input image.
    let images = [DataLoader::try_read("./assets/paul-george.jpg")?];

    // Inference.
    let predictions = model.run(&images)?;

    // Annotate with mask plotting disabled and polygon names hidden.
    Annotator::default()
        .without_masks(true)
        .with_polygons_name(false)
        .with_saveout("Sapiens")
        .annotate(&images, &predictions);

    Ok(())
}
11 changes: 7 additions & 4 deletions examples/yolo/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use anyhow::Result;
use clap::Parser;

use usls::{coco, models::YOLO, Annotator, DataLoader, Options, Vision, YOLOTask, YOLOVersion};
use usls::{
models::YOLO, Annotator, DataLoader, Options, Vision, YOLOTask, YOLOVersion, COCO_KEYPOINTS_17,
COCO_SKELETONS_16,
};

#[derive(Parser, Clone)]
#[command(author, version, about, long_about = None)]
Expand Down Expand Up @@ -174,8 +177,8 @@ fn main() -> Result<()> {
.with_i02((args.height_min, args.height, args.height_max).into())
.with_i03((args.width_min, args.width, args.width_max).into())
.with_confs(&[0.2, 0.15]) // class_0: 0.2, others: 0.15
// .with_names(&coco::NAMES_80)
.with_names2(&coco::KEYPOINTS_NAMES_17)
// .with_names(&COCO_CLASS_NAMES_80)
.with_names2(&COCO_KEYPOINTS_17)
.with_find_contours(!args.no_contours) // find contours or not
.with_profile(args.profile);
let mut model = YOLO::new(options)?;
Expand All @@ -187,7 +190,7 @@ fn main() -> Result<()> {

// build annotator
let annotator = Annotator::default()
.with_skeletons(&coco::SKELETONS_16)
.with_skeletons(&COCO_SKELETONS_16)
.with_bboxes_thickness(4)
.without_masks(true) // No masks plotting when doing segment task.
.with_saveout(saveout);
Expand Down
2 changes: 2 additions & 0 deletions src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub mod onnx;
pub mod ops;
mod options;
mod ort_engine;
mod task;
mod tokenizer_stream;
mod ts;
mod vision;
Expand All @@ -25,6 +26,7 @@ pub use min_opt_max::MinOptMax;
pub use ops::Ops;
pub use options::Options;
pub use ort_engine::OrtEngine;
pub use task::Task;
pub use tokenizer_stream::TokenizerStream;
pub use ts::Ts;
pub use vision::Vision;
Expand Down
58 changes: 52 additions & 6 deletions src/core/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use fast_image_resize::{
FilterType, ResizeAlg, ResizeOptions, Resizer,
};
use image::{DynamicImage, GenericImageView};
use ndarray::{s, Array, Axis, IntoDimension, IxDyn};
use ndarray::{s, Array, Array3, Axis, IntoDimension, IxDyn};
use rayon::prelude::*;

pub enum Ops<'a> {
Expand Down Expand Up @@ -159,7 +159,41 @@ impl Ops<'_> {
mask.resize_exact(w1 as u32, h1 as u32, image::imageops::FilterType::Triangle)
}

pub fn resize_lumaf32_vec(
// pub fn argmax(xs: Array<f32, IxDyn>, d: usize, keep_dims: bool) -> Result<Array<f32, IxDyn>> {
// let mask = Array::zeros(xs.raw_dim());
// todo!();
// }

/// Resizes every 2-D plane of a 3-D `f32` array to `th` x `tw`.
///
/// The input is interpreted as `(n, h, w)`: each of the `n` planes along
/// axis 0 is resized independently with [`Ops::resize_lumaf32_f32`] and the
/// results are stacked into an `(n, th, tw)` array.
///
/// # Arguments
/// * `xs` - input array; must have exactly 3 dimensions.
/// * `tw` - target width (truncated to an integer).
/// * `th` - target height (truncated to an integer).
/// * `filter` - filter name forwarded unchanged to `resize_lumaf32_f32`.
///
/// # Errors
/// Returns an error when `xs` is not 3-dimensional, when the underlying
/// resize fails, or when the resized buffer does not match `(th, tw)`.
pub fn interpolate_3d(
    xs: Array<f32, IxDyn>,
    tw: f32,
    th: f32,
    filter: &str,
) -> Result<Array<f32, IxDyn>> {
    let d_max = xs.ndim();
    if d_max != 3 {
        anyhow::bail!("`interpolate_3d`: The input's ndim: {} is not 3.", d_max);
    }
    let (n, h, w) = (xs.shape()[0], xs.shape()[1], xs.shape()[2]);

    // Truncate the target size once so the output buffer and every plane agree.
    let (th_, tw_) = (th as usize, tw as usize);
    let mut ys = Array3::zeros((n, th_, tw_));

    for (plane, mut out) in xs.axis_iter(Axis(0)).zip(ys.axis_iter_mut(Axis(0))) {
        // Gather the pixels in logical (row-major) order. Taking the raw
        // backing buffer of an owned copy would yield *memory* order, which
        // differs from row-major when `xs` is not in standard layout
        // (e.g. after an axis permutation) and would scramble the image.
        let pixels: Vec<f32> = plane.iter().copied().collect();
        let v = Ops::resize_lumaf32_f32(
            &pixels,
            w as _,
            h as _,
            tw as _,
            th as _,
            false,
            filter,
        )?;
        let resized = Array::from_shape_vec((th_, tw_), v)?;
        out.assign(&resized);
    }

    Ok(ys.into_dyn())
}

pub fn resize_lumaf32_u8(
v: &[f32],
w0: f32,
h0: f32,
Expand All @@ -168,6 +202,20 @@ impl Ops<'_> {
crop_src: bool,
filter: &str,
) -> Result<Vec<u8>> {
let mask_f32 = Self::resize_lumaf32_f32(v, w0, h0, w1, h1, crop_src, filter)?;
let v: Vec<u8> = mask_f32.par_iter().map(|&x| (x * 255.0) as u8).collect();
Ok(v)
}

pub fn resize_lumaf32_f32(
v: &[f32],
w0: f32,
h0: f32,
w1: f32,
h1: f32,
crop_src: bool,
filter: &str,
) -> Result<Vec<f32>> {
let src = Image::from_vec_u8(
w0 as _,
h0 as _,
Expand All @@ -189,12 +237,10 @@ impl Ops<'_> {
.map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
.collect();

// f32 -> u8
let v: Vec<u8> = mask_f32.par_iter().map(|&x| (x * 255.0) as u8).collect();
Ok(v)
Ok(mask_f32)
}

pub fn resize_luma8_vec(
pub fn resize_luma8_u8(
v: &[u8],
w0: f32,
h0: f32,
Expand Down
9 changes: 8 additions & 1 deletion src/core/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use anyhow::Result;

use crate::{
auto_load,
models::{SamKind, YOLOPreds, YOLOTask, YOLOVersion},
models::{SamKind, SapiensTask, YOLOPreds, YOLOTask, YOLOVersion},
Device, MinOptMax,
};

Expand Down Expand Up @@ -92,6 +92,7 @@ pub struct Options {
pub find_contours: bool,
pub sam_kind: Option<SamKind>,
pub use_low_res_mask: Option<bool>,
pub sapiens_task: Option<SapiensTask>,
}

impl Default for Options {
Expand Down Expand Up @@ -175,6 +176,7 @@ impl Default for Options {
find_contours: false,
sam_kind: None,
use_low_res_mask: None,
sapiens_task: None,
}
}
}
Expand Down Expand Up @@ -220,6 +222,11 @@ impl Options {
self
}

pub fn with_sapiens_task(mut self, x: SapiensTask) -> Self {
self.sapiens_task = Some(x);
self
}

pub fn with_yolo_version(mut self, x: YOLOVersion) -> Self {
self.yolo_version = Some(x);
self
Expand Down
27 changes: 27 additions & 0 deletions src/core/task.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/// Kinds of tasks a model can perform.
///
/// The enum is fieldless, so it is cheap to copy and can be compared,
/// hashed and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Task {
    // vision
    ImageClassification,
    ObjectDetection,
    KeypointsDetection,
    /// NOTE(review): spelling looks like a typo for `RegionProposal`; the
    /// name is kept as-is so existing callers keep compiling.
    RegisonProposal,
    PoseEstimation,
    SemanticSegmentation,
    InstanceSegmentation,
    DepthEstimation,
    SurfaceNormalPrediction,
    Image2ImageGeneration,
    Inpainting,
    SuperResolution,
    Denoising,

    // vl (vision-language)
    Tagging,
    Captioning,
    DetailedCaptioning,
    MoreDetailedCaptioning,
    PhraseGrounding,
    Vqa,
    Ocr,
    Text2ImageGeneration,
}
5 changes: 3 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
//! - [YOLOPv2](https://arxiv.org/abs/2208.11434): Panoptic Driving Perception
//! - [Depth-Anything (v1, v2)](https://github.com/LiheYoung/Depth-Anything): Monocular Depth Estimation
//! - [MODNet](https://github.com/ZHKKKe/MODNet): Image Matting
//! - [Sapiens](https://arxiv.org/abs/2408.12569): Human-centric Vision Tasks
//!
//! # Examples
//!
Expand All @@ -35,7 +36,7 @@
//! Using provided [`models`] with [`Options`]
//!
//! ```rust, no_run
//! use usls::{coco, models::YOLO, Annotator, DataLoader, Options, Vision};
//! use usls::{models::YOLO, Annotator, DataLoader, Options, Vision, YOLOVersion, COCO_CLASS_NAMES_80};
//!
//! let options = Options::default()
//! .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
Expand Down Expand Up @@ -74,7 +75,7 @@
//!
//! ```rust, no_run
//! let options = Options::default()
//! .with_names(&coco::NAMES_80);
//! .with_names(&COCO_CLASS_NAMES_80);
//! ```
//!
//! More options can be found in the [`Options`] documentation.
Expand Down
2 changes: 1 addition & 1 deletion src/models/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ impl DB {
})
.collect::<Vec<_>>();

let luma = Ops::resize_luma8_vec(
let luma = Ops::resize_luma8_u8(
&v,
self.width() as _,
self.height() as _,
Expand Down
2 changes: 1 addition & 1 deletion src/models/depth_anything.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ impl DepthAnything {
.map(|x| (((*x - min_) / (max_ - min_)) * 255.).clamp(0., 255.) as u8)
.collect::<Vec<_>>();

let luma = Ops::resize_luma8_vec(
let luma = Ops::resize_luma8_u8(
&v,
self.width() as _,
self.height() as _,
Expand Down
2 changes: 2 additions & 0 deletions src/models/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ mod grounding_dino;
mod modnet;
mod rtmo;
mod sam;
mod sapiens;
mod svtr;
mod yolo;
mod yolo_;
Expand All @@ -23,6 +24,7 @@ pub use grounding_dino::GroundingDINO;
pub use modnet::MODNet;
pub use rtmo::RTMO;
pub use sam::{SamKind, SamPrompt, SAM};
pub use sapiens::{Sapiens, SapiensTask};
pub use svtr::SVTR;
pub use yolo::YOLO;
pub use yolo_::*;
Expand Down
2 changes: 1 addition & 1 deletion src/models/modnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl MODNet {
for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() {
let (w1, h1) = (xs0[idx].width(), xs0[idx].height());
let luma = luma.mapv(|x| (x * 255.0) as u8);
let luma = Ops::resize_luma8_vec(
let luma = Ops::resize_luma8_u8(
&luma.into_raw_vec_and_offset().0,
self.width() as _,
self.height() as _,
Expand Down
2 changes: 1 addition & 1 deletion src/models/sam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ impl SAM {

let (h, w) = mask.dim();
let luma = if self.use_low_res_mask {
Ops::resize_lumaf32_vec(
Ops::resize_lumaf32_u8(
&mask.into_owned().into_raw_vec_and_offset().0,
w as _,
h as _,
Expand Down
Loading

0 comments on commit f6755a8

Please sign in to comment.