Skip to content

Commit

Permalink
improve sparse matrix reading
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Mar 12, 2024
1 parent 9809ece commit 6f7402f
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 30 deletions.
12 changes: 3 additions & 9 deletions anndata-hdf5/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,11 @@ repository = "https://github.com/kaizhang/anndata-rs"
homepage = "https://github.com/kaizhang/anndata-rs"

[dependencies]
anndata = "0.3"
#anndata = "0.3"
anndata = { path = '../anndata' }
anyhow = "1.0"
hdf5 = { version = "0.8" }
hdf5-sys = { version = "0.8", features = ["static", "zlib", "threadsafe"] }
#libz-sys = { version = "1", features = ["zlib-ng"], default-features = false }
libz-sys = { version = "1", features = ["libc"], default-features = false }
ndarray = { version = "0.15" }

[dev-dependencies]
anndata-test-utils = { path = '../anndata-test-utils' }
tempfile = "3.2"
proptest = "1"
rand = "0.8.5"
ndarray-rand = "0.14"
ndarray = { version = "0.15" }
11 changes: 9 additions & 2 deletions anndata-test-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0"
ndarray = { version = "0.15" }
anndata = ">= 0.2, < 0.4"
anndata = { path = '../anndata' }
num = "0.4"
tempfile = "3.2"
criterion = { version = "0.4", features = ["rayon", "plotters", "cargo_bench_support", "html_reports"] }
Expand All @@ -15,4 +15,11 @@ rand = "0.8.5"
ndarray-rand = "0.14"
nalgebra = { version = "0.32", features = ["rand"] }
nalgebra-sparse = "0.9"
itertools = "0.12"
itertools = "0.12"

[dev-dependencies]
anndata-hdf5 = { path = '../anndata-hdf5' }
tempfile = "3.2"
proptest = "1"
rand = "0.8.5"
ndarray-rand = "0.14"
4 changes: 2 additions & 2 deletions anndata-test-utils/src/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ fn dense_array_strat(shape: &Vec<usize>) -> impl Strategy<Value = ArrayData> {
}

/// Generate a random compressed sparse row matrix
fn rand_csr<T>(nrow: usize, ncol: usize, nnz: usize, low: T, high: T) -> CsrMatrix<T>
pub fn rand_csr<T>(nrow: usize, ncol: usize, nnz: usize, low: T, high: T) -> CsrMatrix<T>
where
T: Scalar + Zero + ClosedAdd + SampleUniform,
{
Expand All @@ -289,7 +289,7 @@ where
}

/// Generate a random compressed sparse column matrix
fn rand_csc<T>(nrow: usize, ncol: usize, nnz: usize, low: T, high: T) -> CscMatrix<T>
pub fn rand_csc<T>(nrow: usize, ncol: usize, nnz: usize, low: T, high: T) -> CscMatrix<T>
where
T: Scalar + Zero + ClosedAdd + SampleUniform,
{
Expand Down
7 changes: 6 additions & 1 deletion anndata-test-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,24 @@ mod common;
pub use common::*;

use anndata::{*, data::CsrNonCanonical};
use ndarray::Array2;
use ndarray::{Array2, ArrayD};
use nalgebra_sparse::{CooMatrix, CsrMatrix};
use proptest::prelude::*;

/// Smoke test for a storage backend `B`: creates two file-backed `AnnData`
/// objects, checks that a CSC matrix cannot be read back as a CSR matrix,
/// and then combines both objects into an `AnnDataSet`.
///
/// Every fallible step is unwrapped, so any backend error panics and fails
/// the calling test.
pub fn test_basic<B: Backend>() {
// NOTE(review): `with_tmp_dir` is defined outside this view; presumably it
// supplies a temporary directory for the duration of the closure — confirm.
with_tmp_dir(|dir| {
let ann1 = AnnData::<B>::new(dir.join("test1.h5ad")).unwrap();
// Random 10 x 5 CSC matrix with 3 non-zeros, sampled using bounds 1 and 100.
let csc = rand_csc::<i32>(10, 5, 3, 1, 100);
ann1.obsm().add("csc", &csc).unwrap();
// Requesting the stored CSC matrix as a `CsrMatrix` must yield an error
// instead of silently reinterpreting the data.
assert!(ann1.obsm().get_item::<CsrMatrix<i32>>("csc").is_err());

let ann2 = AnnData::<B>::new(dir.join("test2.h5ad")).unwrap();
// Both objects are moved into the dataset here, so all checks on `ann1`
// have to happen before this call.
AnnDataSet::<B>::new(
[("ann1", ann1), ("ann2", ann2)],
dir.join("dataset.h5ads"),
"sample",
).unwrap();

})
}

Expand Down
File renamed without changes.
21 changes: 13 additions & 8 deletions anndata/src/data/array/sparse/csc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,14 +567,19 @@ impl<T: BackendData> WriteData for CscMatrix<T> {

impl<T: BackendData> ReadData for CscMatrix<T> {
fn read<B: Backend>(container: &DataContainer<B>) -> Result<Self> {
let group = container.as_group()?;
let shape: Vec<usize> = group.read_array_attr("shape")?.to_vec();
let data = group.open_dataset("data")?.read_array::<_, Ix1>()?.into_raw_vec();
let indptr: Vec<usize> = group.open_dataset("indptr")?.read_array::<_, Ix1>()?.into_raw_vec();
let indices: Vec<usize> = group.open_dataset("indices")?.read_array::<_, Ix1>()?.into_raw_vec();
CscMatrix::try_from_csc_data(
shape[0], shape[1], indptr, indices, data
).map_err(|e| anyhow::anyhow!("{}", e))
match container.encoding_type()? {
DataType::CscMatrix(_) => {
let group = container.as_group()?;
let shape: Vec<usize> = group.read_array_attr("shape")?.to_vec();
let data = group.open_dataset("data")?.read_array::<_, Ix1>()?.into_raw_vec();
let indptr: Vec<usize> = group.open_dataset("indptr")?.read_array::<_, Ix1>()?.into_raw_vec();
let indices: Vec<usize> = group.open_dataset("indices")?.read_array::<_, Ix1>()?.into_raw_vec();
CscMatrix::try_from_csc_data(
shape[0], shape[1], indptr, indices, data
).map_err(|e| anyhow::anyhow!("{}", e))
},
ty => bail!("cannot read csc matrix from container with data type {:?}", ty),
}
}
}

Expand Down
21 changes: 13 additions & 8 deletions anndata/src/data/array/sparse/csr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,14 +571,19 @@ impl<T: BackendData> WriteData for CsrMatrix<T> {

impl<T: BackendData> ReadData for CsrMatrix<T> {
fn read<B: Backend>(container: &DataContainer<B>) -> Result<Self> {
let group = container.as_group()?;
let shape: Vec<usize> = group.read_array_attr("shape")?.to_vec();
let data = group.open_dataset("data")?.read_array::<_, Ix1>()?.into_raw_vec();
let indptr: Vec<usize> = group.open_dataset("indptr")?.read_array::<_, Ix1>()?.into_raw_vec();
let indices: Vec<usize> = group.open_dataset("indices")?.read_array::<_, Ix1>()?.into_raw_vec();
CsrMatrix::try_from_csr_data(
shape[0], shape[1], indptr, indices, data
).map_err(|e| anyhow!("cannot read csr matrix: {}", e))
match container.encoding_type()? {
DataType::CsrMatrix(_) => {
let group = container.as_group()?;
let shape: Vec<usize> = group.read_array_attr("shape")?.to_vec();
let data = group.open_dataset("data")?.read_array::<_, Ix1>()?.into_raw_vec();
let indptr: Vec<usize> = group.open_dataset("indptr")?.read_array::<_, Ix1>()?.into_raw_vec();
let indices: Vec<usize> = group.open_dataset("indices")?.read_array::<_, Ix1>()?.into_raw_vec();
CsrMatrix::try_from_csr_data(
shape[0], shape[1], indptr, indices, data
).map_err(|e| anyhow!("cannot read csr matrix: {}", e))
},
ty => bail!("cannot read csr matrix from container with data type {:?}", ty),
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions anndata/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,13 @@ pub trait ElemCollectionOp {
pub trait AxisArraysOp {
type ArrayElem: ArrayElemOp;

/// Return the keys.
fn keys(&self) -> Vec<String>;

/// Return the ArrayElem object by key, but do not read the data.
fn get(&self, key: &str) -> Option<Self::ArrayElem>;

/// Return the array data by key.
fn get_item<D>(&self, key: &str) -> Result<Option<D>>
where
D: ReadData + Into<ArrayData> + TryFrom<ArrayData> + Clone,
Expand All @@ -137,6 +140,7 @@ pub trait AxisArraysOp {
.map_err(|e| e.context(format!("key: {}", key)))
}

/// Return a slice of the array data by key.
fn get_item_slice<D, S>(&self, key: &str, slice: S) -> Result<Option<D>>
where
D: ReadArrayData + Into<ArrayData> + TryFrom<ArrayData> + ArrayOp + Clone,
Expand Down

0 comments on commit 6f7402f

Please sign in to comment.