diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 486f5ea..5ff83a3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,4 +43,5 @@ jobs: # Develop and test with extras maturin develop --extras polars + pip install -e ".[polars]" pytest diff --git a/python/biobear/reader.py b/python/biobear/reader.py index e476807..9ba19b8 100644 --- a/python/biobear/reader.py +++ b/python/biobear/reader.py @@ -7,15 +7,34 @@ class Reader(ABC): - """The abstract reader class.""" + """An abstract base class (ABC) representing a reader. + + The class defines basic functionalities for conversion, but the specifics must be + implemented in a subclass. + """ @property @abstractmethod def inner(self): - """Return the inner reader.""" + """Abstract property for the inner reader. + + Returns: + The inner reader. The type of the reader is defined by the specific + subclass. + """ def to_polars(self): - """Read the fasta file and return a polars DataFrame.""" + """Convert the inner data to a Polars DataFrame. + + This method first converts the inner reader's data to an Arrow table, + then to a Python dictionary, and finally to a Polars DataFrame. + + Returns: + pl.DataFrame: The converted data in a Polars DataFrame. + + Raises: + ImportError: If the 'polars' package is not installed. + """ try: import polars as pl except ImportError as import_error: @@ -27,9 +46,29 @@ def to_polars(self): return pl.from_dict(pydict) def to_arrow_scanner(self) -> ds.Scanner: - """Convert the fasta reader to an arrow scanner.""" + """Convert the inner data to an Arrow scanner. + + This method first converts the inner reader's data to Arrow batches, + and then forms a scanner from these batches. + + Returns: + ds.Scanner: The converted data in an Arrow scanner. + """ return ds.Scanner.from_batches(self.to_arrow()) def to_arrow(self) -> pa.RecordBatchReader: - """Convert the fasta reader to an arrow batch reader.""" + """Convert the inner data to an Arrow record batch reader. + + If the inner reader is exhausted, this method raises an exception. + Otherwise, it converts the inner reader's data to an Arrow record batch. + + Returns: + pa.RecordBatchReader: The converted data in an Arrow record batch reader. + + Raises: + StopIteration: If the inner reader is exhausted. + """ + if self.inner.is_exhausted(): + raise StopIteration("The reader is exhausted.") + return self.inner.to_pyarrow() diff --git a/python/tests/test_fasta_reader.py b/python/tests/test_fasta_reader.py index 3c169f0..9f078ac 100644 --- a/python/tests/test_fasta_reader.py +++ b/python/tests/test_fasta_reader.py @@ -55,3 +55,13 @@ def test_fasta_reader_to_arrow(): def test_fasta_reader_no_file(): with pytest.raises(OSError): FastaReader("test.fasta") + + +@pytest.mark.skipif( + not importlib.util.find_spec("polars"), reason="polars not installed" +) +def test_multiple_calls_raise_an_exhausted_error(): + fasta_reader = FastaReader(DATA / "test.fasta") + fasta_reader.to_polars() + with pytest.raises(StopIteration): + fasta_reader.to_polars() diff --git a/src/exon_reader.rs b/src/exon_reader.rs index 2d44ec6..2a8db0b 100644 --- a/src/exon_reader.rs +++ b/src/exon_reader.rs @@ -30,6 +30,7 @@ use tokio::runtime::Runtime; #[pyclass(name = "_ExonReader")] pub struct ExonReader { df: datafusion::dataframe::DataFrame, + exhausted: bool, _runtime: Arc, } @@ -64,7 +65,11 @@ impl ExonReader { }); match df { - Ok(df) => Ok(Self { df, _runtime: rt }), + Ok(df) => Ok(Self { + df, + _runtime: rt, + exhausted: false, + }), Err(e) => Err(e), } } @@ -102,7 +107,12 @@ impl ExonReader { }) } - fn to_pyarrow(&self) -> PyResult { + fn is_exhausted(&self) -> bool { + self.exhausted + } + + #[allow(clippy::wrong_self_convention)] + fn to_pyarrow(&mut self) -> PyResult { let stream = Arc::new(FFI_ArrowArrayStream::empty()); let stream_ptr = Arc::into_raw(stream) as *mut FFI_ArrowArrayStream; @@ -116,6 +126,8 @@ impl ExonReader { .unwrap(); }); + self.exhausted = true; + Python::with_gil(|py| unsafe { match ArrowArrayStreamReader::from_raw(stream_ptr) { Ok(stream_reader) => stream_reader.into_pyarrow(py),