Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add exhausted check to avoid panic #43

Merged
merged 2 commits into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,5 @@ jobs:

# Develop and test with extras
maturin develop --extras polars
pip install -e ".[polars]"
pytest
49 changes: 44 additions & 5 deletions python/biobear/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,34 @@


class Reader(ABC):
"""The abstract reader class."""
"""An abstract base class (ABC) representing a reader.

The class defines basic functionalities for conversion, but the specifics must be
implemented in a subclass.
"""

@property
@abstractmethod
def inner(self):
"""Return the inner reader."""
"""Abstract property for the inner reader.

Returns:
The inner reader. The type of the reader is defined by the specific
subclass.
"""

def to_polars(self):
"""Read the fasta file and return a polars DataFrame."""
"""Convert the inner data to a Polars DataFrame.

This method first converts the inner reader's data to an Arrow table,
then to a Python dictionary, and finally to a Polars DataFrame.

Returns:
pl.DataFrame: The converted data in a Polars DataFrame.

Raises:
ImportError: If the 'polars' package is not installed.
"""
try:
import polars as pl
except ImportError as import_error:
Expand All @@ -27,9 +46,29 @@ def to_polars(self):
return pl.from_dict(pydict)

def to_arrow_scanner(self) -> ds.Scanner:
"""Convert the fasta reader to an arrow scanner."""
"""Convert the inner data to an Arrow scanner.

This method first converts the inner reader's data to Arrow batches,
and then forms a scanner from these batches.

Returns:
ds.Scanner: The converted data in an Arrow scanner.
"""
return ds.Scanner.from_batches(self.to_arrow())

def to_arrow(self) -> pa.RecordBatchReader:
"""Convert the fasta reader to an arrow batch reader."""
"""Convert the inner data to an Arrow record batch reader.

If the inner reader is exhausted, this method raises StopIteration.
Otherwise, it converts the inner reader's data to an Arrow record batch reader.

Returns:
pa.RecordBatchReader: The converted data in an Arrow record batch reader.

Raises:
StopIteration: If the inner reader is exhausted.
"""
if self.inner.is_exhausted():
raise StopIteration("The reader is exhausted.")

return self.inner.to_pyarrow()
10 changes: 10 additions & 0 deletions python/tests/test_fasta_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,13 @@ def test_fasta_reader_to_arrow():
def test_fasta_reader_no_file():
with pytest.raises(OSError):
FastaReader("test.fasta")


@pytest.mark.skipif(
    importlib.util.find_spec("polars") is None,
    reason="polars not installed",
)
def test_multiple_calls_raise_an_exhausted_error():
    """A second conversion on an already-consumed reader raises StopIteration."""
    reader = FastaReader(DATA / "test.fasta")

    # The first conversion is expected to succeed; its result is not inspected.
    reader.to_polars()

    # Converting the same reader again must fail with StopIteration.
    with pytest.raises(StopIteration):
        reader.to_polars()
16 changes: 14 additions & 2 deletions src/exon_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use tokio::runtime::Runtime;
#[pyclass(name = "_ExonReader")]
pub struct ExonReader {
df: datafusion::dataframe::DataFrame,
exhausted: bool,
_runtime: Arc<Runtime>,
}

Expand Down Expand Up @@ -64,7 +65,11 @@ impl ExonReader {
});

match df {
Ok(df) => Ok(Self { df, _runtime: rt }),
Ok(df) => Ok(Self {
df,
_runtime: rt,
exhausted: false,
}),
Err(e) => Err(e),
}
}
Expand Down Expand Up @@ -102,7 +107,12 @@ impl ExonReader {
})
}

fn to_pyarrow(&self) -> PyResult<PyObject> {
/// Returns the current value of the reader's `exhausted` flag.
///
/// The flag is initialised to `false` when the reader is constructed and is
/// set to `true` by `to_pyarrow`, so a `true` result means the reader has
/// already been converted once.
fn is_exhausted(&self) -> bool {
self.exhausted
}

#[allow(clippy::wrong_self_convention)]
fn to_pyarrow(&mut self) -> PyResult<PyObject> {
let stream = Arc::new(FFI_ArrowArrayStream::empty());
let stream_ptr = Arc::into_raw(stream) as *mut FFI_ArrowArrayStream;

Expand All @@ -116,6 +126,8 @@ impl ExonReader {
.unwrap();
});

self.exhausted = true;

Python::with_gil(|py| unsafe {
match ArrowArrayStreamReader::from_raw(stream_ptr) {
Ok(stream_reader) => stream_reader.into_pyarrow(py),
Expand Down
Loading