Skip to content

Commit

Permalink
Improve documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
torrancew committed Jul 15, 2024
1 parent 6e4f44c commit 2cd0075
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 12 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ version = "0.1.1-dev"
edition = "2021"
license = "MIT"
description = "Rust bindings for the Xapian search engine"
documentation = "https://torrancew.github.io/xapian-rs"
rust-version = "1.70"

[dependencies]
autocxx = "0.27.0"
Expand Down
7 changes: 5 additions & 2 deletions src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::{path::Path, pin::Pin};
use autocxx::{cxx, prelude::*};
use bytes::Bytes;

/// A read-only Xapian database
///
/// Newtype around a pinned, boxed `ffi::Database` handle.
pub struct Database(Pin<Box<ffi::Database>>);

impl Database {
Expand All @@ -19,11 +20,12 @@ impl Database {
self.0.as_mut().close()
}

/// Get the number of documents stored in the database
///
/// Delegates to the underlying `get_doccount` call and converts the result to a `u32`.
pub fn doc_count(&self) -> u32 {
self.0.get_doccount().into()
}

// Detect whether a given term exists in the database
/// Detect whether a given term exists in the database
pub fn term_exists(&self, term: impl AsRef<[u8]>) -> bool {
cxx::let_cxx_string!(term = term);
self.0.term_exists(&term)
Expand Down Expand Up @@ -54,10 +56,11 @@ impl From<WritableDatabase> for Database {
}
}

/// A Xapian database that can be read or written to
///
/// Newtype around a pinned, boxed `ffi::WritableDatabase` handle.
pub struct WritableDatabase(Pin<Box<ffi::WritableDatabase>>);

impl Default for WritableDatabase {
/// Open a new, in-memory [`WritableDatabase`]
///
/// Equivalent to calling `Self::inmemory()`.
fn default() -> Self {
Self::inmemory()
}
Expand Down
26 changes: 26 additions & 0 deletions src/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ use std::{
use autocxx::{cxx, prelude::*};
use bytes::Bytes;

/// A document in a Xapian database
///
/// Newtype around a pinned, boxed `ffi::Document` handle.
pub struct Document(Pin<Box<ffi::Document>>);

impl Document {
/// Wrap an already-constructed, pinned and boxed `ffi::Document`
///
/// Crate-internal constructor; the pointer is stored as-is.
pub(crate) fn new(ptr: Pin<Box<ffi::Document>>) -> Self {
Self(ptr)
}

/// Add a boolean term to the document
pub fn add_boolean_term(&mut self, term: impl AsRef<str>) {
cxx::let_cxx_string!(term = term.as_ref());
self.0.as_mut().add_boolean_term(&term)
}

/// Add an occurrence of `term` at the position given by `pos`
pub fn add_posting(
&mut self,
term: impl AsRef<str>,
Expand All @@ -32,6 +35,7 @@ impl Document {
.add_posting(&term, pos, increment.into().unwrap_or(1.into()))
}

/// Add a term to the document, without positional information
pub fn add_term(
&mut self,
term: impl AsRef<str>,
Expand All @@ -43,42 +47,64 @@ impl Document {
.add_term(&term, increment.into().unwrap_or(1.into()))
}

/// Remove all terms and postings from the document
///
/// Delegates to the underlying `clear_terms` call on the wrapped document.
pub fn clear_terms(&mut self) {
self.0.as_mut().clear_terms()
}

/// Fetch the data blob stored in this document
///
/// The raw result of `get_data` is converted to [`Bytes`] via `ffi::cxx_bytes`.
pub fn data(&self) -> Bytes {
    let raw = self.0.get_data();
    ffi::cxx_bytes(&raw)
}

/// Look up the document ID (if any) associated with this document
///
/// Returns `None` when `DocId::new` rejects the raw id reported by the
/// underlying document.
pub fn id(&self) -> Option<crate::DocId> {
    let raw_id = self.0.get_docid();
    crate::DocId::new(raw_id)
}

/// Remove `term` and all postings associated with it from this document
pub fn remove_term(&mut self, term: impl AsRef<str>) {
cxx::let_cxx_string!(term = term.as_ref());
self.0.as_mut().remove_term(&term)
}

/// Set the data blob stored alongside this document
pub fn set_data(&mut self, data: impl AsRef<[u8]>) {
cxx::let_cxx_string!(data = data);
self.0.as_mut().set_data(&data);
}

/// Set the value stored in the given slot number
///
/// Xapian values are stored as strings, but are often more useful in some other form.
/// To accomodate this, [`ToValue`][crate::ToValue] is used to serialize data in a
/// Xapian-friendly fashion. This trait is already implemented for most numeric primitives,
/// string types and byte collections.
pub fn set_value(&mut self, slot: impl Into<crate::Slot>, value: impl crate::ToValue) {
cxx::let_cxx_string!(value = value.serialize());
self.0
.as_mut()
.add_value(ffi::valueno::from(slot.into()), &value)
}

/// Build an iterator over the terms in this document
///
/// The iterator is constructed from the document's term-list begin and end
/// positions, each moved into a box.
pub fn terms(&self) -> crate::iter::TermIter {
    let begin = self.0.termlist_begin().within_box();
    let end = self.0.termlist_end().within_box();
    crate::iter::TermIter::new(begin, end)
}

/// Retrieve the value (if any) stored in the given slot number
///
/// Xapian values are stored as strings, but are often more useful in some other form.
/// To accommodate this, [`FromValue`][crate::FromValue] is used to deserialize data
/// from its Xapian representation. This trait is already implemented for most numeric
/// primitives, string types and byte collections.
///
/// - Returns `None` when there is no value stored in `slot`
/// - Returns `Some(Err(T::Error))` when there is a value but deserialization fails
/// - Returns `Some(Ok(T))` otherwise
pub fn value<T: crate::FromValue>(
&self,
slot: impl Into<crate::Slot>,
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![doc = include_str!("../README.md")]
mod db;

use std::num::NonZeroU32;
Expand Down Expand Up @@ -59,6 +60,7 @@ impl From<DocId> for u32 {
}
}

/// A newtype wrapper representing a valid document position
///
/// Wraps the raw `ffi::termpos` value.
#[derive(Debug)]
pub struct Position(ffi::termpos);

Expand Down
2 changes: 2 additions & 0 deletions src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ impl From<ffi::Query_op> for Operator {
}
}

/// A parsed query, ready for use in a search
///
/// Newtype around a pinned, boxed `ffi::Query`; `Clone` is derived.
#[derive(Clone)]
pub struct Query(Pin<Box<ffi::Query>>);

Expand Down Expand Up @@ -223,6 +224,7 @@ impl Display for Query {
}
}

/// A type for building [`Query`] objects from strings
///
/// Newtype around a pinned, boxed `ffi::QueryParser` handle.
pub struct QueryParser(Pin<Box<ffi::QueryParser>>);

impl QueryParser {
Expand Down
58 changes: 49 additions & 9 deletions src/search.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use crate::ffi;
use crate::{ffi, DocId};

use std::{
cell::{Ref, RefCell},
fmt::Debug,
ops::Deref,
pin::Pin,
rc::Rc,
string::FromUtf8Error,
};

use autocxx::{cxx, prelude::*};
Expand Down Expand Up @@ -38,23 +37,30 @@ impl DateRangeProcessor {
)
}

pub fn upcast(&mut self) -> Pin<&mut ffi::RangeProcessor> {
/// Borrow this processor as a pinned, mutable `ffi::RangeProcessor`
pub(crate) fn upcast(&mut self) -> Pin<&mut ffi::RangeProcessor> {
// SAFETY: NOTE(review) — relies on `ffi::upcast` being valid for the wrapped
// processor type; confirm against the definition of `ffi::upcast`.
unsafe { ffi::upcast(self.0.as_mut()) }
}
}

/// The primary interface to retrieve information from Xapian.
///
/// Used to perform searches, faceting, term iteration, expansion, sorting, relevancy and more.
///
/// Newtype around a pinned, boxed `ffi::Enquire` handle.
pub struct Enquire(Pin<Box<ffi::Enquire>>);

impl Enquire {
/// Create an `Enquire` for the given database
///
/// Accepts anything that can be borrowed as an `ffi::Database`.
pub fn new(db: impl AsRef<ffi::Database>) -> Self {
    let database = db.as_ref();
    let inner = ffi::Enquire::new2(database).within_box();
    Self(inner)
}

/// Register a [`MatchSpy`] implementation with this `Enquire`
///
/// Instances of `MatchSpy` can be used to implement faceting. The spy is cloned and
/// the clone is converted with `into_ffi` before registration, so the caller keeps
/// ownership of the original value.
pub fn add_matchspy<T: crate::MatchSpy + Clone + 'static>(&mut self, spy: &T) {
    let registered = spy.clone().into_ffi();
    let enquire = self.0.as_mut();
    // SAFETY: NOTE(review) — presumably sound because `into_ffi` yields a `'static`
    // reference, so the registered spy cannot be dropped while in use; confirm
    // against the shim's contract.
    unsafe { ffi::shim::enquire_add_matchspy(enquire, registered.upcast()) }
}

/// Retrieve the [`MSet`] for the current [`Query`][crate::Query]
pub fn mset(
&self,
first: u32,
Expand Down Expand Up @@ -84,10 +90,12 @@ impl Enquire {
)
}

/// Return a copy of the query currently associated with this Enquire instance
///
/// The underlying query is cloned through `ffi::shim::query_clone` and wrapped
/// in a new `crate::Query`.
pub fn query(&self) -> crate::Query {
    let current = self.0.get_query();
    let cloned = ffi::shim::query_clone(current).within_box();
    crate::Query::from_ffi(cloned)
}

/// Set the query currently associated with this Enquire instance
pub fn set_query(&mut self, query: impl AsRef<ffi::Query>, qlen: impl Into<Option<u32>>) {
self.0
.as_mut()
Expand All @@ -101,6 +109,7 @@ impl AsRef<ffi::Enquire> for Enquire {
}
}

/// An individual match item from the iterator yielded by [`MSet::matches`]
#[derive(Clone)]
pub struct Match {
value: ffi::docid,
Expand All @@ -113,22 +122,27 @@ impl Match {
Self { value, ptr }
}

pub fn docid(&self) -> u32 {
self.value.into()
/// Retrieve the [`DocId`][crate::DocId] associated with this Match
pub fn docid(&self) -> DocId {
// SAFETY: NOTE(review) — `new_unchecked` skips validation; this presumably relies
// on a match never carrying an invalid (e.g. zero) document id — confirm.
unsafe { DocId::new_unchecked(self.value) }
}

/// Fetch the [`Document`][crate::Document] associated with this Match
///
/// The underlying document is moved into a box and wrapped in `crate::Document`.
pub fn document(&self) -> crate::Document {
    let inner = self.ptr.get_document().within_box();
    crate::Document::new(inner)
}

/// Retrieve the weight of this Match, represented as a percentage
///
/// Delegates to `get_percent` and converts the result to an `i32`.
pub fn percent(&self) -> i32 {
self.ptr.get_percent().into()
}

/// Retrieve the [`MSet`] rank of this Match
///
/// Delegates to `get_rank` and converts the result to a `u32`.
pub fn rank(&self) -> u32 {
self.ptr.get_rank().into()
}

/// Retrieve the weight of this Match
///
/// Returns the `f64` reported by `get_weight` unchanged.
pub fn weight(&self) -> f64 {
self.ptr.get_weight()
}
Expand All @@ -154,9 +168,12 @@ impl PartialEq for Match {
}
}

/// A [`MatchDecider`] can be used to reject documents from an [`MSet`]
pub trait MatchDecider {
/// Decide whether this document should be included in the `MSet`
fn is_match(&self, doc: &crate::Document) -> bool;

#[doc(hidden)]
fn into_ffi(self) -> &'static MatchDeciderWrapper
where
Self: Sized + 'static,
Expand All @@ -165,6 +182,7 @@ pub trait MatchDecider {
}
}

/// Internal wrapper holding a shared, interior-mutable `ffi::RustMatchDecider`
///
/// Hidden from the public documentation.
#[doc(hidden)]
pub struct MatchDeciderWrapper(Rc<RefCell<ffi::RustMatchDecider>>);

impl MatchDeciderWrapper {
Expand All @@ -179,21 +197,30 @@ impl<T: MatchDecider + 'static> From<T> for MatchDeciderWrapper {
}
}

/// A [`MatchSpy`] can be used to accumulate information seen during the match.
///
/// Useful for faceting and generally profiling matching documents
pub trait MatchSpy {
    /// Inspect a [`Document`][crate::Document] together with its `weight`
    ///
    /// Implementations use this hook to collect any desired data/metadata from the document
    fn observe(&self, doc: &crate::Document, weight: f64);

    /// An optional, human-friendly name for the MatchSpy; `None` unless overridden
    fn name(&self) -> Option<String> {
        None
    }

    #[doc(hidden)]
    fn into_ffi(self) -> &'static mut MatchSpyWrapper
    where
        Self: Sized + 'static,
    {
        // The wrapper is deliberately leaked so the returned reference is `'static`.
        let wrapper = Box::new(MatchSpyWrapper::from(self));
        Box::leak(wrapper)
    }
}

/// Internal wrapper holding a shared, interior-mutable `ffi::RustMatchSpy`
///
/// Hidden from the public documentation.
#[doc(hidden)]
pub struct MatchSpyWrapper(Rc<RefCell<ffi::RustMatchSpy>>);

impl MatchSpyWrapper {
Expand All @@ -209,6 +236,7 @@ impl<T: MatchSpy + 'static> From<T> for MatchSpyWrapper {
}
}

/// A list of search results with associated metadata
///
/// Newtype around a pinned, boxed `ffi::MSet` handle.
pub struct MSet(Pin<Box<ffi::MSet>>);

impl MSet {
Expand All @@ -224,14 +252,17 @@ impl MSet {
self.0.end().within_box()
}

/// Convert a weight to a percentage, taking into account weighted query terms
///
/// Delegates to the underlying `convert_to_percent` call and converts the result to an `i32`.
pub fn convert_to_percent(&self, weight: f64) -> i32 {
self.0.convert_to_percent(weight).into()
}

/// Detects whether this `MSet` is empty
///
/// Returns the result of the underlying `empty` call unchanged.
pub fn empty(&self) -> bool {
self.0.empty()
}

/// Retrieve the iterator of [`Match`] objects for this `MSet`
///
/// Builds a `crate::iter::MSetIter` from a reference to this set.
pub fn matches(&self) -> crate::iter::MSetIter {
crate::iter::MSetIter::new(self)
}
Expand All @@ -240,6 +271,14 @@ impl MSet {
self.0.size().into()
}

/// Generate a snippet from the provided `text`
///
/// - `length` controls the size of the snippet
/// - `stemmer` should be an instance of the same stemming algorithm used to build the query
/// - `flags` are used to control specific bits of functionality
/// - `hl` is an optional pair of string-likes used to highlight matches within the snippet,
///   for use in markup
/// - `omit` is used to indicate any truncated prefix or suffix, i.e. where the snippet
///   starts or ends mid-sentence
pub fn snippet<T, U, V>(
&self,
text: impl AsRef<str>,
Expand All @@ -248,7 +287,7 @@ impl MSet {
flags: u32,
hl: impl Into<Option<(T, U)>>,
omit: impl Into<Option<V>>,
) -> Result<String, FromUtf8Error>
) -> String
where
T: AsRef<str> + Default,
U: AsRef<str> + Default,
Expand All @@ -269,9 +308,10 @@ impl MSet {
&omit,
);

String::from_utf8(Vec::from(text.as_bytes()))
text.to_string()
}

/// Get the number of documents which `term` occurs in
pub fn termfreq(&self, term: impl AsRef<str>) -> u32 {
cxx::let_cxx_string!(term = term.as_ref());
self.0.get_termfreq(&term).into()
Expand Down Expand Up @@ -311,7 +351,7 @@ impl NumberRangeProcessor {
)
}

pub fn upcast(&mut self) -> Pin<&mut ffi::RangeProcessor> {
/// Borrow this processor as a pinned, mutable `ffi::RangeProcessor`
pub(crate) fn upcast(&mut self) -> Pin<&mut ffi::RangeProcessor> {
// SAFETY: NOTE(review) — relies on `ffi::upcast` being valid for the wrapped
// processor type; confirm against the definition of `ffi::upcast`.
unsafe { ffi::upcast(self.0.as_mut()) }
}
}
Expand Down Expand Up @@ -375,7 +415,7 @@ impl RangeProcessor {
)
}

pub fn upcast(&mut self) -> Pin<&mut ffi::RangeProcessor> {
/// Borrow the wrapped value as a pinned, mutable `ffi::RangeProcessor`
///
/// Unlike the other processor wrappers, no cast is performed here; the inner
/// pin is simply reborrowed.
pub(crate) fn upcast(&mut self) -> Pin<&mut ffi::RangeProcessor> {
self.0.as_mut()
}
}
Loading

0 comments on commit 2cd0075

Please sign in to comment.