Skip to content

Commit

Permalink
enh(target document tokens): return offsets of chunks that were appen…
Browse files Browse the repository at this point in the history
…ded / prepended (#9571)

Co-authored-by: Henry Fontanier <henry@dust.tt>
  • Loading branch information
fontanierh and Henry Fontanier authored Dec 20, 2024
1 parent 21439c9 commit 99bc958
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions core/src/data_sources/data_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ pub struct Chunk {
pub offset: usize,
pub vector: Option<Vec<f64>>,
pub score: Option<f64>,
// Empty unless search was run with `target_document_tokens`
// and the chunk's `text` was expanded with content from other chunks.
// In this case, this field will contain the offsets of the chunks that
// were included to produce the chunk's `text`.
pub expanded_offsets: Vec<usize>,
}

/// Document is used as a data-structure for insertion into the SQL store (no
Expand Down Expand Up @@ -953,6 +958,7 @@ impl DataSource {
offset: i,
vector: Some(v.vector.clone()),
score: None,
expanded_offsets: vec![],
}
})
.collect::<Vec<_>>();
Expand Down Expand Up @@ -1432,6 +1438,8 @@ impl DataSource {
== chunk.offset
{
let c_offset = parsed_results[counter].1;
chunk.expanded_offsets.push(c_offset);

if chunk.offset < c_offset {
chunk.text.push_str(
&(" ".to_owned()
Expand Down Expand Up @@ -2228,6 +2236,7 @@ fn parse_points_into_chunks(
offset: chunk_offset as usize,
vector: None,
score: maybe_score,
expanded_offsets: vec![],
},
))
})
Expand Down

0 comments on commit 99bc958

Please sign in to comment.