diff --git a/crates/re_arrow_store/src/store.rs b/crates/re_arrow_store/src/store.rs
index 218d2cabdd6b..8431ebe84646 100644
--- a/crates/re_arrow_store/src/store.rs
+++ b/crates/re_arrow_store/src/store.rs
@@ -561,8 +561,6 @@ impl Default for IndexedBucketInner {
 /// ```text
 /// cargo test -p re_arrow_store -- --nocapture datastore_internal_repr
 /// ```
-//
-// TODO(#1807): timeless should be row-id ordered too then
 #[derive(Debug)]
 pub struct PersistentIndexedTable {
     /// The entity this table is related to, for debugging purposes.
diff --git a/crates/re_arrow_store/src/store_gc.rs b/crates/re_arrow_store/src/store_gc.rs
index b32e31df40d6..ad8b92e7d5a6 100644
--- a/crates/re_arrow_store/src/store_gc.rs
+++ b/crates/re_arrow_store/src/store_gc.rs
@@ -300,10 +300,6 @@ impl DataStore {
     }
 
     /// For each `EntityPath`, `Timeline`, `Component` find the N latest [`RowId`]s.
-    ///
-    /// These are the rows that must be protected so as not to impact a latest-at query.
-    /// Note that latest for Timeless is currently based on insertion-order rather than
-    /// tuid. [See: #1807](https://github.com/rerun-io/rerun/issues/1807)
     //
     // TODO(jleibs): More complex functionality might required expanding this to also
     // *ignore* specific entities, components, timelines, etc. for this protection.
@@ -366,7 +362,6 @@ impl DataStore {
         }
 
         // Find all protected rows in timeless tables
-        // TODO(#1807): this is still based on insertion order.
         for table in self.timeless_tables.values() {
             let cluster_key = table.cluster_key;
             let table = table.inner.read();
@@ -693,43 +688,39 @@ impl PersistentIndexedTable {
             cluster_key: _,
             inner,
         } = self;
+
+        let inner = &mut *inner.write();
+        inner.sort();
+
         let PersistentIndexedTableInner {
             col_insert_id,
             col_row_id,
             col_num_instances,
             columns,
-            is_sorted: _,
-        } = &mut *inner.write();
+            is_sorted,
+        } = inner;
 
         let mut diff: Option<StoreDiff> = None;
 
-        // TODO(#1807): Timeless data isn't sorted, so we need to do a full scan here.
-        // Speed this up when we implement #1807.
-        if let Some(row_index) = col_row_id
-            .iter()
-            .enumerate()
-            .find(|(_, r)| **r == row_id)
-            .map(|(index, _)| index)
-        {
+        if let Ok(row_index) = col_row_id.binary_search(&row_id) {
+            *is_sorted = row_index.saturating_add(1) == col_row_id.len();
+
             // col_row_id
-            // TODO(jleibs) Use swap_remove once we have a notion of sorted
-            let removed_row_id = col_row_id.remove(row_index);
+            let removed_row_id = col_row_id.swap_remove(row_index);
             debug_assert_eq!(row_id, removed_row_id);
             dropped_num_bytes += removed_row_id.total_size_bytes();
 
             // col_insert_id (if present)
             if !col_insert_id.is_empty() {
-                // TODO(jleibs) Use swap_remove once we have a notion of sorted
-                dropped_num_bytes += col_insert_id.remove(row_index).total_size_bytes();
+                dropped_num_bytes += col_insert_id.swap_remove(row_index).total_size_bytes();
             }
 
             // col_num_instances
-            // TODO(jleibs) Use swap_remove once we have a notion of sorted
-            dropped_num_bytes += col_num_instances.remove(row_index).total_size_bytes();
+            dropped_num_bytes += col_num_instances.swap_remove(row_index).total_size_bytes();
 
             // each data column
             for column in columns.values_mut() {
-                let cell = column.0.remove(row_index);
+                let cell = column.0.swap_remove(row_index);
 
                 // TODO(#1809): once datatype deduplication is in, we should really not count
                 // autogenerated keys as part of the memory stats (same on write path).