diff --git a/src/bucket.rs b/src/bucket.rs index a764ac9..4fe3359 100644 --- a/src/bucket.rs +++ b/src/bucket.rs @@ -281,10 +281,7 @@ impl Bucket { } pub fn allocate(&mut self, size: u32) -> Option<(u64, u32)> { - avail::remove_elem(&mut self.avail, size).map(|block| { - self.dirty = true; - block - }) + avail::remove_elem(&mut self.avail, size).inspect(|_| self.dirty = true) } pub fn free(&mut self, offset: u64, length: u32) { diff --git a/src/header.rs b/src/header.rs index e236fe2..2505599 100644 --- a/src/header.rs +++ b/src/header.rs @@ -227,10 +227,7 @@ impl Header { } pub fn allocate(&mut self, size: u32) -> Option<(u64, u32)> { - self.avail.remove_elem(size).map(|block| { - self.dirty = true; - block - }) + self.avail.remove_elem(size).inspect(|_| self.dirty = true) } pub fn free(&mut self, offset: u64, length: u32) { diff --git a/src/lib.rs b/src/lib.rs index 8c38eb4..ffd3c00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,8 +100,6 @@ pub struct Gdbm { pub header: Header, pub dir: Directory, bucket_cache: BucketCache, - - iter_key: Option>, } impl Gdbm { @@ -177,7 +175,6 @@ impl Gdbm { header, dir, bucket_cache, - iter_key: None, }) } @@ -238,7 +235,7 @@ impl Gdbm { pub fn import_ascii(&mut self, reader: &mut impl Read) -> io::Result<()> { ASCIIImportIterator::new(reader)?.try_for_each(|l| { let (key, value) = l?; - self.insert(&key, &value).map(|_| ()) + self.insert(key, value).map(|_| ()) }) } @@ -303,7 +300,7 @@ impl Gdbm { BinaryImportIterator::new(alignment, reader)?.try_for_each(|l| { let (key, value) = l?; - self.insert(&key, &value).map(|_| ()) + self.insert(key, value).map(|_| ()) }) } @@ -354,9 +351,19 @@ impl Gdbm { Ok(len) } + // API: get an iterator over values + pub fn values(&mut self) -> impl std::iter::Iterator>> + '_ { + GDBMIterator::new(self, KeyOrValue::Value).map(|data| data.map(|(_, value)| value)) + } + + // API: get an iterator over keys + pub fn keys(&mut self) -> impl std::iter::Iterator>> + '_ { + GDBMIterator::new(self, 
KeyOrValue::Key).map(|data| data.map(|(key, _)| key)) + } + // API: get an iterator - pub fn iter(&mut self) -> GDBMIterator { - GDBMIterator::new(self) + pub fn iter(&mut self) -> impl std::iter::Iterator, Vec)>> + '_ { + GDBMIterator::new(self, KeyOrValue::Both) } // API: does key exist? @@ -612,13 +619,13 @@ impl Gdbm { Ok(offset) } - fn int_insert(&mut self, key: &[u8], data: &[u8]) -> io::Result<()> { + fn int_insert(&mut self, key: Vec, data: Vec) -> io::Result<()> { let offset = self.allocate_record((key.len() + data.len()) as u32)?; self.f.seek(SeekFrom::Start(offset))?; - self.f.write_all(key)?; - self.f.write_all(data)?; + self.f.write_all(&key)?; + self.f.write_all(&data)?; - let bucket_elem = BucketElement::new(key, data, offset); + let bucket_elem = BucketElement::new(&key, &data, offset); self.cache_load_bucket(bucket_dir(self.header.dir_bits, bucket_elem.hash))?; while self.bucket_cache.current_bucket().unwrap().count == self.header.bucket_elems { @@ -634,26 +641,25 @@ impl Gdbm { Ok(()) } - pub fn insert(&mut self, key: &[u8], data: &[u8]) -> io::Result>> { + pub fn insert(&mut self, key: Vec, data: Vec) -> io::Result>> { self.writeable() - .and_then(|_| self.remove(key)) + .and_then(|_| self.remove(&key)) .and_then(|oldkey| self.int_insert(key, data).map(|_| oldkey)) } - pub fn try_insert(&mut self, key: &[u8], data: &[u8]) -> io::Result<(bool, Option>)> { + pub fn try_insert( + &mut self, + key: Vec, + data: Vec, + ) -> io::Result<(bool, Option>)> { self.writeable() - .and_then(|_| self.get(key)) + .and_then(|_| self.get(&key)) .and_then(|olddata| match olddata { Some(_) => Ok((false, olddata)), _ => self.int_insert(key, data).map(|_| (true, None)), }) } - // API: reset iterator state - pub fn iter_reset(&mut self) { - self.iter_key = None; - } - fn split_bucket(&mut self) -> io::Result<()> { if self.bucket_cache.current_bucket().unwrap().bits == self.header.dir_bits { self.extend_directory()?; @@ -719,11 +725,18 @@ impl Gdbm { } } -pub struct 
GDBMIterator<'a> { +struct GDBMIterator<'a> { + key_or_value: KeyOrValue, db: &'a mut Gdbm, slot: Option>, } +enum KeyOrValue { + Key, + Value, + Both, +} + #[derive(Debug)] struct Slot { bucket: usize, @@ -768,7 +781,7 @@ impl<'a> GDBMIterator<'a> { None } - fn new(db: &'a mut Gdbm) -> GDBMIterator<'a> { + fn new(db: &'a mut Gdbm, key_or_value: KeyOrValue) -> GDBMIterator<'a> { let slot = { let slot = Slot { bucket: 0, @@ -785,7 +798,11 @@ impl<'a> GDBMIterator<'a> { Err(e) => Some(Err(e)), } }; - Self { db, slot } + Self { + db, + slot, + key_or_value, + } } } @@ -808,12 +825,24 @@ impl<'a> Iterator for GDBMIterator<'a> { .map(|e| (e.data_ofs, e.key_size as usize, e.data_size as usize)) .unwrap() }) - .and_then(|(offset, key_length, data_length)| { - read_ofs(&mut self.db.f, offset, key_length + data_length).map(|data| { - let (key, value) = data.split_at(key_length); - (key.to_vec(), value.to_vec()) - }) - }); + .and_then( + |(offset, key_length, data_length)| match self.key_or_value { + KeyOrValue::Key => read_ofs(&mut self.db.f, offset, key_length) + .map(|data| (data.to_vec(), vec![])), + KeyOrValue::Value => { + read_ofs(&mut self.db.f, offset + key_length as u64, data_length) + .map(|data| (vec![], data.to_vec())) + } + KeyOrValue::Both => { + read_ofs(&mut self.db.f, offset, key_length + data_length).map( + |data| { + let (key, value) = data.split_at(key_length); + (key.to_vec(), value.to_vec()) + }, + ) + } + }, + ); match data { Ok(data) => { diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 672f874..643c7c5 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -20,7 +20,9 @@ pub struct TestInfo { #[allow(dead_code)] pub json_path: String, pub db_path: String, + #[allow(dead_code)] pub is_basic: bool, + #[allow(dead_code)] pub n_records: usize, #[allow(dead_code)] pub metadata: TestMetadata, diff --git a/tests/iterate.rs b/tests/iterate.rs new file mode 100644 index 0000000..96ae803 --- /dev/null +++ b/tests/iterate.rs @@ -0,0 +1,120 
@@ +// +// tests/iterate.rs -- testing GDBM iteration APIs +// +// Copyright (c) 2019-2024 Jeff Garzik +// +// This file is part of the gdbm-native software project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT + +extern crate gdbm_native; + +mod common; + +use std::collections::{HashMap, HashSet}; + +use common::init_tests; +use gdbm_native::Gdbm; + +#[test] +fn api_iter() { + init_tests() + .into_iter() + .try_for_each(|test| { + let mut keys_and_values = test + .metadata + .data + .iter() + .map(|kv| (kv[0].as_bytes().to_vec(), kv[1].as_bytes().to_vec())) + .collect::>(); + + Gdbm::open(&test.db_path, &test.ro_cfg()) + .map_err(|e| e.to_string()) + .and_then(|mut db| { + db.iter().try_for_each(|kv| { + kv.map_err(|e| e.to_string()).and_then(|(k, v)| { + (keys_and_values.remove(&k.to_vec()) == Some(v)) + .then_some(()) + .ok_or_else(|| format!("key {:?} not in metadata", k)) + }) + }) + }) + .and_then(|_| { + keys_and_values + .is_empty() + .then_some(()) + .ok_or_else(|| "iteration missed some keys".to_string()) + }) + .map_err(|e| format!("[{}]: {}", test.db_path, e)) + }) + .unwrap_or_else(|e| panic!("{}", e)); +} + +#[test] +fn api_keys() { + init_tests() + .into_iter() + .try_for_each(|test| { + let mut keys = test + .metadata + .data + .iter() + .map(|kv| (kv[0].as_bytes().to_vec())) + .collect::>(); + + Gdbm::open(&test.db_path, &test.ro_cfg()) + .map_err(|e| e.to_string()) + .and_then(|mut db| { + db.keys().try_for_each(|kv| { + kv.map_err(|e| e.to_string()).and_then(|k| { + keys.remove(&k.to_vec()) + .then_some(()) + .ok_or_else(|| format!("key {:?} not in metadata", k)) + }) + }) + }) + .and_then(|_| { + keys.is_empty() + .then_some(()) + .ok_or_else(|| "iteration missed some keys".to_string()) + }) + .map_err(|e| format!("[{}]: {}", test.db_path, e)) + }) + .unwrap_or_else(|e| panic!("{}", e)); +} + +#[test] +fn api_values() { +
init_tests() + .into_iter() + .try_for_each(|test| { + let mut values = test + .metadata + .data + .iter() + .map(|kv| (kv[1].as_bytes().to_vec())) + .collect::>(); + + Gdbm::open(&test.db_path, &test.ro_cfg()) + .map_err(|e| e.to_string()) + .and_then(|mut db| { + db.values().try_for_each(|kv| { + kv.map_err(|e| e.to_string()).and_then(|k| { + values + .remove(&k.to_vec()) + .then_some(()) + .ok_or_else(|| format!("value {:?} not in metadata", k)) + }) + }) + }) + .and_then(|_| { + values + .is_empty() + .then_some(()) + .ok_or_else(|| "iteration missed some values".to_string()) + }) + .map_err(|e| format!("[{}]: {}", test.db_path, e)) + }) + .unwrap_or_else(|e| panic!("{}", e)); +} diff --git a/tests/update.rs b/tests/update.rs index cb76299..7a37a3d 100644 --- a/tests/update.rs +++ b/tests/update.rs @@ -76,7 +76,7 @@ fn api_insert() { let key = format!("key {}", n); let value = format!("value {}", n); - db.insert(key.as_bytes(), value.as_bytes()) + db.insert(key.as_bytes().to_vec(), value.as_bytes().to_vec()) .map_err(|e| { format!("inserting key \"{}\" with value \"{}\": {}", key, value, e) }) @@ -94,7 +94,7 @@ fn api_insert() { let key = format!("key {}", n); let value = format!("value {}", n); - db.try_insert(key.as_bytes(), value.as_bytes()) + db.try_insert(key.as_bytes().to_vec(), value.as_bytes().to_vec()) .map_err(|e| { format!("inserting key \"{}\" with value \"{}\": {}", key, value, e) })