Merge pull request #84 from drystone/iterator
Iterator
jgarzik authored Oct 22, 2024
2 parents 702efe4 + 56477e2 commit 2e9fb01
Showing 6 changed files with 184 additions and 39 deletions.
5 changes: 1 addition & 4 deletions src/bucket.rs
@@ -281,10 +281,7 @@ impl Bucket {
}

pub fn allocate(&mut self, size: u32) -> Option<(u64, u32)> {
avail::remove_elem(&mut self.avail, size).map(|block| {
self.dirty = true;
block
})
avail::remove_elem(&mut self.avail, size).inspect(|_| self.dirty = true)
}

pub fn free(&mut self, offset: u64, length: u32) {
5 changes: 1 addition & 4 deletions src/header.rs
@@ -227,10 +227,7 @@ impl Header {
}

pub fn allocate(&mut self, size: u32) -> Option<(u64, u32)> {
self.avail.remove_elem(size).map(|block| {
self.dirty = true;
block
})
self.avail.remove_elem(size).inspect(|_| self.dirty = true)
}

pub fn free(&mut self, offset: u64, length: u32) {
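Both `allocate` rewrites (here and in src/bucket.rs above) swap a `map` closure that set the dirty flag and handed the block back for `Option::inspect`, which runs a side effect on the contained value and returns the option unchanged (stable since Rust 1.76). A minimal, self-contained sketch of the pattern; the `Pool` type and its free list are stand-ins, not the crate's real `avail` module:

```rust
struct Pool {
    avail: Vec<(u64, u32)>, // free blocks as (offset, length) -- stand-in for the avail list
    dirty: bool,
}

impl Pool {
    fn allocate(&mut self, size: u32) -> Option<(u64, u32)> {
        // Remove the first block that fits, marking the pool dirty only on success.
        let idx = self.avail.iter().position(|&(_, len)| len >= size)?;
        Some(self.avail.remove(idx)).inspect(|_| self.dirty = true)
    }
}

fn main() {
    let mut pool = Pool { avail: vec![(0, 64)], dirty: false };
    assert_eq!(pool.allocate(32), Some((0, 64)));
    assert!(pool.dirty);
    assert_eq!(pool.allocate(128), None); // no fitting block: nothing removed, no side effect
}
```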
87 changes: 58 additions & 29 deletions src/lib.rs
@@ -100,8 +100,6 @@ pub struct Gdbm {
pub header: Header,
pub dir: Directory,
bucket_cache: BucketCache,

iter_key: Option<Vec<u8>>,
}

impl Gdbm {
@@ -177,7 +175,6 @@ impl Gdbm {
header,
dir,
bucket_cache,
iter_key: None,
})
}

@@ -238,7 +235,7 @@ impl Gdbm {
pub fn import_ascii(&mut self, reader: &mut impl Read) -> io::Result<()> {
ASCIIImportIterator::new(reader)?.try_for_each(|l| {
let (key, value) = l?;
self.insert(&key, &value).map(|_| ())
self.insert(key, value).map(|_| ())
})
}

@@ -303,7 +300,7 @@ impl Gdbm {

BinaryImportIterator::new(alignment, reader)?.try_for_each(|l| {
let (key, value) = l?;
self.insert(&key, &value).map(|_| ())
self.insert(key, value).map(|_| ())
})
}

@@ -354,9 +351,19 @@ impl Gdbm {
Ok(len)
}

// API: get an iterator over values
pub fn values(&mut self) -> impl std::iter::Iterator<Item = io::Result<Vec<u8>>> + '_ {
GDBMIterator::new(self, KeyOrValue::Value).map(|data| data.map(|(_, value)| value))
}

// API: get an iterator over keys
pub fn keys(&mut self) -> impl std::iter::Iterator<Item = io::Result<Vec<u8>>> + '_ {
GDBMIterator::new(self, KeyOrValue::Key).map(|data| data.map(|(key, _)| key))
}

// API: get an iterator over key/value pairs
pub fn iter(&mut self) -> GDBMIterator {
GDBMIterator::new(self)
pub fn iter(&mut self) -> impl std::iter::Iterator<Item = io::Result<(Vec<u8>, Vec<u8>)>> + '_ {
GDBMIterator::new(self, KeyOrValue::Both)
}

// API: does key exist?
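These three methods replace the old cursor-style iteration (the `iter_key` field removed above and the `iter_reset` method removed further down): each call builds a fresh `GDBMIterator` that borrows the handle mutably for as long as it is used, and every item is an `io::Result` because buckets and records are read lazily. A hedged usage sketch; `dump` is a hypothetical helper operating on an already-opened database:

```rust
use gdbm_native::Gdbm;

// Hypothetical helper: walk an already-opened database with the new iterators.
fn dump(db: &mut Gdbm) -> std::io::Result<()> {
    for item in db.iter() {
        let (key, value) = item?;
        println!("{} => {} bytes", String::from_utf8_lossy(&key), value.len());
    }

    // keys() (and likewise values()) yields only the requested half of each record;
    // per the next() change further down, the other half is never read from disk.
    let n_keys = db.keys().filter(|k| k.is_ok()).count();
    println!("{} readable keys", n_keys);
    Ok(())
}
```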
@@ -612,13 +619,13 @@ impl Gdbm {
Ok(offset)
}

fn int_insert(&mut self, key: &[u8], data: &[u8]) -> io::Result<()> {
fn int_insert(&mut self, key: Vec<u8>, data: Vec<u8>) -> io::Result<()> {
let offset = self.allocate_record((key.len() + data.len()) as u32)?;
self.f.seek(SeekFrom::Start(offset))?;
self.f.write_all(key)?;
self.f.write_all(data)?;
self.f.write_all(&key)?;
self.f.write_all(&data)?;

let bucket_elem = BucketElement::new(key, data, offset);
let bucket_elem = BucketElement::new(&key, &data, offset);
self.cache_load_bucket(bucket_dir(self.header.dir_bits, bucket_elem.hash))?;

while self.bucket_cache.current_bucket().unwrap().count == self.header.bucket_elems {
@@ -634,26 +641,25 @@ impl Gdbm {
Ok(())
}

pub fn insert(&mut self, key: &[u8], data: &[u8]) -> io::Result<Option<Vec<u8>>> {
pub fn insert(&mut self, key: Vec<u8>, data: Vec<u8>) -> io::Result<Option<Vec<u8>>> {
self.writeable()
.and_then(|_| self.remove(key))
.and_then(|_| self.remove(&key))
.and_then(|oldkey| self.int_insert(key, data).map(|_| oldkey))
}

pub fn try_insert(&mut self, key: &[u8], data: &[u8]) -> io::Result<(bool, Option<Vec<u8>>)> {
pub fn try_insert(
&mut self,
key: Vec<u8>,
data: Vec<u8>,
) -> io::Result<(bool, Option<Vec<u8>>)> {
self.writeable()
.and_then(|_| self.get(key))
.and_then(|_| self.get(&key))
.and_then(|olddata| match olddata {
Some(_) => Ok((false, olddata)),
_ => self.int_insert(key, data).map(|_| (true, None)),
})
}

// API: reset iterator state
pub fn iter_reset(&mut self) {
self.iter_key = None;
}

fn split_bucket(&mut self) -> io::Result<()> {
if self.bucket_cache.current_bucket().unwrap().bits == self.header.dir_bits {
self.extend_directory()?;
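The reworked `insert`/`try_insert` above now consume their key and value; callers pass owned `Vec<u8>` arguments, as the updated tests/update.rs at the end of this diff shows. A hedged sketch of how the two calls differ; `upsert_then_guard` is a hypothetical helper and assumes a read-write handle:

```rust
use gdbm_native::Gdbm;

// Hypothetical helper; assumes `db` was opened read-write.
fn upsert_then_guard(db: &mut Gdbm) -> std::io::Result<()> {
    // insert() removes any existing entry first and hands back whatever was removed.
    let previous: Option<Vec<u8>> = db.insert(b"lang".to_vec(), b"rust".to_vec())?;
    println!("replaced an existing entry: {}", previous.is_some());

    // try_insert() refuses to overwrite: the bool reports whether the pair was stored,
    // and the Option carries the value already present for that key.
    let (stored, existing) = db.try_insert(b"lang".to_vec(), b"c".to_vec())?;
    assert!(!stored);
    assert_eq!(existing, Some(b"rust".to_vec()));
    Ok(())
}
```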
@@ -719,11 +725,18 @@ impl Gdbm {
}
}

pub struct GDBMIterator<'a> {
struct GDBMIterator<'a> {
key_or_value: KeyOrValue,
db: &'a mut Gdbm,
slot: Option<io::Result<Slot>>,
}

enum KeyOrValue {
Key,
Value,
Both,
}

#[derive(Debug)]
struct Slot {
bucket: usize,
@@ -768,7 +781,7 @@ impl<'a> GDBMIterator<'a> {
None
}

fn new(db: &'a mut Gdbm) -> GDBMIterator<'a> {
fn new(db: &'a mut Gdbm, key_or_value: KeyOrValue) -> GDBMIterator<'a> {
let slot = {
let slot = Slot {
bucket: 0,
@@ -785,7 +798,11 @@ impl<'a> GDBMIterator<'a> {
Err(e) => Some(Err(e)),
}
};
Self { db, slot }
Self {
db,
slot,
key_or_value,
}
}
}

Expand All @@ -808,12 +825,24 @@ impl<'a> Iterator for GDBMIterator<'a> {
.map(|e| (e.data_ofs, e.key_size as usize, e.data_size as usize))
.unwrap()
})
.and_then(|(offset, key_length, data_length)| {
read_ofs(&mut self.db.f, offset, key_length + data_length).map(|data| {
let (key, value) = data.split_at(key_length);
(key.to_vec(), value.to_vec())
})
});
.and_then(
|(offset, key_length, data_length)| match self.key_or_value {
KeyOrValue::Key => read_ofs(&mut self.db.f, offset, key_length)
.map(|data| (data.to_vec(), vec![])),
KeyOrValue::Value => {
read_ofs(&mut self.db.f, offset + key_length as u64, data_length)
.map(|data| (vec![], data.to_vec()))
}
KeyOrValue::Both => {
read_ofs(&mut self.db.f, offset, key_length + data_length).map(
|data| {
let (key, value) = data.split_at(key_length);
(key.to_vec(), value.to_vec())
},
)
}
},
);

match data {
Ok(data) => {
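The reworked `next()` above leans on the on-disk record layout: key bytes immediately followed by value bytes starting at the element's data offset. That is why `KeyOrValue::Key` reads `key_length` bytes at `offset`, `KeyOrValue::Value` reads `data_length` bytes at `offset + key_length`, and `KeyOrValue::Both` keeps the old read-and-split behaviour; the unused half is then dropped by the `keys()`/`values()` adapters earlier in the file. A small stand-in for the offset arithmetic, using an in-memory buffer rather than the crate's `read_ofs`:

```rust
// Stand-in for reading part of a record out of an in-memory buffer instead of the file.
fn read_slice(buf: &[u8], offset: usize, len: usize) -> &[u8] {
    &buf[offset..offset + len]
}

fn main() {
    // One record: 4 key bytes immediately followed by 4 value bytes.
    let record: &[u8] = b"langrust";
    let (offset, key_len, val_len) = (0usize, 4usize, 4usize);

    // KeyOrValue::Key -- read only the key bytes.
    assert_eq!(read_slice(record, offset, key_len), &b"lang"[..]);
    // KeyOrValue::Value -- skip the key and read only the value bytes.
    assert_eq!(read_slice(record, offset + key_len, val_len), &b"rust"[..]);
    // KeyOrValue::Both -- read the whole record and split it, as before.
    let both = read_slice(record, offset, key_len + val_len);
    assert_eq!(both.split_at(key_len), (&b"lang"[..], &b"rust"[..]));
}
```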
2 changes: 2 additions & 0 deletions tests/common/mod.rs
@@ -20,7 +20,9 @@ pub struct TestInfo {
#[allow(dead_code)]
pub json_path: String,
pub db_path: String,
#[allow(dead_code)]
pub is_basic: bool,
#[allow(dead_code)]
pub n_records: usize,
#[allow(dead_code)]
pub metadata: TestMetadata,
120 changes: 120 additions & 0 deletions tests/iterate.rs
@@ -0,0 +1,120 @@
//
// tests/iterate.rs -- testing GDBM iteration APIs
//
// Copyright (c) 2019-2024 Jeff Garzik
//
// This file is part of the gdbm-native software project covered under
// the MIT License. For the full license text, please see the LICENSE
// file in the root directory of this project.
// SPDX-License-Identifier: MIT

extern crate gdbm_native;

mod common;

use std::collections::{HashMap, HashSet};

use common::init_tests;
use gdbm_native::Gdbm;

#[test]
fn api_iter() {
init_tests()
.into_iter()
.try_for_each(|test| {
let mut keys_and_values = test
.metadata
.data
.iter()
.map(|kv| (kv[0].as_bytes().to_vec(), kv[1].as_bytes().to_vec()))
.collect::<HashMap<_, _>>();

Gdbm::open(&test.db_path, &test.ro_cfg())
.map_err(|e| e.to_string())
.and_then(|mut db| {
db.iter().try_for_each(|kv| {
kv.map_err(|e| e.to_string()).and_then(|(k, v)| {
(keys_and_values.remove(&k.to_vec()) == Some(v))
.then_some(())
.ok_or_else(|| format!("key {:?} not in metadata", k))
})
})
})
.and_then(|_| {
keys_and_values
.is_empty()
.then_some(())
.ok_or_else(|| "iteration missed some keys".to_string())
})
.map_err(|e| format!("[{}]: {}", test.db_path, e))
})
.unwrap_or_else(|e| panic!("{}", e));
}

#[test]
fn api_keys() {
init_tests()
.into_iter()
.try_for_each(|test| {
let mut keys = test
.metadata
.data
.iter()
.map(|kv| (kv[0].as_bytes().to_vec()))
.collect::<HashSet<_>>();

Gdbm::open(&test.db_path, &test.ro_cfg())
.map_err(|e| e.to_string())
.and_then(|mut db| {
db.keys().try_for_each(|kv| {
kv.map_err(|e| e.to_string()).and_then(|k| {
keys.remove(&k.to_vec())
.then_some(())
.ok_or_else(|| format!("key {:?} not in metadata", k))
})
})
})
.and_then(|_| {
keys.is_empty()
.then_some(())
.ok_or_else(|| "iteration missed some keys".to_string())
})
.map_err(|e| format!("[{}]: {}", test.db_path, e))
})
.unwrap_or_else(|e| panic!("{}", e));
}

#[test]
fn api_values() {
init_tests()
.into_iter()
.try_for_each(|test| {
let mut values = test
.metadata
.data
.iter()
.map(|kv| (kv[1].as_bytes().to_vec()))
.collect::<HashSet<_>>();

Gdbm::open(&test.db_path, &test.ro_cfg())
.map_err(|e| e.to_string())
.and_then(|mut db| {
db.values().try_for_each(|kv| {
kv.map_err(|e| e.to_string()).and_then(|k| {
values
.remove(&k.to_vec())
.then_some(())
.ok_or_else(|| format!("value {:?} not in metadata", k))
})
})
})
.and_then(|_| {
values
.is_empty()
.then_some(())
.ok_or_else(|| "iteration missed some values".to_string())
})
.map_err(|e| format!("[{}]: {}", test.db_path, e))
})
.unwrap_or_else(|e| panic!("{}", e));
}
4 changes: 2 additions & 2 deletions tests/update.rs
@@ -76,7 +76,7 @@ fn api_insert() {
let key = format!("key {}", n);
let value = format!("value {}", n);

db.insert(key.as_bytes(), value.as_bytes())
db.insert(key.as_bytes().to_vec(), value.as_bytes().to_vec())
.map_err(|e| {
format!("inserting key \"{}\" with value \"{}\": {}", key, value, e)
})
@@ -94,7 +94,7 @@ fn api_insert() {
let key = format!("key {}", n);
let value = format!("value {}", n);

db.try_insert(key.as_bytes(), value.as_bytes())
db.try_insert(key.as_bytes().to_vec(), value.as_bytes().to_vec())
.map_err(|e| {
format!("inserting key \"{}\" with value \"{}\": {}", key, value, e)
})