feat(homeserver): optimize chunk size for LMDB
Nuhvi committed Oct 16, 2024
1 parent 8dde7b1 commit fd88986
Showing 5 changed files with 52 additions and 4 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Generated lockfile; diff not rendered by default.

1 change: 1 addition & 0 deletions pubky-homeserver/Cargo.toml
@@ -15,6 +15,7 @@ flume = "0.11.0"
 futures-util = "0.3.30"
 heed = "0.20.3"
 hex = "0.4.3"
+libc = "0.2.159"
 pkarr = { workspace = true }
 postcard = { version = "1.0.8", features = ["alloc"] }
 pubky-common = { version = "0.1.0", path = "../pubky-common" }
17 changes: 17 additions & 0 deletions pubky-homeserver/src/database.rs
@@ -15,6 +15,7 @@ pub struct DB {
     pub(crate) tables: Tables,
     pub(crate) config: Config,
     pub(crate) buffers_dir: PathBuf,
+    pub(crate) max_chunk_size: usize,
 }

 impl DB {
@@ -39,8 +40,24 @@ impl DB {
             tables,
             config,
             buffers_dir,
+            max_chunk_size: max_chunk_size(),
         };

         Ok(db)
     }
 }
+
+/// Calculate the optimal chunk size:
+/// - https://lmdb.readthedocs.io/en/release/#storage-efficiency-limits
+/// - https://github.com/lmdbjava/benchmarks/blob/master/results/20160710/README.md#test-2-determine-24816-kb-byte-values
+fn max_chunk_size() -> usize {
+    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
+
+    // - 16 bytes header per page (LMDB)
+    // - each page has to contain at least 2 records
+    // - 8 bytes per record (LMDB)
+    // - 12 bytes per key:
+    //   - timestamp:   8 bytes
+    //   - chunk index: 4 bytes
+    ((page_size - 16) / 2) - 8 - 12
+}
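
As a sanity check (an editorial aside, not part of the commit): with the common 4096-byte page size, the formula yields ((4096 - 16) / 2) - 8 - 12 = 2020 bytes, just above the hardcoded 2000-byte chunk size this commit removes from entries.rs below.

// Editorial sketch: verifies the formula above for an assumed
// 4096-byte page; the real value comes from sysconf at runtime.
fn main() {
    let page_size: usize = 4096;
    let max_chunk_size = ((page_size - 16) / 2) - 8 - 12;
    assert_eq!(max_chunk_size, 2020);
    println!("max chunk size: {max_chunk_size} bytes");
}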
5 changes: 1 addition & 4 deletions pubky-homeserver/src/database/tables/entries.rs
@@ -350,9 +350,6 @@ impl<'db> EntryWriter<'db> {
     pub fn commit(&self) -> anyhow::Result<Entry> {
         let hash = self.hasher.finalize();

-        // TODO: get the chunk size from the OS's page size
-        let chunk_size: usize = 2000;
-
         let mut buffer = File::open(&self.buffer_path)?;

         let mut wtxn = self.db.env.write_txn()?;
@@ -363,7 +360,7 @@
         let mut chunk_index: u32 = 0;

         loop {
-            let mut chunk = vec![0_u8; chunk_size];
+            let mut chunk = vec![0_u8; self.db.max_chunk_size];

             let bytes_read = buffer.read(&mut chunk)?;
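
The read loop continues past what the diff shows; as an editorial sketch (hypothetical names, not the committed code), the chunking pattern it uses looks like this:

use std::fs::File;
use std::io::Read;

// Editorial sketch of the chunking pattern in EntryWriter::commit:
// read a file in fixed-size chunks, truncating the final short read,
// so each chunk fits LMDB's per-page budget computed above.
fn for_each_chunk(
    path: &str,
    max_chunk_size: usize,
    mut store: impl FnMut(u32, &[u8]),
) -> std::io::Result<()> {
    let mut buffer = File::open(path)?;
    let mut chunk_index: u32 = 0;

    loop {
        let mut chunk = vec![0_u8; max_chunk_size];
        let bytes_read = buffer.read(&mut chunk)?;

        if bytes_read == 0 {
            break; // EOF reached
        }

        // Keep only the bytes actually read (matters for the last chunk).
        chunk.truncate(bytes_read);

        store(chunk_index, &chunk);
        chunk_index += 1;
    }

    Ok(())
}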
32 changes: 32 additions & 0 deletions pubky/src/shared/public.rs
@@ -98,6 +98,7 @@ mod tests {

     use crate::*;

+    use bytes::Bytes;
     use pkarr::{mainline::Testnet, Keypair};
     use pubky_homeserver::Homeserver;
     use reqwest::{Method, StatusCode};
@@ -819,4 +820,35 @@
             ]
         )
     }
+
+    #[tokio::test]
+    async fn stream() {
+        // TODO: test better streaming API
+
+        let testnet = Testnet::new(10);
+        let server = Homeserver::start_test(&testnet).await.unwrap();
+
+        let client = PubkyClient::test(&testnet);
+
+        let keypair = Keypair::random();
+
+        client.signup(&keypair, &server.public_key()).await.unwrap();
+
+        let url = format!("pubky://{}/pub/foo.txt", keypair.public_key());
+        let url = url.as_str();
+
+        let bytes = Bytes::from(vec![0; 1024 * 1024]);
+
+        client.put(url, &bytes).await.unwrap();
+
+        let response = client.get(url).await.unwrap().unwrap();
+
+        assert_eq!(response, bytes);
+
+        client.delete(url).await.unwrap();
+
+        let response = client.get(url).await.unwrap();
+
+        assert_eq!(response, None);
+    }
 }
