From 21ee701a793979153d94789ca235ba82e760cf6a Mon Sep 17 00:00:00 2001 From: brusher_ru Date: Thu, 29 Feb 2024 17:28:47 +0700 Subject: [PATCH] feat: support zstd archives --- Cargo.lock | 23 +++++++++++++++++++++-- Cargo.toml | 1 + src/checksum.rs | 3 +++ src/main.rs | 34 +++++++++++++++++++++++++++------- src/reader_with_bytes.rs | 33 +++++++++++++++++++++++++++++++++ src/{zip.rs => unpack.rs} | 26 ++++++++++++++++++++++++-- src/utils.rs | 10 ++++++++-- 7 files changed, 117 insertions(+), 13 deletions(-) create mode 100644 src/reader_with_bytes.rs rename src/{zip.rs => unpack.rs} (62%) diff --git a/Cargo.lock b/Cargo.lock index d83e6ac..4a1822f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1002,6 +1002,7 @@ dependencies = [ "rusqlite", "url", "zip", + "zstd 0.13.0", ] [[package]] @@ -1767,7 +1768,7 @@ dependencies = [ "pbkdf2", "sha1", "time", - "zstd", + "zstd 0.11.2+zstd.1.5.2", ] [[package]] @@ -1776,7 +1777,16 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", +] + +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe 7.0.0", ] [[package]] @@ -1789,6 +1799,15 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.9+zstd.1.5.5" diff --git a/Cargo.toml b/Cargo.toml index 6224f27..eb7d8f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,4 @@ reqwest = { version = "0.11.23", features = ["json", "stream", "blocking"] } rusqlite = { version = "0.30.0", features = ["bundled"] } url = "2.5.0" zip = "0.6.6" +zstd = "0.13.0" diff --git a/src/checksum.rs b/src/checksum.rs index d603db9..513a98d 100644 --- a/src/checksum.rs +++ b/src/checksum.rs @@ -14,6 +14,9 @@ fn replace_sql_zip_with_md5(url: &Url) -> Result { if url_str.ends_with(".sql.zip") { let new_url_str = url_str.replace(".sql.zip", ".sql.md5"); Ok(Url::parse(&new_url_str)?) + } else if url_str.ends_with(".sql.zstd") { + let new_url_str = url_str.replace(".sql.zstd", ".sql.md5"); + Ok(Url::parse(&new_url_str)?) } else { anyhow::bail!("URL does not end with .sql.zip") } diff --git a/src/main.rs b/src/main.rs index bb34c10..f3b663c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,18 +8,19 @@ mod checksum; mod download; mod go_spacemesh; mod parsers; +mod reader_with_bytes; mod reader_with_progress; mod sql; +mod unpack; mod utils; -mod zip; use checksum::*; use download::download_with_retries; use go_spacemesh::get_version; use parsers::*; use sql::get_last_layer_from_db; +use unpack::{unpack_zip, unpack_zstd}; use utils::*; -use zip::unpack; #[derive(Parser, Debug)] #[clap(author, version, about, long_about = None)] @@ -159,13 +160,14 @@ fn main() -> anyhow::Result<()> { let dir_path = node_data; let temp_file_path = dir_path.join("state.download"); let redirect_file_path = dir_path.join("state.url"); - let archive_file_path = dir_path.join("state.zip"); + let archive_zip_file_path = dir_path.join("state.zip"); + let archive_zstd_file_path = dir_path.join("state.zstd"); let unpacked_file_path = dir_path.join("state_downloaded.sql"); let final_file_path = dir_path.join("state.sql"); let wal_file_path = dir_path.join("state.sql-wal"); // Download archive if needed - if !archive_file_path.exists() { + let archive_file_path = if !archive_zip_file_path.exists() && !archive_zstd_file_path.exists() { println!("Downloading the latest database..."); let url = if redirect_file_path.exists() { std::fs::read_to_string(&redirect_file_path)? @@ -174,7 +176,7 @@ fn main() -> anyhow::Result<()> { let go_path_str = go_path .to_str() .expect("Cannot resolve path to go-spacemesh"); - let path = format!("{}/state.zip", &get_version(go_path_str)?); + let path = format!("{}/state.zstd", &get_version(go_path_str)?); let url = build_url(&download_url, &path); url.to_string() }; @@ -189,10 +191,28 @@ fn main() -> anyhow::Result<()> { process::exit(1); } - // Rename `state.download` -> `state.zip` + let archive_file_path = if url.ends_with(".zip") { + archive_zip_file_path + } else { + archive_zstd_file_path + }; + + // Rename `state.download` -> `state.zstd` std::fs::rename(&temp_file_path, &archive_file_path)?; println!("Archive downloaded!"); - } + archive_file_path + } else if archive_zip_file_path.exists() { + archive_zip_file_path + } else { + archive_zstd_file_path + }; + + let archive_url = std::fs::read_to_string(&redirect_file_path)?; + let unpack = if archive_url.ends_with(".zip") { + unpack_zip + } else { + unpack_zstd + }; // Unzip match unpack(&archive_file_path, &unpacked_file_path) { diff --git a/src/reader_with_bytes.rs b/src/reader_with_bytes.rs new file mode 100644 index 0000000..cb7272f --- /dev/null +++ b/src/reader_with_bytes.rs @@ -0,0 +1,33 @@ +use std::io::{self, Read}; + +const MB: u64 = 1024 * 1024; + +pub struct ReaderWithBytes { + reader: R, + bytes_read: u64, + last_reported: u64, +} + +impl ReaderWithBytes { + pub fn new(reader: R) -> Self { + ReaderWithBytes { + reader, + bytes_read: 0, + last_reported: 0, + } + } +} + +impl Read for ReaderWithBytes { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let bytes_read = self.reader.read(buf)?; + self.bytes_read += bytes_read as u64; + + if self.bytes_read / MB > self.last_reported / MB { + println!("Unpacking... {} MB extracted", self.bytes_read / MB); + self.last_reported = self.bytes_read; + } + + Ok(bytes_read) + } +} diff --git a/src/zip.rs b/src/unpack.rs similarity index 62% rename from src/zip.rs rename to src/unpack.rs index 18462f4..08574d8 100644 --- a/src/zip.rs +++ b/src/unpack.rs @@ -1,10 +1,12 @@ use anyhow::Result; use std::fs::File; -use std::io::{BufReader, Error}; +use std::io::{BufReader, BufWriter, Error}; use std::path::Path; use zip::read::ZipFile; use zip::ZipArchive; +use zstd::stream::read::Decoder; +use crate::reader_with_bytes::ReaderWithBytes; use crate::reader_with_progress::ReaderWithProgress; fn find_file_in_archive<'a>( @@ -29,7 +31,27 @@ fn find_file_in_archive<'a>( )) } -pub fn unpack(archive_path: &Path, output_path: &Path) -> Result<()> { +pub fn unpack_zstd(archive_path: &Path, output_path: &Path) -> Result<()> { + let file = File::open(archive_path)?; + let reader = BufReader::new(file); + let decoder = Decoder::new(reader)?; + + let outpath = Path::new(output_path); + if let Some(p) = outpath.parent() { + std::fs::create_dir_all(p)?; + } + let outfile = File::create(outpath)?; + let mut writer = BufWriter::new(outfile); + + let mut reader = ReaderWithBytes::new(decoder); + + std::io::copy(&mut reader, &mut writer)?; + println!("Unpacking complete!"); + + Ok(()) +} + +pub fn unpack_zip(archive_path: &Path, output_path: &Path) -> Result<()> { let file = File::open(archive_path)?; let mut zip = ZipArchive::new(file)?; diff --git a/src/utils.rs b/src/utils.rs index d6a2fce..e712d97 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -56,7 +56,7 @@ pub fn backup_file(original_path: &PathBuf) -> Result { } fn extract_number_from_url(url: &Url) -> Result { - let re = Regex::new(r"/(\d+)\.sql\.zip$")?; + let re = Regex::new(r"/(\d+)\.sql\.(zip|zstd)$")?; let path = url.path(); let caps = re .captures(path) @@ -95,11 +95,17 @@ mod tests { use url::Url; #[test] - fn test_extract_number_valid() { + fn test_extract_number_zip_valid() { let url = Url::parse("https://quicksync-downloads.spacemesh.network/10/61579.sql.zip").unwrap(); assert_eq!(extract_number_from_url(&url).unwrap(), 61579); } + #[test] + fn test_extract_number_zstd_valid() { + let url = Url::parse("https://quicksync-downloads.spacemesh.network/10/61579.sql.zstd").unwrap(); + assert_eq!(extract_number_from_url(&url).unwrap(), 61579); + } + #[test] fn test_extract_number_invalid() { let url = Url::parse("https://quicksync.spacemesh.network/state.zip").unwrap();