diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b37b995..02b04dc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,5 +13,18 @@ jobs: toolchain: stable profile: minimal override: true + + - name: Configure src/tables.rs cache + id: cache-tables + uses: actions/cache@v3 + with: + path: src/tables.rs + key: ${{ hashFiles('LineBreak.txt', 'gen-tables/**') }} + + - name: Generates src/tables.rs + run: cargo run + working-directory: ./gen-tables + if: steps.cache-tables.outputs.cache-hit != 'true' + - name: Run tests - run: cargo test \ No newline at end of file + run: cargo test diff --git a/.gitignore b/.gitignore index 96ef6c0..b5787a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -Cargo.lock +/Cargo.lock +/src/tables.rs diff --git a/Cargo.toml b/Cargo.toml index afbce85..54e9160 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,10 +9,6 @@ readme = "README.md" keywords = ["unicode", "text", "layout"] categories = ["internationalization"] license = "Apache-2.0" -include = ["src/**/*", "build.rs", "LineBreak.txt", "LICENSE"] +include = ["src/**/*", "LICENSE"] edition = "2021" rust-version = "1.56" - -[build-dependencies] -regex = "1" -hashbrown = "0.12.3" diff --git a/README.md b/README.md index d3b439a..d3f1de5 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,16 @@ assert!(linebreaks(text).eq([ ])); ``` +## Development + +After cloning the repository or modifying `LineBreak.txt` the tables +have to be (re-)generated: + +```sh +# Generate src/tables.rs +(cd gen-tables && cargo run) +# Run tests to make sure it was successful +cargo test +``` + [UAX14]: https://www.unicode.org/reports/tr14/ diff --git a/gen-tables/.gitignore b/gen-tables/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/gen-tables/.gitignore @@ -0,0 +1 @@ +target diff --git a/gen-tables/Cargo.lock b/gen-tables/Cargo.lock new file mode 100644 index 0000000..0d3c6e7 --- /dev/null +++ b/gen-tables/Cargo.lock 
@@ -0,0 +1,100 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "gen-tables" +version = "0.0.0" +dependencies = [ + "hashbrown", + "regex", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "regex" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + 
"aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83d3daa6976cffb758ec878f108ba0e062a45b2d6ca3a2cca965338855476caf" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" diff --git a/gen-tables/Cargo.toml b/gen-tables/Cargo.toml new file mode 100644 index 0000000..d766dd4 --- /dev/null +++ b/gen-tables/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "gen-tables" +version = "0.0.0" +edition = "2021" +publish = false + +[dependencies] +regex = "1" +hashbrown = "0.14" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] diff --git a/build.rs b/gen-tables/src/main.rs similarity index 98% rename from build.rs rename to gen-tables/src/main.rs index a573dde..5c346c9 100644 --- a/build.rs +++ b/gen-tables/src/main.rs @@ -20,11 +20,10 @@ use std::hash::{BuildHasher, Hash, Hasher}; use std::io::{BufRead, BufReader, BufWriter, Write}; use std::marker::PhantomData; use std::ops::Range; -use std::path::Path; use std::str::FromStr; -use std::{env, error, iter}; +use std::{error, iter}; -include!("src/shared.rs"); +include!("../../src/shared.rs"); impl FromStr for BreakClass { type Err = &'static str; @@ -88,6 +87,7 @@ static BREAK_CLASS_TABLE: [&str; NUM_CLASSES] = [ #[derive(Copy, Clone)] #[repr(u8)] +#[allow(clippy::upper_case_acronyms)] enum ExtraState { ZWSP = sot + 1, OPSP, @@ -683,8 +683,8 @@ struct CpTrie { } fn main() -> Result<(), Box> { - 
println!("cargo:rerun-if-changed=LineBreak.txt"); - debug_assert!(NUM_STATES <= 0x3F, "too many states"); + #[allow(clippy::assertions_on_constants)] + const _: () = debug_assert!(NUM_STATES <= 0x3F, "too many states"); let pair_table = rules2table! { // Non-tailorable Line Breaking Rules @@ -795,7 +795,7 @@ fn main() -> Result<(), Box<dyn error::Error>> { ; (?P\w{2,3}) # Line_Break property", )?; - let prop_ranges = BufReader::new(File::open("LineBreak.txt")?) + let prop_ranges = BufReader::new(File::open("../LineBreak.txt")?) .lines() .map(Result::unwrap) .filter(|l| !(l.starts_with('#') || l.is_empty())) @@ -828,9 +828,7 @@ fn main() -> Result<(), Box<dyn error::Error>> { builder.build() }; - let out_dir = env::var("OUT_DIR")?; - let dest_path = Path::new(&out_dir).join("tables.rs"); - let mut stream = BufWriter::new(File::create(&dest_path)?); + let mut stream = BufWriter::new(File::create("../src/tables.rs")?); writeln!( stream, "const BREAK_PROP_TRIE_HIGH_START: u32 = {}; diff --git a/src/lib.rs b/src/lib.rs index 10bc369..ca473d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,8 +27,7 @@ use core::iter::once; pub const UNICODE_VERSION: (u8, u8, u8) = (15, 0, 0); include!("shared.rs"); - -include!(concat!(env!("OUT_DIR"), "/tables.rs")); +include!("tables.rs"); /// Returns the line break property of the specified code point. ///