From ba09250ae8785b9311e4767a6956da78681baf9e Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 18:45:43 +0900 Subject: [PATCH 01/12] =?UTF-8?q?add:=20#396:=20core=E3=82=AF=E3=83=AC?= =?UTF-8?q?=E3=83=BC=E3=83=88=E3=81=AB`criterion`=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ベンチマークテストを行なうため --- core/Cargo.toml | 5 +++++ core/benches/core_benchmark.rs | 1 + 2 files changed, 6 insertions(+) create mode 100644 core/benches/core_benchmark.rs diff --git a/core/Cargo.toml b/core/Cargo.toml index 2ad307de..469e6806 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -20,6 +20,10 @@ blocking = ["reqwest/blocking"] city-name-correction = [] format-house-number = [] +[[bench]] +name = "core_benchmark" +harness = false + [dependencies] itertools = "0.13.0" rapidfuzz = "0.5.0" @@ -29,6 +33,7 @@ reqwest = { version = "0.12.5", default-features = false, features = ["json", "r js-sys = "0.3.67" [dev-dependencies] +criterion = { version = "0.5.1", features = ["html_reports"] } tokio.workspace = true wasm-bindgen-test = { workspace = true } diff --git a/core/benches/core_benchmark.rs b/core/benches/core_benchmark.rs new file mode 100644 index 00000000..ba640cb3 --- /dev/null +++ b/core/benches/core_benchmark.rs @@ -0,0 +1 @@ +extern crate criterion; From 89c1adb347ea636a753e2627b9d5cb7f41cb1c09 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 18:49:21 +0900 Subject: [PATCH 02/12] =?UTF-8?q?update:=20#396:=20`japanese=5Faddress=5Fp?= =?UTF-8?q?arser::parser::adapter`=E3=81=AE=E5=8F=AF=E8=A6=96=E6=80=A7?= =?UTF-8?q?=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ベンチマークテストから見えるようにするため --- core/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/parser.rs b/core/src/parser.rs index d2cec3b1..a7735cd0 100644 --- a/core/src/parser.rs +++ b/core/src/parser.rs @@ -8,7 +8,7 @@ use crate::domain::geolonia::error::{Error, ParseErrorKind}; use crate::tokenizer::Tokenizer; use serde::Serialize; -pub(crate) mod adapter; +pub mod adapter; impl From> for Address { fn from(value: Tokenizer) -> Self { From ffb749d61c9c40f6f19e22da7bf0a866b5e57aea Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 18:56:52 +0900 Subject: [PATCH 03/12] =?UTF-8?q?add:=20#396:=20=E3=83=99=E3=83=B3?= =?UTF-8?q?=E3=83=81=E3=83=9E=E3=83=BC=E3=82=AF=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=81=AB`OrthographicalVariantAdapter`=E3=81=AB=E5=AF=BE?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/benches/core_benchmark.rs | 8 +++- .../benches/orthographical_variant_adapter.rs | 48 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 core/benches/orthographical_variant_adapter.rs diff --git a/core/benches/core_benchmark.rs b/core/benches/core_benchmark.rs index ba640cb3..ff5fb974 100644 --- a/core/benches/core_benchmark.rs +++ b/core/benches/core_benchmark.rs @@ -1 +1,7 @@ -extern crate criterion; +mod orthographical_variant_adapter; + +use crate::orthographical_variant_adapter::bench_orthographical_variant_adapter; +use criterion::{criterion_group, criterion_main}; + +criterion_group!(benches, bench_orthographical_variant_adapter); +criterion_main!(benches); diff --git a/core/benches/orthographical_variant_adapter.rs b/core/benches/orthographical_variant_adapter.rs new file mode 100644 index 00000000..97f4fa26 --- /dev/null +++ b/core/benches/orthographical_variant_adapter.rs @@ -0,0 +1,48 @@ +use criterion::measurement::WallTime; +use criterion::{BatchSize, BenchmarkGroup, BenchmarkId, Criterion}; +use japanese_address_parser::parser::adapter::orthographical_variant_adapter::{ + OrthographicalVariantAdapter, OrthographicalVariants, Variant, +}; + +pub fn bench_orthographical_variant_adapter(c: &mut Criterion) { + let mut group = c.benchmark_group("orthographical_variant_adapter"); + add_tests( + &mut group, + TestSuite { + expected: "松ケ崎東池ノ内町", + inputs: vec![ + "松が崎東池ノ内町", + "松ヶ崎東池ノ内町", + "松ケ﨑東池ノ内町", + "松ケ﨑東池の内町", + "松ガ﨑東池の内町", + ], + variants_to_be_used: vec![Variant::ケ, Variant::崎, Variant::の], + }, + ); + group.finish(); +} + +fn add_tests(group: &mut BenchmarkGroup, test_suite: TestSuite) { + for input in test_suite.inputs { + let benchmark_id = BenchmarkId::new(test_suite.expected, input); + group.bench_with_input(benchmark_id, input, |b, input| { + b.iter_batched( + || OrthographicalVariantAdapter { + variant_list: test_suite.variants_to_be_used.clone(), + }, + |adapter| { + let (region_name, _) = adapter.apply(input, test_suite.expected).unwrap(); + assert_eq!(region_name, test_suite.expected); + }, + BatchSize::SmallInput, + ) + }); + } +} + +struct TestSuite { + expected: &'static str, + inputs: Vec<&'static str>, + variants_to_be_used: Vec, +} From 9ef65fb267d779d4de12b4c72940e06a35f01cb1 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 20:34:46 +0900 Subject: [PATCH 04/12] =?UTF-8?q?fix:=20#396:=20`criterion`=E3=81=AEdefaul?= =?UTF-8?q?t-features=E3=82=92=E7=84=A1=E5=8A=B9=E3=81=AB=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `wasm-pack test`が失敗するため see also: https://github.com/bheisler/criterion.rs/blob/5d169ae1ec2604cb1f2e89551bf44a0cfaa28a76/src/lib.rs#L27 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 469e6806..8a9a88ab 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -33,7 +33,7 @@ reqwest = { version = "0.12.5", default-features = false, features = ["json", "r js-sys = "0.3.67" [dev-dependencies] -criterion = { version = "0.5.1", features = ["html_reports"] } +criterion = { version = "0.5.1", default-features = false, features = ["html_reports"] } tokio.workspace = true wasm-bindgen-test = { workspace = true } From c309ecc2147dc3b64614f45451ad9b88c603ebf0 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 20:53:24 +0900 Subject: [PATCH 05/12] =?UTF-8?q?update:=20#396:=20`code-quality-check.yam?= =?UTF-8?q?l`=E3=81=AB=E3=83=99=E3=83=B3=E3=83=81=E3=83=9E=E3=83=BC?= =?UTF-8?q?=E3=82=AF=E3=82=92=E5=AE=9F=E8=A1=8C=E3=81=99=E3=82=8B=E3=82=B9?= =?UTF-8?q?=E3=83=86=E3=83=83=E3=83=97=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/code-quality-check.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/code-quality-check.yaml b/.github/workflows/code-quality-check.yaml index 936a7f5f..692c2bcf 100644 --- a/.github/workflows/code-quality-check.yaml +++ b/.github/workflows/code-quality-check.yaml @@ -21,3 +21,8 @@ jobs: reporter: 'github-pr-review' filter_mode: 'nofilter' github_token: ${{ secrets.GITHUB_TOKEN }} + - name: Run benchmark + uses: boa-dev/criterion-compare-action@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + branchName: ${{ github.base_ref }} From ab4cc16610734b06d3881557738bda6ed8495cc5 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 21:07:32 +0900 Subject: [PATCH 06/12] =?UTF-8?q?update:=20#396:=20=E3=83=99=E3=83=B3?= =?UTF-8?q?=E3=83=81=E3=83=9E=E3=83=BC=E3=82=AF=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=92=E5=AE=9F=E8=A1=8C=E3=81=99=E3=82=8B=E3=83=87=E3=82=A3?= =?UTF-8?q?=E3=83=AC=E3=82=AF=E3=83=88=E3=83=AA(=E3=82=AF=E3=83=AC?= =?UTF-8?q?=E3=83=BC=E3=83=88)=E3=82=92=E8=A8=AD=E5=AE=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/code-quality-check.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-quality-check.yaml b/.github/workflows/code-quality-check.yaml index 692c2bcf..3b7554ef 100644 --- a/.github/workflows/code-quality-check.yaml +++ b/.github/workflows/code-quality-check.yaml @@ -26,3 +26,4 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} branchName: ${{ github.base_ref }} + cwd: 'core' From 8c6d32745954de1429ea3a4978af307535b3b1aa Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 21:18:39 +0900 Subject: [PATCH 07/12] =?UTF-8?q?fix:=20#396:=20`cargo=20bench`=E5=AE=9F?= =?UTF-8?q?=E8=A1=8C=E6=99=82=E3=81=AB=E3=82=A8=E3=83=A9=E3=83=BC=E3=81=8C?= =?UTF-8?q?=E5=87=BA=E3=82=8B=E3=81=9F=E3=82=81workaround=E3=81=A8?= =?UTF-8?q?=E3=81=97=E3=81=A6`lib.bench=20=3D=20false`=E3=82=92=E8=A8=AD?= =?UTF-8?q?=E5=AE=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit see also: https://bheisler.github.io/criterion.rs/book/faq.html#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options --- core/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/core/Cargo.toml b/core/Cargo.toml index 8a9a88ab..aaddccd1 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -13,6 +13,7 @@ rust-version = "1.73.0" [lib] crate-type = ["rlib", "cdylib"] +bench = false [features] default = ["city-name-correction"] From e76721bdcc8177692ba9dd502073452309435ce8 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 21:30:50 +0900 Subject: [PATCH 08/12] =?UTF-8?q?fix:=20#396:=20=E3=83=99=E3=83=B3?= =?UTF-8?q?=E3=83=81=E3=83=9E=E3=83=BC=E3=82=AF=E3=81=AE=E5=90=8D=E5=89=8D?= =?UTF-8?q?=E3=82=92=E6=8C=87=E5=AE=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/code-quality-check.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-quality-check.yaml b/.github/workflows/code-quality-check.yaml index 3b7554ef..97b99b95 100644 --- a/.github/workflows/code-quality-check.yaml +++ b/.github/workflows/code-quality-check.yaml @@ -27,3 +27,4 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} branchName: ${{ github.base_ref }} cwd: 'core' + benchName: 'core_benchmark' From c879641a859f00edc110eef11f99f66344e33d9d Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 21:37:28 +0900 Subject: [PATCH 09/12] =?UTF-8?q?update:=20#396:=20`code-quality-check.yam?= =?UTF-8?q?l`=E3=81=AE=E5=AE=9F=E8=A1=8C=E6=9D=A1=E4=BB=B6=E3=82=92?= =?UTF-8?q?=E8=AA=BF=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 拡張子が`.rs`であるファイル、または`Cargo.toml`が変更されたときにのみ実行されるようにした --- .github/workflows/code-quality-check.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/code-quality-check.yaml b/.github/workflows/code-quality-check.yaml index 97b99b95..a1be635e 100644 --- a/.github/workflows/code-quality-check.yaml +++ b/.github/workflows/code-quality-check.yaml @@ -2,6 +2,9 @@ name: Code quality check on: pull_request: + paths: + - '**.rs' + - '**/Cargo.toml' jobs: build: From da5a42de7656a467e3334bf6ae7ad9c9139ee002 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 21:56:11 +0900 Subject: [PATCH 10/12] =?UTF-8?q?update:=20#396:=20`Vec`=E3=81=AE?= =?UTF-8?q?=E4=B8=AD=E3=81=8B=E3=82=89=E5=BF=85=E8=A6=81=E3=81=AA=E3=82=82?= =?UTF-8?q?=E3=81=AE=E3=81=AE=E3=81=BF=E3=82=92=E5=8F=96=E3=82=8A=E5=87=BA?= =?UTF-8?q?=E3=81=99=E5=87=A6=E7=90=86=E3=82=92`filter`=E3=82=92=E4=BD=BF?= =?UTF-8?q?=E3=81=A3=E3=81=A6=E6=9B=B8=E3=81=8D=E7=9B=B4=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../adapter/orthographical_variant_adapter.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/core/src/parser/adapter/orthographical_variant_adapter.rs b/core/src/parser/adapter/orthographical_variant_adapter.rs index 6cd327aa..1be29016 100644 --- a/core/src/parser/adapter/orthographical_variant_adapter.rs +++ b/core/src/parser/adapter/orthographical_variant_adapter.rs @@ -66,21 +66,20 @@ pub struct OrthographicalVariantAdapter { impl OrthographicalVariantAdapter { pub fn apply(self, input: &str, region_name: &str) -> Option<(String, String)> { - let mut filtered_variant_list: Vec = vec![]; // 必要なパターンのみを選別する - for variant in self.variant_list.clone() { - if variant.iter().any(|character| input.contains(character)) { - filtered_variant_list.push(variant); - } - } - if filtered_variant_list.is_empty() { + let variant_list: Vec<&Variant> = self + .variant_list + .iter() + .filter(|v| v.iter().any(|c| input.contains(c))) + .collect(); + if variant_list.is_empty() { return None; } // マッチ候補を容れておくためのVector let mut candidates: Vec = vec![region_name.to_string()]; // パターンを一つづつ検証していく - for variant in filtered_variant_list { + for variant in variant_list { let mut semi_candidates: Vec = vec![]; // variantから順列を作成 // ["ケ", "ヶ", "が"] -> (ケ, ヶ), (ケ, が), (ヶ, ケ), (ヶ, が), (が, ケ), (が, ヶ) From 02a6b8a579ed8126821d2ec822cd5441a5b8032b Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 22:22:23 +0900 Subject: [PATCH 11/12] =?UTF-8?q?update:=20#396:=20=E4=B8=8D=E8=A6=81?= =?UTF-8?q?=E3=81=AAclone=E3=82=92=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/parser/adapter/orthographical_variant_adapter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/parser/adapter/orthographical_variant_adapter.rs b/core/src/parser/adapter/orthographical_variant_adapter.rs index 1be29016..352fd021 100644 --- a/core/src/parser/adapter/orthographical_variant_adapter.rs +++ b/core/src/parser/adapter/orthographical_variant_adapter.rs @@ -99,7 +99,7 @@ impl OrthographicalVariantAdapter { )); } else { // マッチしなければsemi_candidatesに置き換え後の文字列をpush - semi_candidates.push(edited_region_name.clone()); + semi_candidates.push(edited_region_name); }; } } From cf057a6d192a73013d5b26257a42fae39ae10884 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 1 Sep 2024 22:41:41 +0900 Subject: [PATCH 12/12] =?UTF-8?q?update:=20#396:=20if=E6=96=87=E3=82=92fil?= =?UTF-8?q?ter=E3=81=AB=E7=BD=AE=E3=81=8D=E6=8F=9B=E3=81=88=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../adapter/orthographical_variant_adapter.rs | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/core/src/parser/adapter/orthographical_variant_adapter.rs b/core/src/parser/adapter/orthographical_variant_adapter.rs index 352fd021..d115efb2 100644 --- a/core/src/parser/adapter/orthographical_variant_adapter.rs +++ b/core/src/parser/adapter/orthographical_variant_adapter.rs @@ -84,24 +84,22 @@ impl OrthographicalVariantAdapter { // variantから順列を作成 // ["ケ", "ヶ", "が"] -> (ケ, ヶ), (ケ, が), (ヶ, ケ), (ヶ, が), (が, ケ), (が, ヶ) for permutation in variant.iter().permutations(2) { - for candidate in &candidates { + for candidate in candidates.iter().filter(|c| c.contains(permutation[0])) { // マッチ候補の中でパターンに引っかかるものがあれば文字を置き換えてマッチを試す - if candidate.contains(permutation[0]) { - let edited_region_name = candidate.replace(permutation[0], permutation[1]); - if input.starts_with(&edited_region_name) { - // マッチすれば早期リターン - return Some(( - region_name.to_string(), - input - .chars() - .skip(edited_region_name.chars().count()) - .collect(), - )); - } else { - // マッチしなければsemi_candidatesに置き換え後の文字列をpush - semi_candidates.push(edited_region_name); - }; - } + let edited_region_name = candidate.replace(permutation[0], permutation[1]); + if input.starts_with(&edited_region_name) { + // マッチすれば早期リターン + return Some(( + region_name.to_string(), + input + .chars() + .skip(edited_region_name.chars().count()) + .collect(), + )); + } else { + // マッチしなければsemi_candidatesに置き換え後の文字列をpush + semi_candidates.push(edited_region_name); + }; } } candidates = semi_candidates;