Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

表記ゆれアダプタのリファクタをrelease/v0.1.17にマージ #435

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
ba09250
add: #396: coreクレートに`criterion`を追加
YuukiToriyama Sep 1, 2024
89c1adb
update: #396: `japanese_address_parser::parser::adapter`の可視性を変更
YuukiToriyama Sep 1, 2024
ffb749d
add: #396: ベンチマークテストに`OrthographicalVariantAdapter`に対するテストを追加
YuukiToriyama Sep 1, 2024
9ef65fb
fix: #396: `criterion`のdefault-featuresを無効にした
YuukiToriyama Sep 1, 2024
c19678d
Merge pull request #397 from YuukiToriyama/feature/refactor-orthograp…
YuukiToriyama Sep 1, 2024
c309ecc
update: #396: `code-quality-check.yaml`にベンチマークを実行するステップを追加
YuukiToriyama Sep 1, 2024
ab4cc16
update: #396: ベンチマークテストを実行するディレクトリ(クレート)を設定
YuukiToriyama Sep 1, 2024
8c6d327
fix: #396: `cargo bench`実行時にエラーが出るためworkaroundとして`lib.bench = false`を設定
YuukiToriyama Sep 1, 2024
e76721b
fix: #396: ベンチマークの名前を指定
YuukiToriyama Sep 1, 2024
c879641
update: #396: `code-quality-check.yaml`の実行条件を調整
YuukiToriyama Sep 1, 2024
fba74d2
Merge pull request #398 from YuukiToriyama/feature/refactor-orthograp…
YuukiToriyama Sep 1, 2024
da5a42d
update: #396: `Vec<Variant>`の中から必要なもののみを取り出す処理を`filter`を使って書き直した
YuukiToriyama Sep 1, 2024
02a6b8a
update: #396: 不要なcloneを削除
YuukiToriyama Sep 1, 2024
cf057a6
update: #396: if文をfilterに置き換えた
YuukiToriyama Sep 1, 2024
50db4ed
Merge pull request #399 from YuukiToriyama/feature/refactor-orthograp…
YuukiToriyama Sep 4, 2024
5a191ec
Merge branch 'main' into feature/refactor-orthographical-variant-adap…
YuukiToriyama Sep 5, 2024
fc8e7d8
Merge pull request #405 from YuukiToriyama/feature/refactor-orthograp…
YuukiToriyama Sep 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/code-quality-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Code quality check

on:
pull_request:
paths:
- '**.rs'
- '**/Cargo.toml'

jobs:
build:
Expand All @@ -21,3 +24,10 @@ jobs:
reporter: 'github-pr-review'
filter_mode: 'nofilter'
github_token: ${{ secrets.GITHUB_TOKEN }}
- name: Run benchmark
uses: boa-dev/criterion-compare-action@v3
with:
token: ${{ secrets.GITHUB_TOKEN }}
branchName: ${{ github.base_ref }}
cwd: 'core'
benchName: 'core_benchmark'
6 changes: 6 additions & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,18 @@ rust-version = "1.73.0"

[lib]
crate-type = ["rlib", "cdylib"]
bench = false

[features]
default = ["city-name-correction"]
blocking = ["reqwest/blocking"]
city-name-correction = []
format-house-number = []

[[bench]]
name = "core_benchmark"
harness = false

[dependencies]
itertools = "0.13.0"
rapidfuzz = "0.5.0"
Expand All @@ -29,6 +34,7 @@ reqwest = { version = "0.12.5", default-features = false, features = ["json", "r
js-sys = "0.3.67"

[dev-dependencies]
criterion = { version = "0.5.1", default-features = false, features = ["html_reports"] }
tokio.workspace = true
wasm-bindgen-test = { workspace = true }

Expand Down
7 changes: 7 additions & 0 deletions core/benches/core_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mod orthographical_variant_adapter;

use crate::orthographical_variant_adapter::bench_orthographical_variant_adapter;
use criterion::{criterion_group, criterion_main};

// Collect the adapter benchmark function into a Criterion group named `benches`.
criterion_group!(benches, bench_orthographical_variant_adapter);
// Expand to the benchmark binary's `main`, which runs the `benches` group.
criterion_main!(benches);
48 changes: 48 additions & 0 deletions core/benches/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use criterion::measurement::WallTime;
use criterion::{BatchSize, BenchmarkGroup, BenchmarkId, Criterion};
use japanese_address_parser::parser::adapter::orthographical_variant_adapter::{
OrthographicalVariantAdapter, OrthographicalVariants, Variant,
};

/// Registers the `orthographical_variant_adapter` benchmark group on `c`.
///
/// The group receives one benchmark per alternative spelling of the town name
/// `松ケ崎東池ノ内町`; each spelling is expected to be resolved back to that
/// name by the adapter (see `add_tests`).
pub fn bench_orthographical_variant_adapter(c: &mut Criterion) {
    let mut benchmark_group = c.benchmark_group("orthographical_variant_adapter");

    // Alternative spellings of one town name, plus the variants the adapter
    // is allowed to use when matching them.
    let suite = TestSuite {
        expected: "松ケ崎東池ノ内町",
        inputs: vec![
            "松が崎東池ノ内町",
            "松ヶ崎東池ノ内町",
            "松ケ﨑東池ノ内町",
            "松ケ﨑東池の内町",
            "松ガ﨑東池の内町",
        ],
        variants_to_be_used: vec![Variant::ケ, Variant::崎, Variant::の],
    };
    add_tests(&mut benchmark_group, suite);

    benchmark_group.finish();
}

fn add_tests(group: &mut BenchmarkGroup<WallTime>, test_suite: TestSuite) {
for input in test_suite.inputs {
let benchmark_id = BenchmarkId::new(test_suite.expected, input);
group.bench_with_input(benchmark_id, input, |b, input| {
b.iter_batched(
|| OrthographicalVariantAdapter {
variant_list: test_suite.variants_to_be_used.clone(),
},
|adapter| {
let (region_name, _) = adapter.apply(input, test_suite.expected).unwrap();
assert_eq!(region_name, test_suite.expected);
},
BatchSize::SmallInput,
)
});
}
}

/// One benchmark scenario: several spellings that should all resolve to the
/// same region name.
struct TestSuite {
    /// Spellings fed to the adapter; each one becomes its own benchmark.
    inputs: Vec<&'static str>,
    /// Region name passed to `apply` and expected back from it.
    expected: &'static str,
    /// Variant list installed on the adapter for every run.
    variants_to_be_used: Vec<Variant>,
}
2 changes: 1 addition & 1 deletion core/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::domain::geolonia::error::{Error, ParseErrorKind};
use crate::tokenizer::Tokenizer;
use serde::Serialize;

pub(crate) mod adapter;
pub mod adapter;

impl<T> From<Tokenizer<T>> for Address {
fn from(value: Tokenizer<T>) -> Self {
Expand Down
47 changes: 22 additions & 25 deletions core/src/parser/adapter/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,43 +64,40 @@ pub struct OrthographicalVariantAdapter {

impl OrthographicalVariantAdapter {
pub fn apply(self, input: &str, region_name: &str) -> Option<(String, String)> {
let mut filtered_variant_list: Vec<Variant> = vec![];
// 必要なパターンのみを選別する
for variant in self.variant_list.clone() {
if variant.iter().any(|character| input.contains(character)) {
filtered_variant_list.push(variant);
}
}
if filtered_variant_list.is_empty() {
let variant_list: Vec<&Variant> = self
.variant_list
.iter()
.filter(|v| v.iter().any(|c| input.contains(c)))
.collect();
if variant_list.is_empty() {
return None;
}

// マッチ候補を容れておくためのVector
let mut candidates: Vec<String> = vec![region_name.to_string()];
// パターンを一つづつ検証していく
for variant in filtered_variant_list {
for variant in variant_list {
let mut semi_candidates: Vec<String> = vec![];
// variantから順列を作成
// ["ケ", "ヶ", "が"] -> (ケ, ヶ), (ケ, が), (ヶ, ケ), (ヶ, が), (が, ケ), (が, ヶ)
for permutation in variant.iter().permutations(2) {
for candidate in &candidates {
for candidate in candidates.iter().filter(|c| c.contains(permutation[0])) {
// マッチ候補の中でパターンに引っかかるものがあれば文字を置き換えてマッチを試す
if candidate.contains(permutation[0]) {
let edited_region_name = candidate.replace(permutation[0], permutation[1]);
if input.starts_with(&edited_region_name) {
// マッチすれば早期リターン
return Some((
region_name.to_string(),
input
.chars()
.skip(edited_region_name.chars().count())
.collect(),
));
} else {
// マッチしなければsemi_candidatesに置き換え後の文字列をpush
semi_candidates.push(edited_region_name.clone());
};
}
let edited_region_name = candidate.replace(permutation[0], permutation[1]);
if input.starts_with(&edited_region_name) {
// マッチすれば早期リターン
return Some((
region_name.to_string(),
input
.chars()
.skip(edited_region_name.chars().count())
.collect(),
));
} else {
// マッチしなければsemi_candidatesに置き換え後の文字列をpush
semi_candidates.push(edited_region_name);
};
}
}
candidates = semi_candidates;
Expand Down
Loading