Skip to content

Commit

Permalink
Merge pull request #434 from YuukiToriyama/release/v0.1.16
Browse files Browse the repository at this point in the history
release/v0.1.16をmainブランチにマージ
  • Loading branch information
YuukiToriyama authored Sep 25, 2024
2 parents fce588d + 09bf76a commit 42e476a
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.15"
version = "0.1.16"
edition = "2021"
description = "A Rust Library to parse japanese addresses."
repository = "https://github.com/YuukiToriyama/japanese-address-parser"
Expand Down
2 changes: 1 addition & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ eliminate-whitespaces = []
[dependencies]
itertools = "0.13.0"
rapidfuzz = "0.5.0"
regex = "1.10.2"
regex = { version = "1.10.6", default-features = false, features = ["std", "unicode-perl"] }
serde.workspace = true
reqwest = { version = "0.12.5", default-features = false, features = ["json", "rustls-tls"] }
js-sys = "0.3.67"
Expand Down
4 changes: 3 additions & 1 deletion core/src/parser/adapter/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ pub trait OrthographicalVariants {
const: Variant;
const: Variant;
const: Variant;
const: Variant;
}

impl OrthographicalVariants for Variant {
const: Variant = &["の", "ノ"];
const: Variant = &["の", "ノ", "之"];
const: Variant = &["ツ", "ッ"];
const: Variant = &["ケ", "ヶ", "が", "ガ"];
const: Variant = &["薮", "藪", "籔"];
Expand All @@ -60,6 +61,7 @@ impl OrthographicalVariants for Variant {
const: Variant = &["籠", "篭"];
const: Variant = &["濱", "浜"];
const: Variant = &["祗", "祇"];
const: Variant = &["曾", "曽"];
}

pub struct OrthographicalVariantAdapter {
Expand Down
1 change: 1 addition & 0 deletions core/src/tokenizer/read_town.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ fn find_town(input: &str, candidates: &Vec<String>) -> Option<(String, String)>
Variant::籠,
Variant::濱,
Variant::祗,
Variant::曾,
],
};
if let Some(result) = adapter.apply(input, candidate) {
Expand Down
5 changes: 5 additions & 0 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ async fn 異字体旧字体への対応テスト() {
run_data_driven_tests("./test_data/異字体旧字体への対応.csv").await
}

/// Data-driven integration test; the name translates roughly to "handling of
/// spelling variations that are not variant characters".
///
/// Passes the CSV fixture path below to `run_data_driven_tests` and awaits the
/// returned future; all case handling and assertions happen inside that helper.
#[tokio::test]
async fn 異字体ではない表記ゆれへの対応テスト() {
run_data_driven_tests("./test_data/異字体ではない表記ゆれへの対応.csv").await
}

#[tokio::test]
async fn 丁目が算用数字の場合への対応テスト() {
run_data_driven_tests("./test_data/丁目が算用数字の場合への対応.csv").await
Expand Down
5 changes: 5 additions & 0 deletions tests/test_data/異字体ではない表記ゆれへの対応.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
address,prefecture,city,town,rest
# 「ノ」「の」「之」の表記ゆれへの対応
神奈川県鎌倉市山ノ内189,神奈川県,鎌倉市,山ノ内,189
神奈川県鎌倉市山の内189,神奈川県,鎌倉市,山ノ内,189
神奈川県鎌倉市山之内189,神奈川県,鎌倉市,山ノ内,189
3 changes: 3 additions & 0 deletions tests/test_data/異字体旧字体への対応.csv
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,6 @@ address,prefecture,city,town,rest
# 「竃」と「竈」の表記ゆれへの対応
静岡県御殿場市竈1032,静岡県,御殿場市,竈,1032
静岡県御殿場市竃1032,静岡県,御殿場市,竈,1032
# 「小曾根」と「小曽根」の表記ゆれへの対応
埼玉県熊谷市小曽根1220,埼玉県,熊谷市,小曽根,1220
埼玉県熊谷市小曾根1220,埼玉県,熊谷市,小曽根,1220

0 comments on commit 42e476a

Please sign in to comment.