From bbb4bcfd0b2e09b94b97c6a5883dcd4f5f5e3bfd Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Jan 2024 00:50:00 +0900 Subject: [PATCH 1/9] =?UTF-8?q?rename:=20=E3=80=8C=E5=A4=A7=E5=AD=97?= =?UTF-8?q?=E3=80=8D=E3=81=AE=E7=9C=81=E7=95=A5=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?:=20`parser=5Ftests`=20->=20`tests`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/read_town.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/read_town.rs b/src/parser/read_town.rs index 8d0b1d43..8ccd1596 100644 --- a/src/parser/read_town.rs +++ b/src/parser/read_town.rs @@ -51,7 +51,7 @@ fn find_town(input: &String, city: &City) -> Option<(String, String)> { #[cfg(test)] #[cfg(not(target_arch = "wasm32"))] -mod parser_tests { +mod tests { use crate::api::{BlockingApi, BlockingApiImpl}; use crate::entity::{City, Town}; use crate::parser::read_town::read_town; From 9b19a9a497d98de3836df7bdb2f0304beeef3e37 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Jan 2024 00:51:17 +0900 Subject: [PATCH 2/9] =?UTF-8?q?fix:=20=E3=80=8C=E5=A4=A7=E5=AD=97=E3=80=8D?= =?UTF-8?q?=E3=81=AE=E7=9C=81=E7=95=A5=E3=81=AB=E5=AF=BE=E5=BF=9C:=20?= =?UTF-8?q?=E8=A4=87=E6=95=B0=E8=A1=8C=E3=81=AEcfg=E3=82=A2=E3=83=88?= =?UTF-8?q?=E3=83=AA=E3=83=93=E3=83=A5=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3?= =?UTF-8?q?=E3=82=92=E4=B8=80=E8=A1=8C=E3=81=AB=E3=81=BE=E3=81=A8=E3=82=81?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/read_town.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser/read_town.rs b/src/parser/read_town.rs index 8ccd1596..6cfb236b 100644 --- a/src/parser/read_town.rs +++ b/src/parser/read_town.rs @@ -49,8 +49,7 @@ fn find_town(input: &String, city: &City) -> Option<(String, String)> { None } -#[cfg(test)] -#[cfg(not(target_arch = "wasm32"))] +#[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use crate::api::{BlockingApi, BlockingApiImpl}; use crate::entity::{City, Town}; From b9d8527b8d9a66494c04ae70fe41bd486ca5d02a Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Jan 2024 00:59:15 +0900 Subject: [PATCH 3/9] =?UTF-8?q?update:=20=E3=80=8C=E5=A4=A7=E5=AD=97?= =?UTF-8?q?=E3=80=8D=E3=81=AE=E7=9C=81=E7=95=A5=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?:=20`=E5=B9=B3=E4=BA=95`,`=E5=A4=A7=E5=AD=97=E5=B9=B3=E4=BA=95`?= =?UTF-8?q?=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/read_town.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/parser/read_town.rs b/src/parser/read_town.rs index 6cfb236b..86393127 100644 --- a/src/parser/read_town.rs +++ b/src/parser/read_town.rs @@ -161,4 +161,19 @@ mod tests { assert_eq!(town, town_name); } } + + #[test] + fn read_town_大字の省略_東京都西多摩郡日の出町大字平井() { + let blocking_api = BlockingApiImpl::new(); + let city = blocking_api + .get_city_master("東京都", "西多摩郡日の出町") + .unwrap(); + + let (rest, town) = read_town("大字平井2780番地", &city).unwrap(); + assert_eq!(town, "大字平井"); + assert_eq!(rest, "2780番地"); + let (rest, town) = read_town("平井2780番地", &city).unwrap(); + assert_eq!(town, "大字平井"); + assert_eq!(rest, "2780番地"); + } } From dbc5507aa03caa375a842464f8bfc4646c0ebbc7 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Jan 2024 01:28:27 +0900 Subject: [PATCH 4/9] =?UTF-8?q?update:=20=E3=80=8C=E5=A4=A7=E5=AD=97?= =?UTF-8?q?=E3=80=8D=E3=81=AE=E7=9C=81=E7=95=A5=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?:=20=E7=B5=90=E5=90=88=E3=83=86=E3=82=B9=E3=83=88=E3=81=AB`?= =?UTF-8?q?=E5=A4=A7=E5=AD=97=E8=A1=A8=E8=A8=98=E7=9C=81=E7=95=A5=E3=81=B8?= =?UTF-8?q?=E3=81=AE=E5=AF=BE=E5=BF=9C=E3=83=86=E3=82=B9=E3=83=88`?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration_tests.rs | 5 +++++ ...45\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 7 +++++++ 2 files changed, 12 insertions(+) create mode 100644 "tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 808ac088..e01baee7 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -25,3 +25,8 @@ async fn 住居表示実施済みの住所において正式でない表記へ ) .await } + +#[tokio::test] +async fn 大字表記省略への対応テスト() { + run_data_driven_tests("./tests/test_data/大字表記省略への対応.csv").await +} diff --git "a/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" new file mode 100644 index 00000000..a65be308 --- /dev/null +++ "b/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -0,0 +1,7 @@ +address,prefecture,city,town,rest +東京都西多摩郡日の出町大字平井2780番地,東京都,西多摩郡日の出町,大字平井,2780番地 +東京都西多摩郡日の出町平井2780番地,東京都,西多摩郡日の出町,大字平井,2780番地 +埼玉県南埼玉郡宮代町大字東粂原110,埼玉県,南埼玉郡宮代町,大字東粂原,110 +埼玉県南埼玉郡宮代町東粂原110,埼玉県,南埼玉郡宮代町,大字東粂原,110 +愛知県名古屋市守山区大字上志段味馬洗淵,愛知県,名古屋市守山区,大字上志段味,馬洗淵 +愛知県名古屋市守山区上志段味馬洗淵,愛知県,名古屋市守山区,大字上志段味,馬洗淵 \ No newline at end of file From f128f76a83a6a5c7576c543ba0f31c720576eae0 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Jan 2024 14:03:21 +0900 Subject: [PATCH 5/9] =?UTF-8?q?update:=20=E3=80=8C=E5=A4=A7=E5=AD=97?= =?UTF-8?q?=E3=80=8D=E3=81=AE=E7=9C=81=E7=95=A5=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?:=20=E4=BD=8F=E5=B1=85=E8=A1=A8=E7=A4=BA=E4=B8=80=E9=83=A8?= =?UTF-8?q?=E5=AE=9F=E6=96=BD=E6=B8=88=E3=81=BF=E3=81=A0=E3=81=8C=E5=A4=A7?= =?UTF-8?q?=E5=AD=97=E3=82=82=E6=AE=8B=E3=81=A3=E3=81=A6=E3=81=84=E3=82=8B?= =?UTF-8?q?=E5=9C=B0=E5=9F=9F=E3=81=AE=E5=A0=B4=E5=90=88=E3=81=AE=E3=82=B1?= =?UTF-8?q?=E3=83=BC=E3=82=B9=E3=82=92=E7=B5=90=E5=90=88=E3=83=86=E3=82=B9?= =?UTF-8?q?=E3=83=88=E3=81=AB=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...45\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git "a/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" index a65be308..f00eb9a6 100644 --- "a/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -4,4 +4,9 @@ address,prefecture,city,town,rest 埼玉県南埼玉郡宮代町大字東粂原110,埼玉県,南埼玉郡宮代町,大字東粂原,110 埼玉県南埼玉郡宮代町東粂原110,埼玉県,南埼玉郡宮代町,大字東粂原,110 愛知県名古屋市守山区大字上志段味馬洗淵,愛知県,名古屋市守山区,大字上志段味,馬洗淵 -愛知県名古屋市守山区上志段味馬洗淵,愛知県,名古屋市守山区,大字上志段味,馬洗淵 \ No newline at end of file +愛知県名古屋市守山区上志段味馬洗淵,愛知県,名古屋市守山区,大字上志段味,馬洗淵 +# 住居表示一部実施済みだが大字も残っている地域の場合 +福岡県福岡市南区大字桧原853-9,福岡県,福岡市南区,大字桧原,853-9 +福岡県福岡市南区桧原6-44-20,福岡県,福岡市南区,桧原六丁目,44-20 +福岡県遠賀郡遠賀町浅木463,福岡県,遠賀郡遠賀町,大字浅木,463 +福岡県遠賀郡遠賀町浅木1-16,福岡県,遠賀郡遠賀町,浅木一丁目,16 \ No newline at end of file From a3e6b1a00da1d6d00ada687af1646ad0cc5ff6f1 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Jan 2024 02:20:47 +0900 Subject: [PATCH 6/9] =?UTF-8?q?update:=20=E3=80=8C=E5=A4=A7=E5=AD=97?= =?UTF-8?q?=E3=80=8D=E3=81=AE=E7=9C=81=E7=95=A5=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?:=20=E5=A4=A7=E5=AD=97=E3=81=8C=E7=9C=81=E7=95=A5=E3=81=95?= =?UTF-8?q?=E3=82=8C=E3=81=A6=E3=81=84=E3=82=8B=E5=A0=B4=E5=90=88=E3=81=AF?= =?UTF-8?q?=E8=A3=9C=E3=81=A3=E3=81=A6=E3=81=8B=E3=82=89=E5=87=A6=E7=90=86?= =?UTF-8?q?=E3=82=92=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89?= =?UTF-8?q?=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/read_town.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parser/read_town.rs b/src/parser/read_town.rs index 86393127..e9a0b535 100644 --- a/src/parser/read_town.rs +++ b/src/parser/read_town.rs @@ -22,6 +22,10 @@ pub fn read_town(input: &str, city: &City) -> Option<(String, String)> { if let Some(result) = find_town(&input, city) { return Some(result); } + // ここまでで町名の検出に成功しない場合は、「大字」の省略の可能性を検討する + if let Some(result) = find_town(&format!("大字{}", input), city) { + return Some(result); + } None } From 5c0b58bd42eaf669a026e9f0d5a6cf705b9ca22d Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 3 Feb 2024 07:32:05 +0900 Subject: [PATCH 7/9] =?UTF-8?q?update:=20=E3=80=8C=E5=A4=A7=E5=AD=97?= =?UTF-8?q?=E3=80=8D=E3=81=AE=E7=9C=81=E7=95=A5=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?:=20`InvalidTownNameFormatFilter`=E3=81=AB=E3=81=8A=E3=81=84?= =?UTF-8?q?=E3=81=A643=E4=BB=A5=E4=B8=8A=E3=81=AE=E6=95=B0=E5=AD=97?= =?UTF-8?q?=E3=81=8C=E6=9D=A5=E3=81=9F=E5=A0=B4=E5=90=88=E3=81=AFNone?= =?UTF-8?q?=E3=82=92=E8=BF=94=E3=81=99=E3=82=88=E3=81=86=E3=81=AB=E3=81=99?= =?UTF-8?q?=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 住居表示が一部実施されている地域で番地を丁目として誤認識してしまう事象への暫定的な処置 - 丁目は高々42である - 住居表示未実施地域の地番は3桁以上のものが多い --- src/parser/filter/invalid_town_name_format.rs | 45 +++++++++++-------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/parser/filter/invalid_town_name_format.rs b/src/parser/filter/invalid_town_name_format.rs index f1446085..f335d1ec 100644 --- a/src/parser/filter/invalid_town_name_format.rs +++ b/src/parser/filter/invalid_town_name_format.rs @@ -26,17 +26,22 @@ fn extract_town_name_with_regex(input: &str) -> Option { } else { return None; }; - let block_number = if let Some(matched) = captures.name("block_number") { - matched.as_str().parse::().ok()?.to_japanese_form()? - } else { + let block_number = captures.name("block_number")?.as_str().parse::().ok()?; + // 帯広市西十九条四十二丁目の42が最大なので、43以上の値の場合はNoneを返すようにする + if block_number > 42 { return None; - }; + } let rest = if let Some(matched) = captures.name("rest") { matched.as_str() } else { "" }; - Some(format!("{}{}丁目{}", town_name, block_number, rest)) + Some(format!( + "{}{}丁目{}", + town_name, + block_number.to_japanese_form()?, + rest + )) } #[cfg(target_arch = "wasm32")] @@ -47,17 +52,21 @@ fn extract_town_name_with_js_sys_regexp(input: &str) -> Option { ); let captures = expression.exec(input)?; let town_name = captures.get(1).as_string()?; - let block_number = captures - .get(2) - .as_string()? - .parse::() - .ok()? - .to_japanese_form()?; + let block_number = captures.get(2).as_string()?.parse::().ok()?; + // 帯広市西十九条四十二丁目の42が最大なので、43以上の値の場合はNoneを返すようにする + if block_number > 42 { + return None; + } let rest = captures .get(3) .as_string() .unwrap_or_else(|| "".to_string()); - Some(format!("{}{}丁目{}", town_name, block_number, rest)) + Some(format!( + "{}{}丁目{}", + town_name, + block_number.to_japanese_form()?, + rest + )) } #[cfg(all(test, not(target_arch = "wasm32")))] @@ -103,10 +112,10 @@ mod tests { #[test] fn extract_town_name_with_regex_block_number_boundary_value() { - let result = extract_town_name_with_regex("有楽町127"); + let result = extract_town_name_with_regex("西十九条南42"); assert!(result.is_some()); - assert_eq!(result.unwrap(), "有楽町百二十七丁目"); - let result = extract_town_name_with_regex("有楽町128"); + assert_eq!(result.unwrap(), "西十九条南四十二丁目"); + let result = extract_town_name_with_regex("西十九条南43"); assert!(result.is_none()); } } @@ -157,10 +166,10 @@ mod wasm_tests { #[wasm_bindgen_test] fn extract_town_name_with_js_sys_block_number_boundary_value() { - let result = extract_town_name_with_js_sys_regexp("有楽町127"); + let result = extract_town_name_with_js_sys_regexp("西十九条南42"); assert!(result.is_some()); - assert_eq!(result.unwrap(), "有楽町百二十七丁目"); - let result = extract_town_name_with_js_sys_regexp("有楽町128"); + assert_eq!(result.unwrap(), "西十九条南四十二丁目"); + let result = extract_town_name_with_js_sys_regexp("西十九条南43"); assert!(result.is_none()); } From c065dfebe0e506081d6dc6ee4f733a3b82679cd7 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 3 Feb 2024 08:31:26 +0900 Subject: [PATCH 8/9] =?UTF-8?q?PR=E4=BD=9C=E6=88=90=E6=99=82=E3=82=84main?= =?UTF-8?q?=E3=83=96=E3=83=A9=E3=83=B3=E3=83=81=E3=81=B8=E3=81=AEpush?= =?UTF-8?q?=E6=99=82=E3=81=AB=E8=B5=B0=E3=82=8BCI=E3=81=A7`cargo=20fmt`?= =?UTF-8?q?=E3=82=92=E5=AE=9F=E8=A1=8C=E3=81=97=E3=81=AA=E3=81=84=E3=82=88?= =?UTF-8?q?=E3=81=86=E8=A8=AD=E5=AE=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - コードの整形は開発中に手元で行なう - マシンの使用時間を減らしたい --- .github/workflows/rust.yaml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index 5a7504d8..10ad4b22 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -15,19 +15,6 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up rustfmt - run: rustup component add rustfmt - - name: Code formatting - run: | - cargo fmt - git config user.name github-actions[bot] - git config user.email github-actions[bot]@users.noreply.github.com - git add --update - git commit -m "cargo fmt" - git push - continue-on-error: true - - name: Code style check - run: cargo fmt --check - name: Set up clippy run: rustup component add clippy - name: Code review with clippy From 0bfb3f4e9dfbb741854919e68963df1d7453a717 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 3 Feb 2024 08:34:04 +0900 Subject: [PATCH 9/9] =?UTF-8?q?package.version=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0.1.0-beta.6 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bc31132a..8f9ee4c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "japanese-address-parser" -version = "0.1.0-beta.5" +version = "0.1.0-beta.6" edition = "2021" description = "A Rust Library to parse japanses addresses." authors = ["Yuuki Toriyama "]