From 12c4e05626c7fbb788218bf97ec0b58f8ee92bb3 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 15:27:18 +0900 Subject: [PATCH 1/7] =?UTF-8?q?add:=20=E9=B9=BF=E5=B6=8B=E5=B8=82=E3=81=AE?= =?UTF-8?q?=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E5=B8=82=E5=8C=BA?= =?UTF-8?q?=E7=94=BA=E6=9D=91=E5=90=8D=E3=83=AC=E3=83=99=E3=83=AB=E3=81=A7?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C=E3=81=AE=E7=B5=90?= =?UTF-8?q?=E5=90=88=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration_tests.rs | 5 +++++ ...\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" | 1 + 2 files changed, 6 insertions(+) create mode 100644 "tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index e01baee7..db35e860 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -8,6 +8,11 @@ async fn 県庁所在地のデータテスト() { run_data_driven_tests("./tests/test_data/県庁所在地の住所データ.csv").await } +#[tokio::test] +async fn 市区町村名レベルでの表記ゆれ() { + run_data_driven_tests("./tests/test_data/市区町村名レベルでの表記ゆれ.csv").await +} + #[tokio::test] async fn 異字体旧字体への対応テスト() { run_data_driven_tests("./tests/test_data/異字体旧字体への対応.csv").await diff --git "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" new file mode 100644 index 00000000..ffce3709 --- /dev/null +++ "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" @@ -0,0 +1 @@ +address,prefecture,city,town,rest From 173c9fd8811fe93b9bd98ca7701ccf4855b0a2b8 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 15:29:22 +0900 Subject: [PATCH 2/7] =?UTF-8?q?update:=20=E9=B9=BF=E5=B6=8B=E5=B8=82?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E7=B5=90?= =?UTF-8?q?=E5=90=88=E8=A9=A6=E9=A8=93=E3=81=AB=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...01\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" | 3 +++ 1 file changed, 3 insertions(+) diff --git "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" index ffce3709..b10daf02 100644 --- "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" +++ "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" @@ -1 +1,4 @@ address,prefecture,city,town,rest +# 茨城県 +茨城県鹿嶋市大字平井1187-1,茨城県,鹿嶋市,大字平井,1187-1 +茨城県鹿島市大字平井1187-1,茨城県,鹿嶋市,大字平井,1187-1 \ No newline at end of file From fc4ad95e3031592d646032da301cbc4ab5644e77 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 15:33:36 +0900 Subject: [PATCH 3/7] =?UTF-8?q?update:=20=E9=B9=BF=E5=B6=8B=E5=B8=82?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E3=80=8C?= =?UTF-8?q?=E8=8C=A8=E5=9F=8E=E7=9C=8C=E9=B9=BF=E5=B3=B6=E5=B8=82=E3=80=8D?= =?UTF-8?q?=E3=82=92=E3=80=8C=E8=8C=A8=E5=9F=8E=E7=9C=8C=E9=B9=BF=E5=B6=8B?= =?UTF-8?q?=E5=B8=82=E3=80=8D=E3=81=A8=E8=AA=8D=E8=AD=98=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/adapter/orthographical_variant_adapter.rs | 2 ++ src/parser/read_city.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/src/parser/adapter/orthographical_variant_adapter.rs b/src/parser/adapter/orthographical_variant_adapter.rs index 717aa04b..734bbcee 100644 --- a/src/parser/adapter/orthographical_variant_adapter.rs +++ b/src/parser/adapter/orthographical_variant_adapter.rs @@ -13,6 +13,7 @@ pub trait OrthographicalVariants { const 崎: Variant; const 檜: Variant; const 龍: Variant; + const 嶋: Variant; } impl OrthographicalVariants for Variant { @@ -23,6 +24,7 @@ impl OrthographicalVariants for Variant { const 崎: Variant = &["崎", "﨑"]; const 檜: Variant = &["桧", "檜"]; const 龍: Variant = &["龍", "竜"]; + const 嶋: Variant = &["嶋", "島"]; } pub struct OrthographicalVariantAdapter { diff --git a/src/parser/read_city.rs b/src/parser/read_city.rs index dabc9f2f..e325b1d3 100644 --- a/src/parser/read_city.rs +++ b/src/parser/read_city.rs @@ -17,6 +17,7 @@ pub fn read_city(input: &str, prefecture: Prefecture) -> Option<(String, String) match prefecture.name.as_str() { "茨城県" => { variant_list.push(Variant::龍); + variant_list.push(Variant::嶋); } "東京都" => { variant_list.push(Variant::檜); From 2772226aa704296c36c43039dc443567d1d77d2e Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 18:29:12 +0900 Subject: [PATCH 4/7] =?UTF-8?q?package.version=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0.1.0-beta.8 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1a1ddee1..46a4ae22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "japanese-address-parser" -version = "0.1.0-beta.7" +version = "0.1.0-beta.8" edition = "2021" description = "A Rust Library to parse japanses addresses." authors = ["Yuuki Toriyama "] From 649aec4b60d14bd146878515a5b29f094318a05f Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 15:27:18 +0900 Subject: [PATCH 5/7] =?UTF-8?q?add:=20=E9=B9=BF=E5=B6=8B=E5=B8=82=E3=81=AE?= =?UTF-8?q?=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E5=B8=82=E5=8C=BA?= =?UTF-8?q?=E7=94=BA=E6=9D=91=E5=90=8D=E3=83=AC=E3=83=99=E3=83=AB=E3=81=A7?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C=E3=81=AE=E7=B5=90?= =?UTF-8?q?=E5=90=88=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration_tests.rs | 5 +++++ ...\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" | 1 + 2 files changed, 6 insertions(+) create mode 100644 "tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index e01baee7..db35e860 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -8,6 +8,11 @@ async fn 県庁所在地のデータテスト() { run_data_driven_tests("./tests/test_data/県庁所在地の住所データ.csv").await } +#[tokio::test] +async fn 市区町村名レベルでの表記ゆれ() { + run_data_driven_tests("./tests/test_data/市区町村名レベルでの表記ゆれ.csv").await +} + #[tokio::test] async fn 異字体旧字体への対応テスト() { run_data_driven_tests("./tests/test_data/異字体旧字体への対応.csv").await diff --git "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" new file mode 100644 index 00000000..ffce3709 --- /dev/null +++ "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" @@ -0,0 +1 @@ +address,prefecture,city,town,rest From a38472dd3dca09b3c07f918825c429e6f29d22ab Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 23:09:09 +0900 Subject: [PATCH 6/7] =?UTF-8?q?update:=20=E5=A1=A9=E7=AB=88=E5=B8=82?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E7=B5=90?= =?UTF-8?q?=E5=90=88=E8=A9=A6=E9=A8=93=E3=81=AB=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...01\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" | 3 +++ 1 file changed, 3 insertions(+) diff --git "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" index ffce3709..a8a2d129 100644 --- "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" +++ "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" @@ -1 +1,4 @@ address,prefecture,city,town,rest +# 宮城県 +宮城県塩竈市海岸通15-1,宮城県,塩竈市,海岸通,15-1 +宮城県塩釜市海岸通15-1,宮城県,塩竈市,海岸通,15-1 \ No newline at end of file From a86202cec1d1254b4a00719df5bd9fc02a1030ff Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 23:11:43 +0900 Subject: [PATCH 7/7] =?UTF-8?q?update:=20=E5=A1=A9=E7=AB=88=E5=B8=82?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E3=80=8C?= =?UTF-8?q?=E5=AE=AE=E5=9F=8E=E7=9C=8C=E5=A1=A9=E9=87=9C=E5=B8=82=E3=80=8D?= =?UTF-8?q?=E3=82=92=E3=80=8C=E5=AE=AE=E5=9F=8E=E7=9C=8C=E5=A1=A9=E7=AB=88?= =?UTF-8?q?=E5=B8=82=E3=80=8D=E3=81=A8=E8=AA=8D=E8=AD=98=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/adapter/orthographical_variant_adapter.rs | 2 ++ src/parser/read_city.rs | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/parser/adapter/orthographical_variant_adapter.rs b/src/parser/adapter/orthographical_variant_adapter.rs index 717aa04b..e3bacb42 100644 --- a/src/parser/adapter/orthographical_variant_adapter.rs +++ b/src/parser/adapter/orthographical_variant_adapter.rs @@ -13,6 +13,7 @@ pub trait OrthographicalVariants { const 崎: Variant; const 檜: Variant; const 龍: Variant; + const 竈: Variant; } impl OrthographicalVariants for Variant { @@ -23,6 +24,7 @@ impl OrthographicalVariants for Variant { const 崎: Variant = &["崎", "﨑"]; const 檜: Variant = &["桧", "檜"]; const 龍: Variant = &["龍", "竜"]; + const 竈: Variant = &["竈", "釜"]; } pub struct OrthographicalVariantAdapter { diff --git a/src/parser/read_city.rs b/src/parser/read_city.rs index dabc9f2f..99c6c072 100644 --- a/src/parser/read_city.rs +++ b/src/parser/read_city.rs @@ -15,6 +15,9 @@ pub fn read_city(input: &str, prefecture: Prefecture) -> Option<(String, String) } let mut variant_list = vec![Variant::ケ]; match prefecture.name.as_str() { + "宮城県" => { + variant_list.push(Variant::竈); + } "茨城県" => { variant_list.push(Variant::龍); }