From 12c4e05626c7fbb788218bf97ec0b58f8ee92bb3 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 15:27:18 +0900 Subject: [PATCH 1/3] =?UTF-8?q?add:=20=E9=B9=BF=E5=B6=8B=E5=B8=82=E3=81=AE?= =?UTF-8?q?=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E5=B8=82=E5=8C=BA?= =?UTF-8?q?=E7=94=BA=E6=9D=91=E5=90=8D=E3=83=AC=E3=83=99=E3=83=AB=E3=81=A7?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C=E3=81=AE=E7=B5=90?= =?UTF-8?q?=E5=90=88=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration_tests.rs | 5 +++++ ...\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" | 1 + 2 files changed, 6 insertions(+) create mode 100644 "tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index e01baee7..db35e860 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -8,6 +8,11 @@ async fn 県庁所在地のデータテスト() { run_data_driven_tests("./tests/test_data/県庁所在地の住所データ.csv").await } +#[tokio::test] +async fn 市区町村名レベルでの表記ゆれ() { + run_data_driven_tests("./tests/test_data/市区町村名レベルでの表記ゆれ.csv").await +} + #[tokio::test] async fn 異字体旧字体への対応テスト() { run_data_driven_tests("./tests/test_data/異字体旧字体への対応.csv").await diff --git "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" new file mode 100644 index 00000000..ffce3709 --- /dev/null +++ "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" @@ -0,0 +1 @@ +address,prefecture,city,town,rest From 173c9fd8811fe93b9bd98ca7701ccf4855b0a2b8 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 15:29:22 +0900 Subject: [PATCH 2/3] =?UTF-8?q?update:=20=E9=B9=BF=E5=B6=8B=E5=B8=82?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E7=B5=90?= =?UTF-8?q?=E5=90=88=E8=A9=A6=E9=A8=93=E3=81=AB=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...01\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" | 3 +++ 1 file changed, 3 insertions(+) diff --git "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" index ffce3709..b10daf02 100644 --- "a/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" +++ "b/tests/test_data/\345\270\202\345\214\272\347\224\272\346\235\221\345\220\215\343\203\254\343\203\231\343\203\253\343\201\247\343\201\256\350\241\250\350\250\230\343\202\206\343\202\214.csv" @@ -1 +1,4 @@ address,prefecture,city,town,rest +# 茨城県 +茨城県鹿嶋市大字平井1187-1,茨城県,鹿嶋市,大字平井,1187-1 +茨城県鹿島市大字平井1187-1,茨城県,鹿嶋市,大字平井,1187-1 \ No newline at end of file From fc4ad95e3031592d646032da301cbc4ab5644e77 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 10 Feb 2024 15:33:36 +0900 Subject: [PATCH 3/3] =?UTF-8?q?update:=20=E9=B9=BF=E5=B6=8B=E5=B8=82?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C:=20=E3=80=8C?= =?UTF-8?q?=E8=8C=A8=E5=9F=8E=E7=9C=8C=E9=B9=BF=E5=B3=B6=E5=B8=82=E3=80=8D?= =?UTF-8?q?=E3=82=92=E3=80=8C=E8=8C=A8=E5=9F=8E=E7=9C=8C=E9=B9=BF=E5=B6=8B?= =?UTF-8?q?=E5=B8=82=E3=80=8D=E3=81=A8=E8=AA=8D=E8=AD=98=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/adapter/orthographical_variant_adapter.rs | 2 ++ src/parser/read_city.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/src/parser/adapter/orthographical_variant_adapter.rs b/src/parser/adapter/orthographical_variant_adapter.rs index 717aa04b..734bbcee 100644 --- a/src/parser/adapter/orthographical_variant_adapter.rs +++ b/src/parser/adapter/orthographical_variant_adapter.rs @@ -13,6 +13,7 @@ pub trait OrthographicalVariants { const 崎: Variant; const 檜: Variant; const 龍: Variant; + const 嶋: Variant; } impl OrthographicalVariants for Variant { @@ -23,6 +24,7 @@ impl OrthographicalVariants for Variant { const 崎: Variant = &["崎", "﨑"]; const 檜: Variant = &["桧", "檜"]; const 龍: Variant = &["龍", "竜"]; + const 嶋: Variant = &["嶋", "島"]; } pub struct OrthographicalVariantAdapter { diff --git a/src/parser/read_city.rs b/src/parser/read_city.rs index dabc9f2f..e325b1d3 100644 --- a/src/parser/read_city.rs +++ b/src/parser/read_city.rs @@ -17,6 +17,7 @@ pub fn read_city(input: &str, prefecture: Prefecture) -> Option<(String, String) match prefecture.name.as_str() { "茨城県" => { variant_list.push(Variant::龍); + variant_list.push(Variant::嶋); } "東京都" => { variant_list.push(Variant::檜);