diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index 5a7504d8..10ad4b22 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -15,19 +15,6 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up rustfmt - run: rustup component add rustfmt - - name: Code formatting - run: | - cargo fmt - git config user.name github-actions[bot] - git config user.email github-actions[bot]@users.noreply.github.com - git add --update - git commit -m "cargo fmt" - git push - continue-on-error: true - - name: Code style check - run: cargo fmt --check - name: Set up clippy run: rustup component add clippy - name: Code review with clippy diff --git a/Cargo.toml b/Cargo.toml index bc31132a..8f9ee4c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "japanese-address-parser" -version = "0.1.0-beta.5" +version = "0.1.0-beta.6" edition = "2021" description = "A Rust Library to parse japanses addresses." authors = ["Yuuki Toriyama "] diff --git a/src/parser/filter/invalid_town_name_format.rs b/src/parser/filter/invalid_town_name_format.rs index f1446085..f335d1ec 100644 --- a/src/parser/filter/invalid_town_name_format.rs +++ b/src/parser/filter/invalid_town_name_format.rs @@ -26,17 +26,22 @@ fn extract_town_name_with_regex(input: &str) -> Option { } else { return None; }; - let block_number = if let Some(matched) = captures.name("block_number") { - matched.as_str().parse::().ok()?.to_japanese_form()? - } else { + let block_number = captures.name("block_number")?.as_str().parse::().ok()?; + // 帯広市西十九条四十二丁目の42が最大なので、43以上の値の場合はNoneを返すようにする + if block_number > 42 { return None; - }; + } let rest = if let Some(matched) = captures.name("rest") { matched.as_str() } else { "" }; - Some(format!("{}{}丁目{}", town_name, block_number, rest)) + Some(format!( + "{}{}丁目{}", + town_name, + block_number.to_japanese_form()?, + rest + )) } #[cfg(target_arch = "wasm32")] @@ -47,17 +52,21 @@ fn extract_town_name_with_js_sys_regexp(input: &str) -> Option { ); let captures = expression.exec(input)?; let town_name = captures.get(1).as_string()?; - let block_number = captures - .get(2) - .as_string()? - .parse::() - .ok()? - .to_japanese_form()?; + let block_number = captures.get(2).as_string()?.parse::().ok()?; + // 帯広市西十九条四十二丁目の42が最大なので、43以上の値の場合はNoneを返すようにする + if block_number > 42 { + return None; + } let rest = captures .get(3) .as_string() .unwrap_or_else(|| "".to_string()); - Some(format!("{}{}丁目{}", town_name, block_number, rest)) + Some(format!( + "{}{}丁目{}", + town_name, + block_number.to_japanese_form()?, + rest + )) } #[cfg(all(test, not(target_arch = "wasm32")))] @@ -103,10 +112,10 @@ mod tests { #[test] fn extract_town_name_with_regex_block_number_boundary_value() { - let result = extract_town_name_with_regex("有楽町127"); + let result = extract_town_name_with_regex("西十九条南42"); assert!(result.is_some()); - assert_eq!(result.unwrap(), "有楽町百二十七丁目"); - let result = extract_town_name_with_regex("有楽町128"); + assert_eq!(result.unwrap(), "西十九条南四十二丁目"); + let result = extract_town_name_with_regex("西十九条南43"); assert!(result.is_none()); } } @@ -157,10 +166,10 @@ mod wasm_tests { #[wasm_bindgen_test] fn extract_town_name_with_js_sys_block_number_boundary_value() { - let result = extract_town_name_with_js_sys_regexp("有楽町127"); + let result = extract_town_name_with_js_sys_regexp("西十九条南42"); assert!(result.is_some()); - assert_eq!(result.unwrap(), "有楽町百二十七丁目"); - let result = extract_town_name_with_js_sys_regexp("有楽町128"); + assert_eq!(result.unwrap(), "西十九条南四十二丁目"); + let result = extract_town_name_with_js_sys_regexp("西十九条南43"); assert!(result.is_none()); } diff --git a/src/parser/read_town.rs b/src/parser/read_town.rs index 8d0b1d43..e9a0b535 100644 --- a/src/parser/read_town.rs +++ b/src/parser/read_town.rs @@ -22,6 +22,10 @@ pub fn read_town(input: &str, city: &City) -> Option<(String, String)> { if let Some(result) = find_town(&input, city) { return Some(result); } + // ここまでで町名の検出に成功しない場合は、「大字」の省略の可能性を検討する + if let Some(result) = find_town(&format!("大字{}", input), city) { + return Some(result); + } None } @@ -49,9 +53,8 @@ fn find_town(input: &String, city: &City) -> Option<(String, String)> { None } -#[cfg(test)] -#[cfg(not(target_arch = "wasm32"))] -mod parser_tests { +#[cfg(all(test, not(target_arch = "wasm32")))] +mod tests { use crate::api::{BlockingApi, BlockingApiImpl}; use crate::entity::{City, Town}; use crate::parser::read_town::read_town; @@ -162,4 +165,19 @@ mod parser_tests { assert_eq!(town, town_name); } } + + #[test] + fn read_town_大字の省略_東京都西多摩郡日の出町大字平井() { + let blocking_api = BlockingApiImpl::new(); + let city = blocking_api + .get_city_master("東京都", "西多摩郡日の出町") + .unwrap(); + + let (rest, town) = read_town("大字平井2780番地", &city).unwrap(); + assert_eq!(town, "大字平井"); + assert_eq!(rest, "2780番地"); + let (rest, town) = read_town("平井2780番地", &city).unwrap(); + assert_eq!(town, "大字平井"); + assert_eq!(rest, "2780番地"); + } } diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 808ac088..e01baee7 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -25,3 +25,8 @@ async fn 住居表示実施済みの住所において正式でない表記へ ) .await } + +#[tokio::test] +async fn 大字表記省略への対応テスト() { + run_data_driven_tests("./tests/test_data/大字表記省略への対応.csv").await +} diff --git "a/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" new file mode 100644 index 00000000..f00eb9a6 --- /dev/null +++ "b/tests/test_data/\345\244\247\345\255\227\350\241\250\350\250\230\347\234\201\347\225\245\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -0,0 +1,12 @@ +address,prefecture,city,town,rest +東京都西多摩郡日の出町大字平井2780番地,東京都,西多摩郡日の出町,大字平井,2780番地 +東京都西多摩郡日の出町平井2780番地,東京都,西多摩郡日の出町,大字平井,2780番地 +埼玉県南埼玉郡宮代町大字東粂原110,埼玉県,南埼玉郡宮代町,大字東粂原,110 +埼玉県南埼玉郡宮代町東粂原110,埼玉県,南埼玉郡宮代町,大字東粂原,110 +愛知県名古屋市守山区大字上志段味馬洗淵,愛知県,名古屋市守山区,大字上志段味,馬洗淵 +愛知県名古屋市守山区上志段味馬洗淵,愛知県,名古屋市守山区,大字上志段味,馬洗淵 +# 住居表示一部実施済みだが大字も残っている地域の場合 +福岡県福岡市南区大字桧原853-9,福岡県,福岡市南区,大字桧原,853-9 +福岡県福岡市南区桧原6-44-20,福岡県,福岡市南区,桧原六丁目,44-20 +福岡県遠賀郡遠賀町浅木463,福岡県,遠賀郡遠賀町,大字浅木,463 +福岡県遠賀郡遠賀町浅木1-16,福岡県,遠賀郡遠賀町,浅木一丁目,16 \ No newline at end of file