diff --git a/.github/workflows/upload-pypi-org.yaml b/.github/workflows/upload-pypi-org.yaml index 590aef68..675d22a6 100644 --- a/.github/workflows/upload-pypi-org.yaml +++ b/.github/workflows/upload-pypi-org.yaml @@ -4,6 +4,7 @@ on: push: tags: - 'v*' + workflow_dispatch: permissions: contents: read @@ -31,6 +32,9 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.10' + # 0.12.0にするとppc64leでビルドに失敗するため、0.11.0を手動インストールしている + - name: Install Zig 0.11.0 + run: python3 -m pip install ziglang==0.11.0 - name: Build wheels uses: PyO3/maturin-action@v1 with: @@ -120,7 +124,6 @@ jobs: name: Release runs-on: ubuntu-latest environment: pypi - if: "startsWith(github.ref, 'refs/tags/')" needs: [linux, windows, macos, sdist] steps: - uses: actions/download-artifact@v4 diff --git a/Cargo.toml b/Cargo.toml index 90f03711..7d230a83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.1.0-beta.14" +version = "0.1.0-beta.15" edition = "2021" description = "A Rust Library to parse japanese addresses." 
repository = "https://github.com/YuukiToriyama/japanese-address-parser" diff --git a/core/src/util/sequence_matcher.rs b/core/src/util/sequence_matcher.rs index 06c9e18a..b21923dc 100644 --- a/core/src/util/sequence_matcher.rs +++ b/core/src/util/sequence_matcher.rs @@ -11,21 +11,15 @@ pub enum Error { impl SequenceMatcher { pub fn get_most_similar_match( input: &str, - possibilities: &Vec<String>, + possibilities: &[String], threshold: Option<f64>, ) -> Result<String, Error> { let mut highest_similarity: f64 = 0.0; let mut highest_matches: Vec<String> = vec![]; - let length_of_longest_possibility = possibilities.iter().map(|x| x.len()).max().unwrap(); + let length_of_longest_possibility = Self::get_length_of_longest_one(possibilities).unwrap(); + let input = Self::cut_text(input, length_of_longest_possibility); for possibility in possibilities { - let similarity = Self::evaluate_match_ratio( - possibility, - if input.len() > length_of_longest_possibility { - input.get(0..length_of_longest_possibility).unwrap() - } else { - input - }, - ); + let similarity = Self::evaluate_match_ratio(possibility, &input); if similarity >= highest_similarity { if similarity > highest_similarity { highest_matches.clear(); @@ -43,6 +37,18 @@ impl SequenceMatcher { } } + fn get_length_of_longest_one(text_list: &[String]) -> Option<usize> { + text_list.iter().map(|x| x.chars().count()).max() + } + + fn cut_text(input: &str, length: usize) -> String { + if input.chars().count() > length { + input.chars().take(length).collect::<String>() + } else { + input.to_string() + } + } + fn evaluate_match_ratio(left: &str, right: &str) -> f64 { if left == right { return 1.0; @@ -56,6 +62,24 @@ mod tests { use crate::util::sequence_matcher::Error::{MoreThanOneCandidateExist, NoCandidateExist}; use crate::util::sequence_matcher::SequenceMatcher; + #[test] + fn get_length_of_longest_one() { + assert_eq!(SequenceMatcher::get_length_of_longest_one(&vec![]), None); + assert_eq!( + SequenceMatcher::get_length_of_longest_one(&generate_city_name_list()), 
Some(8) + ); + } + + #[test] + fn cut_text() { + let city_name = "南会津郡檜枝岐村"; + assert_eq!(SequenceMatcher::cut_text(city_name, 0), ""); + assert_eq!(SequenceMatcher::cut_text(city_name, 1), "南"); + assert_eq!(SequenceMatcher::cut_text(city_name, 8), "南会津郡檜枝岐村"); + assert_eq!(SequenceMatcher::cut_text(city_name, 9), "南会津郡檜枝岐村"); + } + #[test] fn evaluate_match_ratio_一致度100() { assert_eq!( @@ -82,7 +106,58 @@ mod tests { #[test] fn get_most_similar_match() { - let possibilities = vec![ + let possibilities = generate_city_name_list(); + let result = SequenceMatcher::get_most_similar_match( + "西郷村大字熊倉字折口原40番地", + &possibilities, + None, + ); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "西白河郡西郷村"); + let result = SequenceMatcher::get_most_similar_match( + "小野町大字小野新町字舘廻", + &possibilities, + None, + ); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "田村郡小野町"); + let result = SequenceMatcher::get_most_similar_match( + "桑折町大字谷地字道下22番地7", + &possibilities, + None, + ); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "伊達郡桑折町"); + } + + #[test] + fn get_most_similar_match_類似度が同じものが複数ある場合() { + let possibilities = vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]; + assert_eq!( + SequenceMatcher::evaluate_match_ratio("森町", &possibilities[0]), + SequenceMatcher::evaluate_match_ratio("森町", &possibilities[1]) + ); + let result = SequenceMatcher::get_most_similar_match("森町", &possibilities, None); + assert!(result.is_err()); + assert_eq!( + result.err().unwrap(), + MoreThanOneCandidateExist(vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]) + ); + } + + #[test] + fn get_most_similar_match_マッチ候補が一つもない場合() { + let result = SequenceMatcher::get_most_similar_match( + "上町", + &vec!["上村".to_string(), "下町".to_string()], + Some(0.9), + ); + assert!(result.is_err()); + assert_eq!(result.err().unwrap(), NoCandidateExist); + } + + fn generate_city_name_list() -> Vec<String> { + vec![ "福島市".to_string(), "会津若松市".to_string(), "郡山市".to_string(), @@ -142,53 +217,6 @@ mod tests { 
"双葉郡葛尾村".to_string(), "相馬郡新地町".to_string(), "相馬郡飯舘村".to_string(), - ]; - let result = SequenceMatcher::get_most_similar_match( - "西郷村大字熊倉字折口原40番地", - &possibilities, - None, - ); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), "西白河郡西郷村"); - let result = SequenceMatcher::get_most_similar_match( - "小野町大字小野新町字舘廻", - &possibilities, - None, - ); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), "田村郡小野町"); - let result = SequenceMatcher::get_most_similar_match( - "桑折町大字谷地字道下22番地7", - &possibilities, - None, - ); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), "伊達郡桑折町"); - } - - #[test] - fn get_most_similar_match_類似度が同じものが複数ある場合() { - let possibilities = vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]; - assert_eq!( - SequenceMatcher::evaluate_match_ratio("森町", &possibilities[0]), - SequenceMatcher::evaluate_match_ratio("森町", &possibilities[1]) - ); - let result = SequenceMatcher::get_most_similar_match("森町", &possibilities, None); - assert!(result.is_err()); - assert_eq!( - result.err().unwrap(), - MoreThanOneCandidateExist(vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]) - ); - } - - #[test] - fn get_most_similar_match_マッチ候補が一つもない場合() { - let result = SequenceMatcher::get_most_similar_match( - "上町", - &vec!["上村".to_string(), "下町".to_string()], - Some(0.9), - ); - assert!(result.is_err()); - assert_eq!(result.err().unwrap(), NoCandidateExist); + ] } }