From a9fc843aaba7bd142d38f61cae2644a03bd3d059 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Apr 2024 08:59:06 +0900 Subject: [PATCH 1/7] =?UTF-8?q?fix:=20#236:=20`get=5Fmost=5Fsimilar=5Fmatc?= =?UTF-8?q?h()`=E3=81=AB=E3=81=8A=E3=81=84=E3=81=A6=E6=96=87=E5=AD=97?= =?UTF-8?q?=E6=95=B0=E3=82=92=E8=AA=A4=E3=81=A3=E3=81=A6=E3=82=AB=E3=82=A6?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=81=97=E3=81=A6=E3=81=84=E3=82=8B=E5=95=8F?= =?UTF-8?q?=E9=A1=8C=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/util/sequence_matcher.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/core/src/util/sequence_matcher.rs b/core/src/util/sequence_matcher.rs index 06c9e18a..54acd874 100644 --- a/core/src/util/sequence_matcher.rs +++ b/core/src/util/sequence_matcher.rs @@ -16,16 +16,10 @@ impl SequenceMatcher { ) -> Result { let mut highest_similarity: f64 = 0.0; let mut highest_matches: Vec = vec![]; - let length_of_longest_possibility = possibilities.iter().map(|x| x.len()).max().unwrap(); + let length_of_longest_possibility = Self::get_length_of_longest_one(possibilities).unwrap(); + let input = Self::cut_text(input, length_of_longest_possibility); for possibility in possibilities { - let similarity = Self::evaluate_match_ratio( - possibility, - if input.len() > length_of_longest_possibility { - input.get(0..length_of_longest_possibility).unwrap() - } else { - input - }, - ); + let similarity = Self::evaluate_match_ratio(possibility, &input); if similarity >= highest_similarity { if similarity > highest_similarity { highest_matches.clear(); @@ -43,6 +37,18 @@ impl SequenceMatcher { } } + fn get_length_of_longest_one(text_list: &Vec) -> Option { + text_list.iter().map(|x| x.chars().count()).max() + } + + fn cut_text(input: &str, length: usize) -> String { + if input.chars().count() > length { + input.chars().take(length).collect::() + } else { + input.to_string() + } + } + fn evaluate_match_ratio(left: &str, right: &str) -> f64 { if left == right { return 1.0; From 80d61b26684c9ed24fa33dd4b4840265a0291cce Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Apr 2024 09:00:09 +0900 Subject: [PATCH 2/7] =?UTF-8?q?fix:=20#236:=20=E6=96=87=E5=AD=97=E5=88=97?= =?UTF-8?q?=E3=81=AE=E5=88=87=E3=82=8A=E5=87=BA=E3=81=97=E5=87=A6=E7=90=86?= =?UTF-8?q?=E3=81=AB=E4=BD=BF=E7=94=A8=E3=81=99=E3=82=8B=E3=83=A1=E3=82=BD?= =?UTF-8?q?=E3=83=83=E3=83=89=E3=81=AB=E5=AF=BE=E3=81=99=E3=82=8B=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=B3=E3=83=BC=E3=83=89=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/util/sequence_matcher.rs | 120 ++++++++++++++++++------------ 1 file changed, 71 insertions(+), 49 deletions(-) diff --git a/core/src/util/sequence_matcher.rs b/core/src/util/sequence_matcher.rs index 54acd874..1f8e92fd 100644 --- a/core/src/util/sequence_matcher.rs +++ b/core/src/util/sequence_matcher.rs @@ -62,6 +62,24 @@ mod tests { use crate::util::sequence_matcher::Error::{MoreThanOneCandidateExist, NoCandidateExist}; use crate::util::sequence_matcher::SequenceMatcher; + #[test] + fn get_length_of_longest_one() { + assert_eq!(SequenceMatcher::get_length_of_longest_one(&vec![]), None); + assert_eq!( + SequenceMatcher::get_length_of_longest_one(&generate_city_name_list()), + Some(8) + ); + } + + #[test] + fn cut_text() { + let city_name = "南会津郡檜枝岐村"; + assert_eq!(SequenceMatcher::cut_text(city_name, 0), ""); + assert_eq!(SequenceMatcher::cut_text(city_name, 1), "南"); + assert_eq!(SequenceMatcher::cut_text(city_name, 8), "南会津郡檜枝岐村"); + assert_eq!(SequenceMatcher::cut_text(city_name, 9), "南会津郡檜枝岐村"); + } + #[test] fn evaluate_match_ratio_一致度100() { assert_eq!( @@ -88,7 +106,58 @@ mod tests { #[test] fn get_most_similar_match() { - let possibilities = vec![ + let possibilities = generate_city_name_list(); + let result = SequenceMatcher::get_most_similar_match( + "西郷村大字熊倉字折口原40番地", + &possibilities, + None, + ); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "西白河郡西郷村"); + let result = SequenceMatcher::get_most_similar_match( + "小野町大字小野新町字舘廻", + &possibilities, + None, + ); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "田村郡小野町"); + let result = SequenceMatcher::get_most_similar_match( + "桑折町大字谷地字道下22番地7", + &possibilities, + None, + ); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "伊達郡桑折町"); + } + + #[test] + fn get_most_similar_match_類似度が同じものが複数ある場合() { + let possibilities = vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]; + assert_eq!( + SequenceMatcher::evaluate_match_ratio("森町", &possibilities[0]), + SequenceMatcher::evaluate_match_ratio("森町", &possibilities[1]) + ); + let result = SequenceMatcher::get_most_similar_match("森町", &possibilities, None); + assert!(result.is_err()); + assert_eq!( + result.err().unwrap(), + MoreThanOneCandidateExist(vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]) + ); + } + + #[test] + fn get_most_similar_match_マッチ候補が一つもない場合() { + let result = SequenceMatcher::get_most_similar_match( + "上町", + &vec!["上村".to_string(), "下町".to_string()], + Some(0.9), + ); + assert!(result.is_err()); + assert_eq!(result.err().unwrap(), NoCandidateExist); + } + + fn generate_city_name_list() -> Vec { + vec![ "福島市".to_string(), "会津若松市".to_string(), "郡山市".to_string(), @@ -148,53 +217,6 @@ mod tests { "双葉郡葛尾村".to_string(), "相馬郡新地町".to_string(), "相馬郡飯舘村".to_string(), - ]; - let result = SequenceMatcher::get_most_similar_match( - "西郷村大字熊倉字折口原40番地", - &possibilities, - None, - ); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), "西白河郡西郷村"); - let result = SequenceMatcher::get_most_similar_match( - "小野町大字小野新町字舘廻", - &possibilities, - None, - ); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), "田村郡小野町"); - let result = SequenceMatcher::get_most_similar_match( - "桑折町大字谷地字道下22番地7", - &possibilities, - None, - ); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), "伊達郡桑折町"); - } - - #[test] - fn get_most_similar_match_類似度が同じものが複数ある場合() { - let possibilities = vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]; - assert_eq!( - SequenceMatcher::evaluate_match_ratio("森町", &possibilities[0]), - SequenceMatcher::evaluate_match_ratio("森町", &possibilities[1]) - ); - let result = SequenceMatcher::get_most_similar_match("森町", &possibilities, None); - assert!(result.is_err()); - assert_eq!( - result.err().unwrap(), - MoreThanOneCandidateExist(vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]) - ); - } - - #[test] - fn get_most_similar_match_マッチ候補が一つもない場合() { - let result = SequenceMatcher::get_most_similar_match( - "上町", - &vec!["上村".to_string(), "下町".to_string()], - Some(0.9), - ); - assert!(result.is_err()); - assert_eq!(result.err().unwrap(), NoCandidateExist); + ] } } From 5d358036b6ad99ce54a8d97fca4584e92d3e07d4 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Apr 2024 09:04:15 +0900 Subject: [PATCH 3/7] =?UTF-8?q?fix:=20#236:=20clippy=E6=8C=87=E6=91=98?= =?UTF-8?q?=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/util/sequence_matcher.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/util/sequence_matcher.rs b/core/src/util/sequence_matcher.rs index 1f8e92fd..b21923dc 100644 --- a/core/src/util/sequence_matcher.rs +++ b/core/src/util/sequence_matcher.rs @@ -11,7 +11,7 @@ pub enum Error { impl SequenceMatcher { pub fn get_most_similar_match( input: &str, - possibilities: &Vec, + possibilities: &[String], threshold: Option, ) -> Result { let mut highest_similarity: f64 = 0.0; @@ -37,7 +37,7 @@ impl SequenceMatcher { } } - fn get_length_of_longest_one(text_list: &Vec) -> Option { + fn get_length_of_longest_one(text_list: &[String]) -> Option { text_list.iter().map(|x| x.chars().count()).max() } From 6cbece84613cf592c2534331690b5d39bc6d1cd1 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Apr 2024 09:07:58 +0900 Subject: [PATCH 4/7] =?UTF-8?q?package.version=E3=81=AE=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0.1.0-beta.15 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 90f03711..7d230a83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.1.0-beta.14" +version = "0.1.0-beta.15" edition = "2021" description = "A Rust Library to parse japanese addresses." repository = "https://github.com/YuukiToriyama/japanese-address-parser" From bc13fb0144f78ec927644cf172aac77c2aec6839 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Apr 2024 20:40:23 +0900 Subject: [PATCH 5/7] =?UTF-8?q?fix:=20linux=E5=90=91=E3=81=91=E3=81=AEmatu?= =?UTF-8?q?rin=20build=E3=81=8C=E5=A4=B1=E6=95=97=E3=81=99=E3=82=8B:=20zig?= =?UTF-8?q?lang=E3=81=AE=E3=83=90=E3=83=BC=E3=82=B8=E3=83=A7=E3=83=B3?= =?UTF-8?q?=E3=82=920.11.0=E3=81=AB=E5=9B=BA=E5=AE=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `PyO3/maturin-action`はziglangがインストールされていないとき最新版をインストールするが、0.12.0にするとppc64leアーキテクチャでビルドが失敗するため、明示的に0.11.0をインストールする --- .github/workflows/upload-pypi-org.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/upload-pypi-org.yaml b/.github/workflows/upload-pypi-org.yaml index 590aef68..dc1d78f4 100644 --- a/.github/workflows/upload-pypi-org.yaml +++ b/.github/workflows/upload-pypi-org.yaml @@ -31,6 +31,9 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.10' + # 0.12.0にするとppc64leでビルドに失敗するため、0.11.0を手動インストールしている + - name: Install Zig 0.11.0 + run: python3 -m pip install ziglang==0.11.0 - name: Build wheels uses: PyO3/maturin-action@v1 with: From 26b99c62690b435aa03d540b482a0b50845c9952 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Apr 2024 20:49:12 +0900 Subject: [PATCH 6/7] =?UTF-8?q?fix:=20linux=E5=90=91=E3=81=91=E3=81=AEmatu?= =?UTF-8?q?rin=20build=E3=81=8C=E5=A4=B1=E6=95=97=E3=81=99=E3=82=8B:=20?= =?UTF-8?q?=E3=83=AF=E3=83=BC=E3=82=AF=E3=83=95=E3=83=AD=E3=83=BC=E3=81=AE?= =?UTF-8?q?=E3=83=88=E3=83=AA=E3=82=AC=E3=83=BC=E3=81=AB`workflow=5Fdispat?= =?UTF-8?q?ch`=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ワークフローが失敗した時に手動で実行できるようにした --- .github/workflows/upload-pypi-org.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/upload-pypi-org.yaml b/.github/workflows/upload-pypi-org.yaml index dc1d78f4..325d2144 100644 --- a/.github/workflows/upload-pypi-org.yaml +++ b/.github/workflows/upload-pypi-org.yaml @@ -4,6 +4,7 @@ on: push: tags: - 'v*' + workflow_dispatch: permissions: contents: read From 9c8f9fa334c0862325d12a4a8e9abc166975896c Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sun, 21 Apr 2024 21:01:16 +0900 Subject: [PATCH 7/7] =?UTF-8?q?fix:=20linux=E5=90=91=E3=81=91=E3=81=AEmatu?= =?UTF-8?q?rin=20build=E3=81=8C=E5=A4=B1=E6=95=97=E3=81=99=E3=82=8B:=20pyp?= =?UTF-8?q?i.org=E3=81=B8=E3=81=AE=E3=82=A2=E3=83=83=E3=83=97=E3=83=AD?= =?UTF-8?q?=E3=83=BC=E3=83=89=E3=82=B8=E3=83=A7=E3=83=96=E3=81=AB=E5=AE=9F?= =?UTF-8?q?=E8=A1=8C=E6=9D=A1=E4=BB=B6=E3=82=92=E3=81=A4=E3=81=91=E3=81=A6?= =?UTF-8?q?=E3=81=84=E3=81=9F=E3=81=8C=E4=B8=8D=E8=A6=81=E3=81=AA=E3=81=9F?= =?UTF-8?q?=E3=82=81=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/upload-pypi-org.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/upload-pypi-org.yaml b/.github/workflows/upload-pypi-org.yaml index 325d2144..675d22a6 100644 --- a/.github/workflows/upload-pypi-org.yaml +++ b/.github/workflows/upload-pypi-org.yaml @@ -124,7 +124,6 @@ jobs: name: Release runs-on: ubuntu-latest environment: pypi - if: "startsWith(github.ref, 'refs/tags/')" needs: [linux, windows, macos, sdist] steps: - uses: actions/download-artifact@v4