Skip to content

Commit

Permalink
Merge pull request #238 from YuukiToriyama/release/v0.1.0-beta.15
Browse files Browse the repository at this point in the history
release/v0.1.0-beta.15をmainブランチにマージ
  • Loading branch information
YuukiToriyama authored Apr 21, 2024
2 parents 5d80f07 + 9c8f9fa commit ee68ffd
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 61 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/upload-pypi-org.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
push:
tags:
- 'v*'
workflow_dispatch:

permissions:
contents: read
Expand Down Expand Up @@ -31,6 +32,9 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
# 0.12.0にするとppc64leでビルドに失敗するため、0.11.0を手動インストールしている
- name: Install Zig 0.11.0
run: python3 -m pip install ziglang==0.11.0
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
Expand Down Expand Up @@ -120,7 +124,6 @@ jobs:
name: Release
runs-on: ubuntu-latest
environment: pypi
if: "startsWith(github.ref, 'refs/tags/')"
needs: [linux, windows, macos, sdist]
steps:
- uses: actions/download-artifact@v4
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.0-beta.14"
version = "0.1.0-beta.15"
edition = "2021"
description = "A Rust Library to parse japanese addresses."
repository = "https://github.com/YuukiToriyama/japanese-address-parser"
Expand Down
146 changes: 87 additions & 59 deletions core/src/util/sequence_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,15 @@ pub enum Error {
impl SequenceMatcher {
pub fn get_most_similar_match(
input: &str,
possibilities: &Vec<String>,
possibilities: &[String],
threshold: Option<f64>,
) -> Result<String, Error> {
let mut highest_similarity: f64 = 0.0;
let mut highest_matches: Vec<String> = vec![];
let length_of_longest_possibility = possibilities.iter().map(|x| x.len()).max().unwrap();
let length_of_longest_possibility = Self::get_length_of_longest_one(possibilities).unwrap();
let input = Self::cut_text(input, length_of_longest_possibility);
for possibility in possibilities {
let similarity = Self::evaluate_match_ratio(
possibility,
if input.len() > length_of_longest_possibility {
input.get(0..length_of_longest_possibility).unwrap()
} else {
input
},
);
let similarity = Self::evaluate_match_ratio(possibility, &input);
if similarity >= highest_similarity {
if similarity > highest_similarity {
highest_matches.clear();
Expand All @@ -43,6 +37,18 @@ impl SequenceMatcher {
}
}

/// Returns the length, counted in `char`s, of the longest string in `text_list`.
///
/// Returns `None` when `text_list` is empty.
fn get_length_of_longest_one(text_list: &[String]) -> Option<usize> {
    let mut longest: Option<usize> = None;
    for text in text_list {
        // Count characters rather than bytes so multi-byte text is measured per character.
        let char_count = text.chars().count();
        longest = match longest {
            Some(current) if current >= char_count => Some(current),
            _ => Some(char_count),
        };
    }
    longest
}

/// Returns the first `length` characters of `input` as an owned `String`.
///
/// Length is measured in `char`s rather than bytes, so multi-byte text is never
/// split inside a character. When `input` has `length` characters or fewer, the
/// whole input is returned.
fn cut_text(input: &str, length: usize) -> String {
    // The byte offset of the character at index `length` is exactly where the
    // prefix of `length` characters ends; if no such character exists, the
    // input is already short enough.
    let prefix = match input.char_indices().nth(length) {
        Some((end, _)) => &input[..end],
        None => input,
    };
    prefix.to_string()
}

fn evaluate_match_ratio(left: &str, right: &str) -> f64 {
if left == right {
return 1.0;
Expand All @@ -56,6 +62,24 @@ mod tests {
use crate::util::sequence_matcher::Error::{MoreThanOneCandidateExist, NoCandidateExist};
use crate::util::sequence_matcher::SequenceMatcher;

#[test]
fn get_length_of_longest_one() {
    // An empty candidate list has no longest element.
    let no_candidates: Vec<String> = Vec::new();
    assert_eq!(
        SequenceMatcher::get_length_of_longest_one(&no_candidates),
        None
    );

    // The shared city-name fixture is expected to have a longest entry of 8 characters.
    let city_names = generate_city_name_list();
    let longest = SequenceMatcher::get_length_of_longest_one(&city_names);
    assert_eq!(longest, Some(8));
}

#[test]
fn cut_text() {
    let city_name = "南会津郡檜枝岐村";
    // (requested length in characters, expected prefix)
    let cases = [
        (0, ""),
        (1, "南"),
        (8, "南会津郡檜枝岐村"),
        // Asking for more characters than exist returns the whole text.
        (9, "南会津郡檜枝岐村"),
    ];
    for (length, expected) in cases {
        assert_eq!(SequenceMatcher::cut_text(city_name, length), expected);
    }
}

#[test]
fn evaluate_match_ratio_一致度100() {
assert_eq!(
Expand All @@ -82,7 +106,58 @@ mod tests {

#[test]
fn get_most_similar_match() {
    let possibilities = generate_city_name_list();
    // (address text that omits the district prefix, city name expected to be chosen)
    let cases = [
        ("西郷村大字熊倉字折口原40番地", "西白河郡西郷村"),
        ("小野町大字小野新町字舘廻", "田村郡小野町"),
        ("桑折町大字谷地字道下22番地7", "伊達郡桑折町"),
    ];
    for (input, expected) in cases {
        let result = SequenceMatcher::get_most_similar_match(input, &possibilities, None);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }
}

// Case: several candidates share the same similarity.
#[test]
fn get_most_similar_match_類似度が同じものが複数ある場合() {
    let possibilities = vec!["周智郡森町".to_string(), "茅部郡森町".to_string()];

    // Precondition: both candidates score identically against the input.
    let first_ratio = SequenceMatcher::evaluate_match_ratio("森町", &possibilities[0]);
    let second_ratio = SequenceMatcher::evaluate_match_ratio("森町", &possibilities[1]);
    assert_eq!(first_ratio, second_ratio);

    // A tie must be reported as an error carrying every tied candidate.
    let result = SequenceMatcher::get_most_similar_match("森町", &possibilities, None);
    assert!(result.is_err());
    let expected_error =
        MoreThanOneCandidateExist(vec!["周智郡森町".to_string(), "茅部郡森町".to_string()]);
    assert_eq!(result.unwrap_err(), expected_error);
}

// Case: no candidate is accepted when a threshold of 0.9 is supplied.
#[test]
fn get_most_similar_match_マッチ候補が一つもない場合() {
    let possibilities = vec!["上村".to_string(), "下町".to_string()];
    let threshold = Some(0.9);

    let result = SequenceMatcher::get_most_similar_match("上町", &possibilities, threshold);

    assert!(result.is_err());
    assert_eq!(result.unwrap_err(), NoCandidateExist);
}

fn generate_city_name_list() -> Vec<String> {
vec![
"福島市".to_string(),
"会津若松市".to_string(),
"郡山市".to_string(),
Expand Down Expand Up @@ -142,53 +217,6 @@ mod tests {
"双葉郡葛尾村".to_string(),
"相馬郡新地町".to_string(),
"相馬郡飯舘村".to_string(),
];
let result = SequenceMatcher::get_most_similar_match(
"西郷村大字熊倉字折口原40番地",
&possibilities,
None,
);
assert!(result.is_ok());
assert_eq!(result.unwrap(), "西白河郡西郷村");
let result = SequenceMatcher::get_most_similar_match(
"小野町大字小野新町字舘廻",
&possibilities,
None,
);
assert!(result.is_ok());
assert_eq!(result.unwrap(), "田村郡小野町");
let result = SequenceMatcher::get_most_similar_match(
"桑折町大字谷地字道下22番地7",
&possibilities,
None,
);
assert!(result.is_ok());
assert_eq!(result.unwrap(), "伊達郡桑折町");
}

#[test]
fn get_most_similar_match_類似度が同じものが複数ある場合() {
let possibilities = vec!["周智郡森町".to_string(), "茅部郡森町".to_string()];
assert_eq!(
SequenceMatcher::evaluate_match_ratio("森町", &possibilities[0]),
SequenceMatcher::evaluate_match_ratio("森町", &possibilities[1])
);
let result = SequenceMatcher::get_most_similar_match("森町", &possibilities, None);
assert!(result.is_err());
assert_eq!(
result.err().unwrap(),
MoreThanOneCandidateExist(vec!["周智郡森町".to_string(), "茅部郡森町".to_string()])
);
}

#[test]
fn get_most_similar_match_マッチ候補が一つもない場合() {
let result = SequenceMatcher::get_most_similar_match(
"上町",
&vec!["上村".to_string(), "下町".to_string()],
Some(0.9),
);
assert!(result.is_err());
assert_eq!(result.err().unwrap(), NoCandidateExist);
]
}
}

0 comments on commit ee68ffd

Please sign in to comment.