Skip to content

Commit

Permalink
Merge pull request #488 from YuukiToriyama/main
Browse files Browse the repository at this point in the history
chimei-ruiju.orgへの対応: main(v0.1.21)との差分を取り込み
  • Loading branch information
YuukiToriyama authored Oct 31, 2024
2 parents 4bccc95 + ee97b64 commit 4915a3a
Show file tree
Hide file tree
Showing 14 changed files with 750 additions and 16 deletions.
1 change: 1 addition & 0 deletions .github/workflows/run-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
run: |
cargo test
cargo test --features=blocking
cargo test --features=experimental
- name: Integration test
working-directory: tests
run: cargo test
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.19"
version = "0.1.21"
edition = "2021"
description = "A Rust Library to parse japanese addresses."
repository = "https://github.com/YuukiToriyama/japanese-address-parser"
Expand All @@ -18,6 +18,7 @@ keywords = ["parser", "geo", "wasm"]
categories = ["parser-implementations", "wasm"]

[workspace.dependencies]
log = "0.4.22"
serde = { version = "1.0.192", features = ["derive"] }
tokio = { version = "1.38.0", features = ["rt", "macros"] }
wasm-bindgen = "0.2.92"
Expand Down
7 changes: 7 additions & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ blocking = ["reqwest/blocking"]
city-name-correction = []
format-house-number = []
eliminate-whitespaces = []
experimental = []

[[bench]]
name = "core_benchmark"
harness = false

[dependencies]
itertools = "0.13.0"
log.workspace = true
rapidfuzz = "0.5.0"
regex = { version = "1.10.6", default-features = false, features = ["std", "unicode-perl"] }
serde.workspace = true
Expand All @@ -43,3 +45,8 @@ wasm-bindgen-test = { workspace = true }

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
mockito = "1.4.0" # mockitoがwasm32に対応していないため

[package.metadata.docs.rs]
all-features = true
targets = ["x86_64-unknown-linux-gnu"]
rustdoc-args = ["--cfg", "docsrs"]
6 changes: 3 additions & 3 deletions core/src/domain/common/latlng.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[derive(Clone, Debug, PartialEq)]
pub struct LatLng {
/// 緯度
latitude: f64,
/// 軽度
longitude: f64,
pub(crate) latitude: f64,
/// 経度
pub(crate) longitude: f64,
}
76 changes: 76 additions & 0 deletions core/src/domain/common/token.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::domain::common::latlng::LatLng;
use std::cmp::Ordering;
use std::cmp::Ordering::{Equal, Greater, Less};

#[derive(Clone, Debug, PartialEq)]
pub enum Token {
Expand All @@ -8,6 +10,37 @@ pub enum Token {
Rest(String),
}

impl PartialOrd for Token {
    /// Orders tokens by their position in an address:
    /// `Prefecture` < `City` < `Town` < `Rest`.
    ///
    /// Only the variant is compared; the payload is ignored, so the result is
    /// always `Some(_)`.
    ///
    /// NOTE(review): two tokens of the same variant compare as `Equal` even when
    /// their contents differ, while the derived `PartialEq` compares contents.
    /// Confirm that callers only rely on this ordering for sorting by kind.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = match (self, other) {
            // Same kind on both sides.
            (Token::Prefecture(_), Token::Prefecture(_))
            | (Token::City(_), Token::City(_))
            | (Token::Town(_), Token::Town(_))
            | (Token::Rest(_), Token::Rest(_)) => Equal,
            // Different kinds: peel the hierarchy from the outermost level inwards.
            // Every arm below is only reached once the broader levels are ruled out.
            (Token::Prefecture(_), _) => Less,
            (_, Token::Prefecture(_)) => Greater,
            (Token::City(_), _) => Less,
            (_, Token::City(_)) => Greater,
            (Token::Town(_), _) => Less,
            (_, Token::Town(_)) => Greater,
        };
        Some(ordering)
    }
}

#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Prefecture {
pub(crate) prefecture_name: String,
Expand All @@ -29,3 +62,46 @@ pub(crate) struct Town {
/// Returns a new vector holding a clone of every element of `tokens`
/// followed by `token`. The input slice is left untouched.
pub(crate) fn append_token(tokens: &[Token], token: Token) -> Vec<Token> {
    let mut appended = Vec::with_capacity(tokens.len() + 1);
    appended.extend_from_slice(tokens);
    appended.push(token);
    appended
}

#[cfg(test)]
mod tests {
    use crate::domain::common::token::{City, Prefecture, Token, Town};

    /// Sorting a shuffled token list must yield prefecture, city, town, rest.
    #[test]
    fn sort_token_vector() {
        let prefecture = Token::Prefecture(Prefecture {
            prefecture_name: "東京都".to_string(),
            representative_point: None,
        });
        let city = Token::City(City {
            city_name: "小金井市".to_string(),
            representative_point: None,
        });
        let town = Token::Town(Town {
            town_name: "貫井北町四丁目".to_string(),
            representative_point: None,
        });
        let rest = Token::Rest("2-1".to_string());

        // Deliberately out of order: rest, city, prefecture, town.
        let mut shuffled = vec![rest.clone(), city.clone(), prefecture.clone(), town.clone()];
        shuffled.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());

        let expected = vec![prefecture, city, town, rest];
        assert_eq!(shuffled, expected);
    }
}
10 changes: 10 additions & 0 deletions core/src/experimental.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//! 🚧 Experimental module 🚧
//!
//! This module contains unstable functions.
//!
//! Please note that these functions may be removed or changed in backward-incompatible ways without prior notice.
//!
//! If you still want to use this module, enable the `experimental` feature flag.

mod parse_with_geolonia;
pub mod parser;
193 changes: 193 additions & 0 deletions core/src/experimental/parse_with_geolonia.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
use crate::api::AsyncApi;
use crate::domain::common::token::Token;
use crate::experimental::parser::Parser;
use crate::tokenizer::Tokenizer;

impl Parser {
/// Tokenizes `address` step by step (prefecture, then city, then town),
/// looking up the candidate city and town names through `AsyncApi`.
///
/// Each step either advances the tokenizer or ends parsing early. On failure
/// the tokens held at that point are returned as-is, and an error is logged
/// with `log::error!` only when `self.options.verbose` is set. This function
/// never returns an error value; a partial parse shows up only in the
/// returned tokens.
#[inline]
pub(crate) async fn parse_with_geolonia(&self, address: &str) -> Vec<Token> {
let geolonia_api = AsyncApi::default();
let tokenizer = Tokenizer::new(address);

// Detect the prefecture name
let (prefecture, tokenizer) = match tokenizer.read_prefecture() {
Ok(found) => found,
Err(not_found) => {
if self.options.verbose {
log::error!("都道府県名の検出に失敗しました")
}
return not_found.tokens;
}
};

// Detect the city name
// First fetch the master data for the detected prefecture; its `cities`
// list is what the tokenizer matches against below.
let prefecture_master = match geolonia_api
.get_prefecture_master(prefecture.name_ja())
.await
{
Ok(result) => result,
Err(error) => {
if self.options.verbose {
log::error!("{}", error.error_message)
}
return tokenizer.finish().tokens;
}
};
let (city_name, tokenizer) = match tokenizer.read_city(&prefecture_master.cities) {
Ok(found) => found,
Err(not_found) => {
// When the option is enabled, retry once via
// `read_city_with_county_name_completion` before giving up.
if self.options.correct_incomplete_city_names {
match not_found.read_city_with_county_name_completion(&prefecture_master.cities)
{
Ok(result) => result,
Err(not_found) => {
if self.options.verbose {
log::error!("市区町村名の検出に失敗しました")
}
return not_found.tokens;
}
}
} else {
if self.options.verbose {
log::error!("市区町村名の検出に失敗しました")
}
return not_found.finish().tokens;
}
}
};

// Detect the town name
// Fetch the master data for the detected prefecture/city pair.
let city_master = match geolonia_api
.get_city_master(prefecture.name_ja(), &city_name)
.await
{
Ok(result) => result,
Err(error) => {
if self.options.verbose {
log::error!("{}", error.error_message)
}
return tokenizer.finish().tokens;
}
};
// Only the town names are handed to the tokenizer. The first element of the
// returned pair is not needed here; only the advanced tokenizer is kept.
let (_, tokenizer) =
match tokenizer.read_town(city_master.towns.iter().map(|x| x.name.clone()).collect()) {
Ok(found) => found,
Err(not_found) => {
if self.options.verbose {
log::error!("町名の検出に失敗しました")
}
return not_found.tokens;
}
};

tokenizer.finish().tokens
}
}

#[cfg(test)]
mod tests {
    use crate::domain::common::token::{City, Prefecture, Token, Town};
    use crate::experimental::parser::{DataSource, Parser, ParserOptions};

    /// Runs `parse_with_geolonia` on `address` with the configuration shared by
    /// every test here: Geolonia data source, no city-name correction, no logging.
    async fn parse(address: &str) -> Vec<Token> {
        let parser = Parser {
            options: ParserOptions {
                data_source: DataSource::Geolonia,
                correct_incomplete_city_names: false,
                verbose: false,
            },
        };
        parser.parse_with_geolonia(address).await
    }

    /// Prefecture token without a representative point.
    fn prefecture(name: &str) -> Token {
        Token::Prefecture(Prefecture {
            prefecture_name: name.to_string(),
            representative_point: None,
        })
    }

    /// City token without a representative point.
    fn city(name: &str) -> Token {
        Token::City(City {
            city_name: name.to_string(),
            representative_point: None,
        })
    }

    /// Town token without a representative point.
    fn town(name: &str) -> Token {
        Token::Town(Town {
            town_name: name.to_string(),
            representative_point: None,
        })
    }

    /// Token holding the unparsed remainder.
    fn rest(text: &str) -> Token {
        Token::Rest(text.to_string())
    }

    // Case: the prefecture name is wrong, so the whole input is returned as the remainder.
    #[tokio::test]
    async fn 都道府県名が誤っている場合() {
        let tokens = parse("奈川県横浜市磯子区洋光台3-10-3").await;
        let expected = vec![rest("奈川県横浜市磯子区洋光台3-10-3")];
        assert_eq!(tokens, expected)
    }

    // Case: the city name is wrong, so parsing stops after the prefecture.
    #[tokio::test]
    async fn 市区町村名が誤っている場合() {
        let tokens = parse("神奈川県横浜県磯子市洋光台3-10-3").await;
        let expected = vec![prefecture("神奈川県"), rest("横浜県磯子市洋光台3-10-3")];
        assert_eq!(tokens, expected)
    }

    // Case: the town name is wrong, so parsing stops after the city.
    #[tokio::test]
    async fn 町名が誤っている場合() {
        let tokens = parse("神奈川県横浜市磯子区陽光台3-10-3").await;
        let expected = vec![
            prefecture("神奈川県"),
            city("横浜市磯子区"),
            rest("陽光台3-10-3"),
        ];
        assert_eq!(tokens, expected)
    }

    // Case: every level is detected successfully.
    #[tokio::test]
    async fn パースに成功した場合() {
        let tokens = parse("神奈川県横浜市磯子区洋光台3-10-3").await;
        let expected = vec![
            prefecture("神奈川県"),
            city("横浜市磯子区"),
            town("洋光台三丁目"),
            rest("10-3"),
        ];
        assert_eq!(tokens, expected)
    }
}
Loading

0 comments on commit 4915a3a

Please sign in to comment.