Skip to content

Commit

Permalink
Merge pull request #382 from YuukiToriyama/feature/format-house-numbe…
Browse files Browse the repository at this point in the history
…r/master

住居番号の正規化オプションをrelease/v0.1.11にマージ
  • Loading branch information
YuukiToriyama authored Aug 23, 2024
2 parents 060725c + 1003372 commit 5d7162a
Show file tree
Hide file tree
Showing 14 changed files with 298 additions and 93 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/ghpages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ jobs:
run: wasm-pack test --firefox --headless
- name: Build wasm module
working-directory: wasm
run: wasm-pack build --target web --scope toriyama --out-name japanese_address_parser --features debug
run: |
wasm-pack build --target web --scope toriyama --out-name japanese_address_parser_debug --features debug
wasm-pack build --target web --scope toriyama --out-name japanese_address_parser_nightly --features nightly
wasm-pack build --target web --scope toriyama --out-name japanese_address_parser
- name: Move files
run: |
mkdir ./publish
Expand Down
5 changes: 4 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,18 @@ crate-type = ["rlib", "cdylib"]
default = ["city-name-correction"]
blocking = ["reqwest/blocking"]
city-name-correction = []
format-house-number = []

[dependencies]
itertools = "0.13.0"
js-sys = "0.3.67"
rapidfuzz = "0.5.0"
regex = "1.10.2"
serde.workspace = true
reqwest = { version = "0.12.5", default-features = false, features = ["json", "rustls-tls"] }

[target.'cfg(target_arch = "wasm32")'.dependencies]
js-sys = "0.3.67"

[dev-dependencies]
tokio.workspace = true
wasm-bindgen-test = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions core/src/formatter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub(crate) mod house_number;
84 changes: 84 additions & 0 deletions core/src/formatter/house_number.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#[cfg(not(target_arch = "wasm32"))]
pub(crate) fn format_house_number(input: &str) -> Result<String, &'static str> {
let captures = regex::Regex::new(r"(?<block_number>\d+)\D+(?<house_number>\d+)(?<rest>.*)$")
.unwrap()
.captures(input)
.ok_or("マッチするものがありませんでした")?;
let block_number = captures
.name("block_number")
.ok_or("街区符号を検出できませんでした")?;
let house_number = captures
.name("house_number")
.ok_or("住居番号を検出できませんでした")?;
let rest = match captures.name("rest") {
Some(matched) => matched.as_str(),
None => "",
};
Ok(format!(
"{}番{}号{}",
block_number.as_str(),
house_number.as_str(),
rest
))
}

/// Rewrites the first `<digits><separator><digits>` pair found in `input`
/// as `<block>番<house>号`, followed by whatever text came after the pair
/// (wasm32 build, backed by the JavaScript `RegExp` engine via `js_sys`).
///
/// A `号` that directly follows the house number in `input` is consumed by
/// the pattern, so text that is already in the normalized form (`"7番1号"`)
/// is returned unchanged instead of gaining a second `号`.
///
/// The pattern is not anchored at the start of `input`: any text in front of
/// the first digit run is not part of the captures and therefore does not
/// appear in the returned string.
///
/// # Errors
///
/// Returns a static message when `input` does not contain two digit runs
/// separated by at least one non-digit character, or when a capture group
/// cannot be read back as a string.
#[cfg(target_arch = "wasm32")]
pub(crate) fn format_house_number(input: &str) -> Result<String, &'static str> {
    let captures = js_sys::RegExp::new(
        r"(?<block_number>\d+)\D+(?<house_number>\d+)号?(?<rest>.*)$",
        "",
    )
    .exec(input)
    .ok_or("マッチするものがありませんでした")?;

    // Index 0 of the `exec` result is the whole match; the capture groups
    // follow in pattern order starting at index 1.
    let block_number = captures
        .get(1)
        .as_string()
        .ok_or("街区符号を検出できませんでした")?;
    let house_number = captures
        .get(2)
        .as_string()
        .ok_or("住居番号を検出できませんでした")?;
    // A group that is not a string is treated as "no trailing text".
    let rest = captures.get(3).as_string().unwrap_or_default();

    Ok(format!("{}番{}号{}", block_number, house_number, rest))
}

// Native (non-wasm) unit tests for `format_house_number`.
#[cfg(all(test, not(target_arch = "wasm32")))]
mod tests {
    use crate::formatter::house_number::format_house_number;

    /// A bare `block-house` pair gets the 番 / 号 suffixes.
    #[test]
    fn format_house_number_1番1号() {
        let formatted = format_house_number("1-1");
        assert_eq!(formatted, Ok("1番1号".to_string()));
    }

    /// Text after the house number (building name, room number) is carried
    /// over unchanged.
    #[test]
    fn format_house_number_3番2号レジデンシャルマンション101号室() {
        let formatted = format_house_number("3-2レジデンシャルマンション101号室");
        assert_eq!(
            formatted,
            Ok("3番2号レジデンシャルマンション101号室".to_string())
        );
    }
}

// Browser-hosted tests for the wasm32 variant of `format_house_number`.
#[cfg(all(test, target_arch = "wasm32"))]
mod wasm_tests {
    use crate::formatter::house_number::format_house_number;
    use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure};

    wasm_bindgen_test_configure!(run_in_browser);

    /// Checks the same two inputs as the native tests: a bare pair, and a
    /// pair followed by a building name and room number.
    #[wasm_bindgen_test]
    fn format_house_number_success() {
        let cases = [
            ("1-1", "1番1号"),
            (
                "3-2レジデンシャルマンション101号室",
                "3番2号レジデンシャルマンション101号室",
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(format_house_number(input), Ok(expected.to_string()));
        }
    }
}
2 changes: 2 additions & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//! ## Feature flags
//! - `blocking`: Provide method that works synchronously
//! - `city-name-correction`*(enabled by default)*: Enable autocorrection if ambiguous city name was typed
//! - `format-house-number`: Enable normalization of addresses after town name

#[cfg(all(target_family = "wasm", feature = "blocking"))]
compile_error! {
Expand All @@ -13,6 +14,7 @@ pub mod api;
mod domain;
#[deprecated(since = "0.1.6", note = "This module will be deleted in v0.2")]
pub mod entity;
mod formatter;
pub mod parser;
mod repository;
mod service;
Expand Down
1 change: 0 additions & 1 deletion core/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use serde::Serialize;

pub(crate) mod adapter;
pub(crate) mod filter;
mod read_house_number;

impl<T> From<Tokenizer<T>> for Address {
fn from(value: Tokenizer<T>) -> Self {
Expand Down
68 changes: 0 additions & 68 deletions core/src/parser/read_house_number.rs

This file was deleted.

22 changes: 19 additions & 3 deletions core/src/tokenizer/read_town.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::marker::PhantomData;

use crate::formatter::house_number::format_house_number;
use crate::parser::adapter::orthographical_variant_adapter::{
OrthographicalVariantAdapter, OrthographicalVariants, Variant,
};
Expand All @@ -24,7 +25,12 @@ impl Tokenizer<CityNameFound> {
prefecture_name: self.prefecture_name.clone(),
city_name: self.city_name.clone(),
town_name: Some(town_name),
rest,
rest: if cfg!(feature = "format-house-number") && format_house_number(&rest).is_ok()
{
format_house_number(&rest).unwrap()
} else {
rest
},
_state: PhantomData::<TownNameFound>,
});
}
Expand All @@ -36,7 +42,12 @@ impl Tokenizer<CityNameFound> {
prefecture_name: self.prefecture_name.clone(),
city_name: self.city_name.clone(),
town_name: Some(town_name),
rest,
rest: if cfg!(feature = "format-house-number") && format_house_number(&rest).is_ok()
{
format_house_number(&rest).unwrap()
} else {
rest
},
_state: PhantomData::<TownNameFound>,
});
}
Expand All @@ -47,7 +58,12 @@ impl Tokenizer<CityNameFound> {
prefecture_name: self.prefecture_name.clone(),
city_name: self.city_name.clone(),
town_name: Some(town_name),
rest,
rest: if cfg!(feature = "format-house-number") && format_house_number(&rest).is_ok()
{
format_house_number(&rest).unwrap()
} else {
rest
},
_state: PhantomData::<TownNameFound>,
});
}
Expand Down
68 changes: 68 additions & 0 deletions public/debug.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<title>Demo | japanese-address-parser</title>
<link rel="stylesheet" href="./style.css" type="text/css">
</head>
<body>
<div class="ribbon">
<span class="ribbon-label">Debug</span>
</div>
<h2>YuukiToriyama/japanese-address-parser</h2>
<p>Rust製の住所パーサーです</p>

<h3>住所を入力してください</h3>
<div class="input">
<input class="address" id="input" type="text" placeholder="例) 東京都中央区日本橋一丁目1-1"/>
<button class="button" id="exec">パースを実行</button>
</div>

<h3>処理結果</h3>
<table class="output">
<thead>
<tr>
<th>入力値</th>
<th>ステータス</th>
<th>address.prefecture</th>
<th>address.city</th>
<th>address.town</th>
<th>address.rest</th>
<th>JSON</th>
</tr>
</thead>
<tbody id="result">
<tr>
<td><p>東京都中央区日本橋一丁目1-1</p></td>
<td><p>成功</p></td>
<td><p>東京都</p></td>
<td><p>中央区</p></td>
<td><p>日本橋一丁目</p></td>
<td><p>1-1</p></td>
<td><code>{"address":{"prefecture":"東京都","city":"中央区","town":"日本橋一丁目","rest":"1-1"}}</code>
</td>
</tr>
</tbody>
</table>
<script src="table_util.js"></script>
<script type="module">
// Debug demo page: loads the `japanese_address_parser_debug` bundle and
// appends one result row to the table for every click on the "exec" button.
import init, {Parser} from "../pkg/japanese_address_parser_debug.js"

const inputTextArea = document.getElementById("input")

// The click handler is registered only after `init()` resolves
// (presumably the wasm module initialisation — confirm against the bundle).
init().then(() => {
document.getElementById("exec").addEventListener("click", () => {
const input = inputTextArea.value
// Shows the raw input in a blocking dialog before parsing.
alert("input: " + input)
// A fresh Parser is constructed for every click.
const parser = new Parser()
parser.parse(input).then(result => {
// `createRow` is not defined in this script; it is expected to come from
// table_util.js, which the page loads just before this module.
// NOTE(review): a rejected `parse` promise is not handled here.
document.getElementById("result").appendChild(
createRow(input, result)
)
})
})
})
</script>

</body>
</html>
20 changes: 19 additions & 1 deletion public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,25 @@ <h3>処理結果</h3>
</tr>
</tbody>
</table>
<script type="module" src="./main.js"></script>
<script src="table_util.js"></script>
<script type="module">
// Demo page: loads the `japanese_address_parser` bundle and appends one
// result row to the table for every click on the "exec" button.
import init, {Parser} from "../pkg/japanese_address_parser.js"

const inputTextArea = document.getElementById("input")

// The click handler is registered only after `init()` resolves
// (presumably the wasm module initialisation — confirm against the bundle).
init().then(() => {
document.getElementById("exec").addEventListener("click", () => {
const input = inputTextArea.value
// Shows the raw input in a blocking dialog before parsing.
// NOTE(review): confirm this alert is intended outside the debug page.
alert("input: " + input)
// A fresh Parser is constructed for every click.
const parser = new Parser()
parser.parse(input).then(result => {
// `createRow` is not defined in this script; it is expected to come from
// table_util.js, which the page loads just before this module.
// NOTE(review): a rejected `parse` promise is not handled here.
document.getElementById("result").appendChild(
createRow(input, result)
)
})
})
})
</script>

</body>
</html>
Loading

0 comments on commit 5d7162a

Please sign in to comment.