From 5853b80a3a8c36c2a6c06e6be2a600d460d493c0 Mon Sep 17 00:00:00 2001 From: Paul Delafosse Date: Fri, 27 Oct 2023 16:40:28 +0200 Subject: [PATCH] fix: embed tagger assets in binary --- .github/workflows/ci.yaml | 2 +- crates/libs/tagger/src/tagger/address.rs | 1 + crates/libs/tagger/src/tagger/brand.rs | 19 ++----- crates/libs/tagger/src/tagger/category.rs | 22 +++----- crates/libs/tagger/src/tagger/location.rs | 62 +++++++++-------------- crates/libs/tagger/src/tagger/mod.rs | 1 + 6 files changed, 40 insertions(+), 67 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a18b43090..88fd3ba39 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,7 +51,7 @@ jobs: MIMIR__ELASTICSEARCH__URL: 'http://es1:9200' with: command: llvm-cov - args: --workspace --lcov --output-path lcov.info -- --nocapture + args: --workspace --lcov --output-path lcov.info - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/crates/libs/tagger/src/tagger/address.rs b/crates/libs/tagger/src/tagger/address.rs index 05c68adc2..68d018b0d 100644 --- a/crates/libs/tagger/src/tagger/address.rs +++ b/crates/libs/tagger/src/tagger/address.rs @@ -42,6 +42,7 @@ pub enum AddressTag { } impl Tagger for AddressTagger { + const ASSETS: &'static str = ""; type Output = Result, TaggerError>; fn tag(&self, input: &str, _: Option) -> Self::Output { let mut tag = None; diff --git a/crates/libs/tagger/src/tagger/brand.rs b/crates/libs/tagger/src/tagger/brand.rs index 6443a482a..cc51be719 100644 --- a/crates/libs/tagger/src/tagger/brand.rs +++ b/crates/libs/tagger/src/tagger/brand.rs @@ -1,18 +1,12 @@ -use crate::tagger::{Tagger, TaggerAutocomplete}; -use crate::ASSETS_PATH; use bk_tree::BKTree; use once_cell::sync::Lazy; use serde::Deserialize; -use std::fs; -use std::path::PathBuf; use trie_rs::{Trie, TrieBuilder}; +use crate::tagger::{Tagger, TaggerAutocomplete}; + pub static BRAND_AUTOCOMPLETE_TAGGER: Lazy = Lazy::new(|| { - 
let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("brand.json"); - let brands = fs::read(path).expect("brands data"); - let brands: Vec = serde_json::from_slice(&brands).expect("json brand data"); + let brands: Vec = serde_json::from_str(BrandTagger::ASSETS).expect("json brand data"); let mut tree = TrieBuilder::new(); brands @@ -42,11 +36,7 @@ impl TaggerAutocomplete for BrandAutocompleteTagger { } pub static BRAND_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("brand.json"); - let brands = fs::read(path).expect("brands data"); - let brands: Vec = serde_json::from_slice(&brands).expect("json brand data"); + let brands: Vec = serde_json::from_str(BrandTagger::ASSETS).expect("json brand data"); let mut tree = BKTree::default(); brands @@ -72,6 +62,7 @@ pub struct BrandTagger { } impl Tagger for BrandTagger { + const ASSETS: &'static str = include_str!("../../assets/brand.json"); type Output = bool; fn tag(&self, input: &str, tolerance: Option) -> bool { self.inner diff --git a/crates/libs/tagger/src/tagger/category.rs b/crates/libs/tagger/src/tagger/category.rs index 94df10c4a..322a95490 100644 --- a/crates/libs/tagger/src/tagger/category.rs +++ b/crates/libs/tagger/src/tagger/category.rs @@ -1,20 +1,15 @@ -use crate::tagger::{Tagger, TaggerAutocomplete}; -use crate::ASSETS_PATH; +use std::collections::HashMap; + use bk_tree::BKTree; use once_cell::sync::Lazy; use serde::Deserialize; -use std::collections::HashMap; -use std::fs; -use std::path::PathBuf; use trie_rs::{Trie, TrieBuilder}; +use crate::tagger::{Tagger, TaggerAutocomplete}; + pub static CATEGORY_AUTOCOMPLETE_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("categories.json"); - let categories = 
fs::read(path).expect("category data"); let categories: Vec = - serde_json::from_slice(&categories).expect("json category data"); + serde_json::from_str(CategoryTagger::ASSETS).expect("json category data"); let mut keywords = TrieBuilder::new(); let mut category_map = HashMap::new(); for category in categories.into_iter() { @@ -60,12 +55,8 @@ impl TaggerAutocomplete for CategoryAutocompleteTagger { } pub static CATEGORY_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("categories.json"); - let categories = fs::read(path).expect("category data"); let categories: Vec = - serde_json::from_slice(&categories).expect("json category data"); + serde_json::from_str(CategoryTagger::ASSETS).expect("json category data"); let mut keywords = BKTree::default(); let mut category_map = HashMap::new(); for category in categories.into_iter() { @@ -104,6 +95,7 @@ pub struct CategoryTagger { } impl Tagger for CategoryTagger { + const ASSETS: &'static str = include_str!("../../assets/categories.json"); type Output = Option; fn tag(&self, input: &str, tolerance: Option) -> Self::Output { diff --git a/crates/libs/tagger/src/tagger/location.rs b/crates/libs/tagger/src/tagger/location.rs index fa9ab051f..cb23fecd4 100644 --- a/crates/libs/tagger/src/tagger/location.rs +++ b/crates/libs/tagger/src/tagger/location.rs @@ -1,18 +1,13 @@ -use crate::tagger::Tagger; -use crate::tokens::normalize_diacritics; -use crate::ASSETS_PATH; use bk_tree::BKTree; use once_cell::sync::Lazy; use serde::Deserialize; -use std::fs; -use std::path::PathBuf; + +use crate::tagger::Tagger; +use crate::tokens::normalize_diacritics; pub static COUNTRY_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("countries.json"); - let locations = fs::read(path).expect("countries data"); - let locations: Countries = 
serde_json::from_slice(&locations).expect("valid json locations"); + let locations: Countries = + serde_json::from_str(CountryTagger::ASSETS).expect("valid json locations"); let mut tree = BKTree::default(); @@ -26,11 +21,8 @@ pub static COUNTRY_TAGGER: Lazy = Lazy::new(|| { }); pub static STATE_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("states.json"); - let locations = fs::read(path).expect("state data"); - let locations: States = serde_json::from_slice(&locations).expect("valid json locations"); + let locations: States = + serde_json::from_str(StateTagger::ASSETS).expect("valid json locations"); let mut tree = BKTree::default(); @@ -44,11 +36,8 @@ pub static STATE_TAGGER: Lazy = Lazy::new(|| { }); pub static DISTRICT_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("districts.json"); - let locations = fs::read(path).expect("district data"); - let locations: Districts = serde_json::from_slice(&locations).expect("valid json locations"); + let locations: Districts = + serde_json::from_str(DistrictTagger::ASSETS).expect("valid json locations"); let mut tree = BKTree::default(); @@ -62,13 +51,7 @@ pub static DISTRICT_TAGGER: Lazy = Lazy::new(|| { }); pub static CITY_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - println!("LES ASSETS"); - println!("{:?}", assets); - let path = assets.join("cities.json"); - let locations = fs::read(path).expect("cities data"); - let locations: Cities = serde_json::from_slice(&locations).expect("valid json locations"); + let locations: Cities = serde_json::from_str(CityTagger::ASSETS).expect("valid json locations"); let mut tree = BKTree::default(); @@ -82,12 +65,8 @@ pub static CITY_TAGGER: Lazy = Lazy::new(|| { }); pub static 
CITY_DISTRICT_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("cities_districts.json"); - let locations = fs::read(path).expect("city district data"); let locations: CitiesDistricts = - serde_json::from_slice(&locations).expect("valid json locations"); + serde_json::from_str(CityDistrictTagger::ASSETS).expect("valid json locations"); let mut tree = BKTree::default(); @@ -104,11 +83,8 @@ pub static CITY_DISTRICT_TAGGER: Lazy = Lazy::new(|| { }); pub static SUBURBS_TAGGER: Lazy = Lazy::new(|| { - let assets = - ASSETS_PATH.get_or_init(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("assets")); - let path = assets.join("suburbs.json"); - let locations = fs::read(path).expect("suburb data"); - let locations: Suburbs = serde_json::from_slice(&locations).expect("valid json locations"); + let locations: Suburbs = + serde_json::from_str(SuburbTagger::ASSETS).expect("valid json locations"); let mut tree = BKTree::default(); @@ -182,6 +158,8 @@ pub struct CountryTagger { } impl Tagger for CityTagger { + const ASSETS: &'static str = include_str!("../../assets/cities.json"); + type Output = bool; fn tag(&self, input: &str, tolerance: Option) -> bool { self.inner @@ -192,6 +170,8 @@ impl Tagger for CityTagger { } impl Tagger for StateTagger { + const ASSETS: &'static str = include_str!("../../assets/states.json"); + type Output = bool; fn tag(&self, input: &str, tolerance: Option) -> bool { self.inner @@ -202,6 +182,8 @@ impl Tagger for StateTagger { } impl Tagger for DistrictTagger { + const ASSETS: &'static str = include_str!("../../assets/districts.json"); + type Output = bool; fn tag(&self, input: &str, tolerance: Option) -> bool { self.inner @@ -212,6 +194,8 @@ impl Tagger for DistrictTagger { } impl Tagger for CityDistrictTagger { + const ASSETS: &'static str = include_str!("../../assets/cities_districts.json"); + type Output = bool; fn tag(&self, input: &str, 
tolerance: Option) -> bool { self.inner @@ -222,6 +206,8 @@ impl Tagger for CityDistrictTagger { } impl Tagger for SuburbTagger { + const ASSETS: &'static str = include_str!("../../assets/suburbs.json"); + type Output = bool; fn tag(&self, input: &str, tolerance: Option) -> bool { self.inner @@ -232,6 +218,8 @@ impl Tagger for SuburbTagger { } impl Tagger for CountryTagger { + const ASSETS: &'static str = include_str!("../../assets/countries.json"); + type Output = bool; fn tag(&self, input: &str, tolerance: Option) -> bool { self.inner diff --git a/crates/libs/tagger/src/tagger/mod.rs b/crates/libs/tagger/src/tagger/mod.rs index 48859a51d..18efbc6e0 100644 --- a/crates/libs/tagger/src/tagger/mod.rs +++ b/crates/libs/tagger/src/tagger/mod.rs @@ -17,6 +17,7 @@ pub trait TaggerAutocomplete { /// Utility trait to implement tagging logic, not that the Output type can /// be anything if additional info needs to be conveyed. pub trait Tagger { + const ASSETS: &'static str; type Output; /// Apply implementor tagging with the given levenshtein distance. fn tag(&self, input: &str, tolerance: Option) -> Self::Output;