diff --git a/.github/workflows/github-ci.yml b/.github/workflows/github-ci.yml
index 374a4945..c8624c30 100644
--- a/.github/workflows/github-ci.yml
+++ b/.github/workflows/github-ci.yml
@@ -8,20 +8,20 @@ on:
workflow_dispatch:
jobs:
- build-nugets:
- name: 🚀 Pack ${{matrix.projectName}}
- runs-on: ubuntu-latest
+ build-native:
+ name: 💾 Build ${{matrix.task.title}} for ${{matrix.task.os}} (${{matrix.task.arch}})
+ runs-on: ${{matrix.task.os}}
strategy:
fail-fast: false
matrix:
- projectName:
- - LanguageIdentification.CLD2
- - LanguageIdentification.CLD3
- - LanguageIdentification.FastText
- - LanguageIdentification.Whatlang
- - LanguageIdentification.MediaPipe
- - LanguageIdentification.Lingua
- - LanguageCode
+ task:
+ - { title: FastText, projectName: LanguageIdentification.FastText.Native, os: windows-latest, arch: x86_64, script: ./Scripts/run-build.ps1, artifact: fasttext.dll }
+ - { title: FastText, projectName: LanguageIdentification.FastText.Native, os: macos-13, arch: x86_64, script: ./Scripts/run-build.macos.sh, artifact: libfasttext.x86_64.dylib }
+ - { title: FastText, projectName: LanguageIdentification.FastText.Native, os: macos-14, arch: arm64, script: ./Scripts/run-build.macos.sh, artifact: libfasttext.arm64.dylib }
+ - { title: CLD2, projectName: LanguageIdentification.CLD2.Native, os: macos-13, arch: x86_64, script: ./Scripts/run-build.macos.sh, artifact: libcld2.x86_64.dylib }
+ - { title: CLD2, projectName: LanguageIdentification.CLD2.Native, os: macos-14, arch: arm64, script: ./Scripts/run-build.macos.sh, artifact: libcld2.arm64.dylib }
+ - { title: CLD3, projectName: LanguageIdentification.CLD3.Native, os: macos-13, arch: x86_64, script: ./Scripts/run-build.macos.sh, artifact: libcld3.x86_64.dylib }
+ - { title: CLD3, projectName: LanguageIdentification.CLD3.Native, os: macos-14, arch: arm64, script: ./Scripts/run-build.macos.sh, artifact: libcld3.arm64.dylib }
permissions:
contents: read
packages: write
@@ -38,14 +38,79 @@ jobs:
with:
filters: |
src:
- - '${{env.ROOT}}/${{matrix.projectName}}/**'
+ - '${{env.ROOT}}/${{matrix.task.projectName}}/**'
+
+ - uses: dorny/paths-filter@v3
+ id: changes-native  # projectName in this job's matrix already ends in ".Native"
+ with:
+ filters: |
+ src:
+ - '${{env.ROOT}}/${{matrix.task.projectName}}/**'
+
+ - name: 🚀 Build ${{matrix.task.projectName}}
+ if: steps.changes.outputs.src == 'true'
+ working-directory: ${{env.ROOT}}/${{matrix.task.projectName}}
+ run: |
+ echo $(pwd)
+ git update-index --chmod=+x ${{matrix.task.script}}
+ chmod +x ${{matrix.task.script}}
+ ${{matrix.task.script}} ${{matrix.task.arch}}
+
+ - uses: actions/upload-artifact@v4
+ if: steps.changes.outputs.src == 'true'
+ with:
+ name: native-${{matrix.task.projectName}}-${{matrix.task.os}}
+ path: ${{env.ROOT}}/${{matrix.task.projectName}}/${{matrix.task.artifact}}
+ retention-days: 1
+ overwrite: 'true'
+ compression-level: 0
+ if-no-files-found: 'error'
+
+ build-package:
+ name: 🚀 Pack ${{matrix.task.title}}
+ runs-on: ${{matrix.task.os}}
+ needs: build-native
+ strategy:
+ fail-fast: false
+ matrix:
+ task:
+ - { title: CLD2, projectName: LanguageIdentification.CLD2, os: ubuntu-latest }
+ - { title: CLD3, projectName: LanguageIdentification.CLD3, os: ubuntu-latest }
+ - { title: FastText, projectName: LanguageIdentification.FastText, os: ubuntu-latest }
+ - { title: Whatlang, projectName: LanguageIdentification.Whatlang, os: ubuntu-latest }
+ - { title: MediaPipe, projectName: LanguageIdentification.MediaPipe, os: ubuntu-latest }
+ - { title: Lingua, projectName: LanguageIdentification.Lingua, os: ubuntu-latest }
+ - { title: LanguageCode, projectName: LanguageCode, os: ubuntu-latest }
+ permissions:
+ contents: read
+ packages: write
+ env:
+ ROOT: ./src
+ NUGET_AUTH_TOKEN: ${{secrets.GITHUB_TOKEN}}
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: 'true'
+
+ - uses: actions/download-artifact@v4
+ with:
+ pattern: native-${{matrix.task.projectName}}.Native-*
+ merge-multiple: true
+ path: ${{env.ROOT}}/${{matrix.task.projectName}}.Native/
+
+ - uses: dorny/paths-filter@v3
+ id: changes
+ with:
+ filters: |
+ src:
+ - '${{env.ROOT}}/${{matrix.task.projectName}}/**'
- uses: dorny/paths-filter@v3
id: changes-native
with:
filters: |
src:
- - '${{env.ROOT}}/${{matrix.projectName}}.Native/**'
+ - '${{env.ROOT}}/${{matrix.task.projectName}}.Native/**'
- name: 📂 Setup .NET Core
uses: actions/setup-dotnet@v4
@@ -57,21 +122,21 @@ jobs:
8.0.x
source-url: https://nuget.pkg.github.com/${{github.repository_owner}}/index.json
- - name: 🚀 Pack ${{matrix.projectName}}.Native
+ - name: 🚀 Pack ${{matrix.task.projectName}}.Native
if: steps.changes-native.outputs.src == 'true'
- working-directory: ${{env.ROOT}}/${{matrix.projectName}}.Native
+ working-directory: ${{env.ROOT}}/${{matrix.task.projectName}}.Native
run: dotnet pack -c Release -o out
- - name: 🚀 Pack ${{matrix.projectName}}
+ - name: 🚀 Pack ${{matrix.task.projectName}}
if: steps.changes.outputs.src == 'true'
- working-directory: ${{env.ROOT}}/${{matrix.projectName}}
+ working-directory: ${{env.ROOT}}/${{matrix.task.projectName}}
run: dotnet pack -c Release -o out
- uses: actions/upload-artifact@v4
if: steps.changes-native.outputs.src == 'true'
with:
- name: build-${{matrix.projectName}}.Native
- path: ${{env.ROOT}}/${{matrix.projectName}}.Native/out
+ name: build-${{matrix.task.projectName}}.Native
+ path: ${{env.ROOT}}/${{matrix.task.projectName}}.Native/out
retention-days: 1
overwrite: 'true'
compression-level: 0
@@ -79,16 +144,29 @@ jobs:
- uses: actions/upload-artifact@v4
if: steps.changes.outputs.src == 'true'
with:
- name: build-${{matrix.projectName}}
- path: ${{env.ROOT}}/${{matrix.projectName}}/out
+ name: build-${{matrix.task.projectName}}
+ path: ${{env.ROOT}}/${{matrix.task.projectName}}/out
retention-days: 1
overwrite: 'true'
compression-level: 0
- test-nugets:
- name: 🧪 Test ${{matrix.projectName}}
- runs-on: ubuntu-latest
- needs: build-nugets
+ test-package:
+ name: 🧪 Test on ${{matrix.task.os}}
+ runs-on: ${{matrix.task.os}}
+ needs: build-package
+ strategy:
+ fail-fast: false
+ # https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#standard-github-hosted-runners-for--private-repositories
+ matrix:
+ task:
+ - { os: ubuntu-24.04 }
+ - { os: ubuntu-22.04 }
+ # - { os: ubuntu-20.04 }
+ - { os: windows-2022 }
+ - { os: windows-2019 }
+ - { os: macos-13 }
+ - { os: macos-14 }
+ - { os: macos-15 }
permissions:
contents: read
packages: write
@@ -100,36 +178,48 @@ jobs:
- name: 📂 Files
working-directory: ${{env.ROOT}}
- run: mkdir -p local-nugets
+ run: mkdir -p local-packages
- uses: actions/download-artifact@v4
with:
pattern: build-*
merge-multiple: true
- path: ${{env.ROOT}}/local-nugets
+ path: ${{env.ROOT}}/local-packages
- name: 📂 Files
- working-directory: ${{env.ROOT}}
+ working-directory: ${{env.ROOT}}/local-packages
run: ls -R
- - name: 📂 Build Docker Image
- working-directory: ${{env.ROOT}}
+ - name: 📂 Use local NuGet (Linux)
+ if: ${{ startsWith(matrix.task.os, 'ubuntu') }}
run: |
- docker build -f ./test.Dockerfile -t langunage-identification-test-runner:latest .
- docker build -f ./test-ci.Dockerfile -t langunage-identification-test-runner-ci:latest .
+ path=$(realpath "${{env.ROOT}}/local-packages")
+ dotnet nuget add source "$path"  # quoted so a path containing spaces stays one argument
- - name: 🧪 Run Tests in Docker
- working-directory: ${{env.ROOT}}
- run: docker run --rm -v $(pwd):/src langunage-identification-test-runner-ci:latest
+ - name: 📂 Use local NuGet (Windows)
+ if: ${{ startsWith(matrix.task.os, 'windows') }}
+ run: |
+ $path = [System.IO.Path]::GetFullPath('${{env.ROOT}}/local-packages')
+ dotnet nuget add source $path
- - name: 🧪 Run Test for LanguageCode
- working-directory: ${{env.ROOT}}/LanguageCode.Tests
+ - name: 📂 Use local NuGet (OSX)
+ if: ${{ startsWith(matrix.task.os, 'macos') }}
+ run: |
+ path=$(realpath "${{env.ROOT}}/local-packages")
+ dotnet nuget add source "$path"  # quoted so a path containing spaces stays one argument
+
+ - name: 🧪 Run Tests for LanguageIdentification
+ working-directory: ${{env.ROOT}}/LanguageIdentification.Tests
run: dotnet test -c CI
- deploy-nugets:
+ - name: 🧪 Run Tests for LanguageCode
+ working-directory: ${{env.ROOT}}/LanguageCode.Tests
+ run: dotnet test
+
+ deploy-package:
name: 🚚 Push ${{matrix.projectName}}
runs-on: ubuntu-latest
- needs: test-nugets
+ needs: test-package
if: github.ref == 'refs/heads/master'
strategy:
fail-fast: false
diff --git a/README.md b/README.md
index c9cb57d3..73e52774 100644
--- a/README.md
+++ b/README.md
@@ -89,14 +89,14 @@ functionality into their applications.
## Platform support
-| Model | Linux | Windows | macOS | Blazor WASM |
-| :-------- | :----------------: | :--------------: | :----: | :------------: |
-| CLD2 | :white_check_mark: | :construction: | :x: | :x: |
-| CLD3 | :white_check_mark: | :construction: | :x: | :x: |
-| FastText | :white_check_mark: | :construction: | :x: | :x: |
-| Whatlang | :white_check_mark: | :construction: | :x: | :x: |
-| MediaPipe | :white_check_mark: | :construction: | :x: | :x: |
-| Lingua | :white_check_mark: | :construction: | :x: | :x: |
+| Model | Linux | Windows | macOS | Blazor WASM |
+| :-------- | :----------------: | :----------------: | :----------------: | :------------: |
+| CLD2 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: |
+| CLD3 | :white_check_mark: | :white_check_mark: | :construction: | :x: |
+| FastText | :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: |
+| Whatlang | :white_check_mark: | :construction: | :construction: | :x: |
+| MediaPipe | :white_check_mark: | :construction: | :construction: | :x: |
+| Lingua | :white_check_mark: | :construction: | :construction: | :x: |
:white_check_mark: — Full support |
:x: — No support |
diff --git a/README_CLD2.md b/README_CLD2.md
index 3a1b62df..785c614d 100644
--- a/README_CLD2.md
+++ b/README_CLD2.md
@@ -5,8 +5,8 @@ Welcome to **Panlingo.LanguageIdentification.CLD2**, a .NET wrapper for the Chro
## Requirements
- Runtime: **.NET >= 5.0**
-- OS: **Linux**
-- Arch: **AMD64**
+- OS: **Linux (Ubuntu, Debian)**, **Windows 10+** or **Windows Server 2019+**, **macOS**
+- Arch: **AMD64** (or **ARM64** for macOS)
## Installation
diff --git a/README_CLD3.md b/README_CLD3.md
index ed827ed8..ddfd5085 100644
--- a/README_CLD3.md
+++ b/README_CLD3.md
@@ -5,7 +5,7 @@ Welcome to **Panlingo.LanguageIdentification.CLD3**, a .NET wrapper for the Chro
## Requirements
- Runtime: **.NET >= 5.0**
-- OS: **Linux**
+- OS: **Linux (Ubuntu, Debian)**, **Windows 10+** or **Windows Server 2019+**
- Arch: **AMD64**
## Installation
diff --git a/README_FASTTEXT.md b/README_FASTTEXT.md
index bb37021c..2df9b77a 100644
--- a/README_FASTTEXT.md
+++ b/README_FASTTEXT.md
@@ -5,8 +5,8 @@ Welcome to **Panlingo.LanguageIdentification.FastText**, a .NET wrapper for the
## Requirements
- Runtime: **.NET >= 5.0**
-- OS: **Linux**
-- Arch: **AMD64**
+- OS: **Linux (Ubuntu, Debian)**, **Windows 10+** or **Windows Server 2019+**, **macOS**
+- Arch: **AMD64** (or **ARM64** for macOS)
## Installation
diff --git a/src/LanguageCode.Tests/LanguageCode.Tests.csproj b/src/LanguageCode.Tests/LanguageCode.Tests.csproj
index 205df9fa..310f39d6 100644
--- a/src/LanguageCode.Tests/LanguageCode.Tests.csproj
+++ b/src/LanguageCode.Tests/LanguageCode.Tests.csproj
@@ -7,6 +7,10 @@
false
true
+
+ Panlingo.LanguageCode.Tests
+
+ Debug;Release;CI
diff --git a/src/LanguageCode.Tests/LanguageCodeTests.cs b/src/LanguageCode.Tests/LanguageCodeTests.cs
index 1e0a88a9..b03f19c1 100644
--- a/src/LanguageCode.Tests/LanguageCodeTests.cs
+++ b/src/LanguageCode.Tests/LanguageCodeTests.cs
@@ -1,7 +1,6 @@
-using Panlingo.LanguageCode;
-using Panlingo.LanguageCode.Models;
+using Panlingo.LanguageCode.Models;
-namespace LanguageCode.Tests
+namespace Panlingo.LanguageCode.Tests
{
public class LanguageCodeTests
{
diff --git a/src/LanguageCode/LanguageCode.csproj b/src/LanguageCode/LanguageCode.csproj
index 8813ba77..15927afe 100644
--- a/src/LanguageCode/LanguageCode.csproj
+++ b/src/LanguageCode/LanguageCode.csproj
@@ -2,7 +2,7 @@
netstandard2.1;net5.0;net6.0;net7.0;net8.0
- 0.0.0.21
+ 0.0.0.23
Panlingo.LanguageCode
Panlingo.LanguageCode
Panlingo.LanguageCode
diff --git a/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs b/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs
index b94ab372..cb3672c6 100644
--- a/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs
+++ b/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs
@@ -6,7 +6,7 @@ static void Main(string[] args)
{
using var cld2 = new CLD2Detector();
- var text = "Hello, how are you? Привіт, як справи? Привет, как дела?";
+ var text = "Привіт, як справи?";
var predictions = cld2.PredictLanguage(text);
diff --git a/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json b/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json
index 65b8965a..1f434ec6 100644
--- a/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json
+++ b/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json
@@ -2,6 +2,13 @@
"profiles": {
"Docker": {
"commandName": "Docker"
+ },
+ "WSL": {
+ "commandName": "WSL2",
+ "distributionName": ""
+ },
+ "Project": {
+ "commandName": "Project"
}
}
}
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD2.Native/.gitignore b/src/LanguageIdentification.CLD2.Native/.gitignore
index 162500b1..585d5a3f 100644
--- a/src/LanguageIdentification.CLD2.Native/.gitignore
+++ b/src/LanguageIdentification.CLD2.Native/.gitignore
@@ -1,2 +1,3 @@
libcld2.so
+libcld2.dll
build_temp/**
diff --git a/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs b/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs
index 5b952e65..7afcdeba 100644
--- a/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs
+++ b/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs
@@ -5,6 +5,6 @@ public static class CLD2NativeLibrary
///
/// Name of native binary
///
- public const string Name = "libcld2.so";
+ public const string Name = "libcld2";
}
}
diff --git a/src/LanguageIdentification.CLD2.Native/Dockerfile b/src/LanguageIdentification.CLD2.Native/Dockerfile
index 3e7e769a..e0d63a0e 100644
--- a/src/LanguageIdentification.CLD2.Native/Dockerfile
+++ b/src/LanguageIdentification.CLD2.Native/Dockerfile
@@ -1,5 +1,4 @@
-FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
-ARG BUILD_CONFIGURATION=Release
+FROM debian:bullseye-slim AS build
WORKDIR /repo
COPY ["src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj", "src/LanguageIdentification.CLD2.Native/"]
diff --git a/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj b/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj
index fdec2604..1bdd177a 100644
--- a/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj
+++ b/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj
@@ -2,7 +2,7 @@
netstandard2.1
- 0.0.0.21
+ 0.1.0
Panlingo.LanguageIdentification.CLD2.Native
Panlingo.LanguageIdentification.CLD2.Native
Panlingo.LanguageIdentification.CLD2.Native
@@ -62,6 +62,36 @@
+
+
+ PreserveNewest
+ true
+ runtimes/win-x64/native
+ true
+ false
+
+
+
+
+
+ PreserveNewest
+ true
+ runtimes/osx-x64/native/libcld2.dylib
+ true
+ false
+
+
+
+
+
+ PreserveNewest
+ true
+ runtimes/osx-arm64/native/libcld2.dylib
+ true
+ false
+
+
+
diff --git a/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt b/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt
index 4d6a56b7..d416d77b 100644
--- a/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt
+++ b/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt
@@ -1,21 +1,48 @@
-cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
-project(cld2_bridge)
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
-if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-endif()
+project(cld2)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD_REQUIRED ON)
-if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -lpthread")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
+ elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--start-group -lwinpthread -Wl,--end-group")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
+ endif()
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267")
-endif()
+ if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64 -target arm64-apple-macos11")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64 -target arm64-apple-macos11")
+ elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15")
+ endif()
+endif()
set(CMAKE_MACOSX_RPATH 1)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+message(STATUS "System name: ${CMAKE_SYSTEM_NAME}")
+message(STATUS "CMake version: ${CMAKE_VERSION}")
+message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER_ID}")
+message(STATUS "C compiler: ${CMAKE_C_COMPILER_ID}")
+message(STATUS "CXX flags: ${CMAKE_CXX_FLAGS}")
+message(STATUS "C flags: ${CMAKE_C_FLAGS}")
+
include_directories(
${PROJECT_SOURCE_DIR}/cld2/internal
${PROJECT_SOURCE_DIR}/cld2/public
@@ -39,28 +66,28 @@ set(CLD2_SOURCES
${PROJECT_SOURCE_DIR}/cld2/internal/tote.cc
${PROJECT_SOURCE_DIR}/cld2/internal/utf8statetable.cc
${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc
-
- ### Chrome (less perfect predictions)
- # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc
- # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc
+
+ ### Chrome (less perfect predictions)
+ # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc
+ # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc
# ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_delta_bi_4.cc
- # ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc
+ # ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc
# ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_quadchrome_2.cc
- # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaoctachrome.cc
+ # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaoctachrome.cc
# ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_distinctoctachrome.cc
- # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_2.cc
- ###
+ # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_2.cc
+ ###
- ### Full
- ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc
- ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc
+ ### Full
+ ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc
+ ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc
${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_delta_bi_32.cc
- ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc
+ ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc
${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_quad0122.cc
- ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaocta0122.cc
+ ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaocta0122.cc
${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_distinctocta0122.cc
- ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_0122.cc
- ###
+ ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_0122.cc
+ ###
${PROJECT_SOURCE_DIR}/cld2/internal/cld2_dynamic_compat.h
${PROJECT_SOURCE_DIR}/cld2/internal/cld2_dynamic_data_extractor.h
@@ -101,6 +128,25 @@ set(CLD2_SOURCES
add_library(objlib OBJECT ${CLD2_SOURCES})
-add_library(cld2 SHARED $)
+add_library(${PROJECT_NAME} SHARED $)
+
+set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/cld2/binding.h")
+
+if (APPLE)
+ if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ set(CLANG_LIB_DIR /opt/homebrew/opt/llvm/lib/c++)
+ elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+ set(CLANG_LIB_DIR /usr/local/opt/llvm/lib/c++)
+ endif()
-set_target_properties(cld2 PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/cld2/binding.h")
\ No newline at end of file
+ target_link_libraries(${PROJECT_NAME}
+ -nostdlib++
+ -Wl,${CLANG_LIB_DIR}/libc++.a
+ -Wl,${CLANG_LIB_DIR}/libc++abi.a)
+else()
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # nop
+ elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+ target_link_libraries(${PROJECT_NAME} -static-libgcc -static-libstdc++)
+ endif()
+endif()
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD2.Native/Native/binding.cc b/src/LanguageIdentification.CLD2.Native/Native/binding.cc
index 43207036..ee3bafe5 100644
--- a/src/LanguageIdentification.CLD2.Native/Native/binding.cc
+++ b/src/LanguageIdentification.CLD2.Native/Native/binding.cc
@@ -9,7 +9,7 @@
extern "C"
{
- PredictionResult* PredictLanguage(char *text, int* resultCount)
+ EXPORT PredictionResult* PredictLanguage(char *text, int* resultCount)
{
int textLength = strlen(text);
@@ -62,7 +62,7 @@ extern "C"
for (int i = 0; i < predictionCount; ++i) {
CLD2::Language language = languages[i];
- double probability = scoreTotal > 0 ? scores[i] / (double)scoreTotal : 1.0;
+ double probability = scoreTotal > 0 ? scores[i] / (double)scoreTotal : 0;
double proportion = percents[i] / 100.0;
result[i].language = strdup(CLD2::LanguageCode(language));
@@ -75,7 +75,7 @@ extern "C"
return result;
}
- void FreeResults(PredictionResult* results, int count)
+ EXPORT void FreeResults(PredictionResult* results, int count)
{
for (int i = 0; i < count; ++i) {
free((void*)results[i].language);
diff --git a/src/LanguageIdentification.CLD2.Native/Native/binding.h b/src/LanguageIdentification.CLD2.Native/Native/binding.h
index f221d840..7feba150 100644
--- a/src/LanguageIdentification.CLD2.Native/Native/binding.h
+++ b/src/LanguageIdentification.CLD2.Native/Native/binding.h
@@ -5,11 +5,13 @@
#include "./cld2/public/compact_lang_det.h"
#ifndef EXPORT
-#if defined(_WIN32) || defined(_WIN64)
-#define EXPORT __declspec(dllimport)
-#else
-#define EXPORT extern
-#endif
+# if defined(_WIN32) || defined(_WIN64)
+# define EXPORT __declspec(dllexport)
+# elif defined(__GNUC__) || defined(__clang__)
+# define EXPORT __attribute__((visibility("default")))
+# else
+# define EXPORT
+# endif
#endif
extern "C"
diff --git a/src/LanguageIdentification.CLD2.Native/Native/toolchain-mingw.cmake b/src/LanguageIdentification.CLD2.Native/Native/toolchain-mingw.cmake
new file mode 100644
index 00000000..94e97c6b
--- /dev/null
+++ b/src/LanguageIdentification.CLD2.Native/Native/toolchain-mingw.cmake
@@ -0,0 +1,16 @@
+set(CMAKE_SYSTEM_NAME Windows)
+set(TOOLCHAIN_PREFIX x86_64-w64-mingw32)
+
+# cross compilers to use for C, C++ and Fortran
+set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc)
+set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}-g++)
+set(CMAKE_Fortran_COMPILER ${TOOLCHAIN_PREFIX}-gfortran)
+set(CMAKE_RC_COMPILER ${TOOLCHAIN_PREFIX}-windres)
+
+# target environment on the build host system
+set(CMAKE_FIND_ROOT_PATH /usr/${TOOLCHAIN_PREFIX})
+
+# modify default behavior of FIND_XXX() commands
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD2.Native/Native/toolchain-osxcross.cmake b/src/LanguageIdentification.CLD2.Native/Native/toolchain-osxcross.cmake
new file mode 100644
index 00000000..6e1223e1
--- /dev/null
+++ b/src/LanguageIdentification.CLD2.Native/Native/toolchain-osxcross.cmake
@@ -0,0 +1,7 @@
+set(CMAKE_SYSTEM_NAME Darwin)
+set(CMAKE_SYSTEM_VERSION 1)
+
+# Path to the osxcross toolchain binaries
+set(CMAKE_OSX_SYSROOT /usr/local/osxcross/target)
+set(CMAKE_C_COMPILER /usr/local/osxcross/bin/o64-clang)
+set(CMAKE_CXX_COMPILER /usr/local/osxcross/bin/o64-clang++)
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD2.Native/Scripts/run-build.macos.sh b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.macos.sh
new file mode 100644
index 00000000..dedf7bcb
--- /dev/null
+++ b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.macos.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Builds libcld2.dylib for one macOS architecture and copies it next to the
+# project as libcld2.<arch>.dylib.
+#
+# Usage: run-build.macos.sh <x86_64|arm64>
+# All paths below are relative, so the caller's working directory matters
+# (the CI step runs it from the project directory as ./Scripts/run-build.macos.sh).
+set -e
+
+if [ -z "$1" ]; then
+    echo "Error: No architecture specified."
+    echo "Usage: $0 <x86_64|arm64>"
+    exit 1
+fi
+
+ARCH=$1
+
+if [[ "$ARCH" != "x86_64" && "$ARCH" != "arm64" ]]; then
+    echo "Error: Invalid architecture specified. Use 'x86_64' or 'arm64'."
+    exit 1
+fi
+
+echo "Hello world $ARCH";
+
+# CMakeLists.txt links libc++ statically from the Homebrew LLVM install
+brew install llvm
+
+workspace="build_temp"
+
+# BSD mkdir (macOS) only parses options placed before the operands
+mkdir -p "$workspace"
+cp -a ../../third_party/cld2/. "$workspace/cld2"
+cp -a Native/. "$workspace"
+
+ls -R .
+
+cd "$workspace"
+
+mkdir -p build
+cd build
+
+echo "Build for MacOS on $ARCH";
+rm -rf ./*
+cmake -DCMAKE_OSX_ARCHITECTURES="$ARCH" ..
+make -j "$(sysctl -n hw.logicalcpu)"
+
+ls -R
+
+otool -L libcld2.dylib
+cp libcld2.dylib "../../libcld2.$ARCH.dylib"
+
+# Clean up: step back out of $workspace/build first, otherwise the relative
+# path does not resolve and the workspace is left behind
+cd ../..
+rm -rf "$workspace"
+echo "Goodbye world";
diff --git a/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh
index f46c6d06..2b9571b6 100644
--- a/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh
+++ b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh
@@ -15,17 +15,27 @@ cd "$workspace"
mkdir build
cd build
+
+# Build for Linux
+rm -rf *
cmake ..
make -j $(nproc) # make
-cd ..
-echo $(pwd)
-ls -R build
-cd ..
+ls -R
-find "$workspace/build" -name "libcld2.so" -exec cp {} libcld2.so \;
-rm -rf "$workspace"
ldd libcld2.so
+cp libcld2.so ../../libcld2.so
+
+# Build for Windows
+rm -rf *
+cmake .. -DCMAKE_TOOLCHAIN_FILE=./toolchain-mingw.cmake
+make -j $(nproc) # make
+ls -R
+
+cp libcld2.dll ../../libcld2.dll
+
+# Clean up (leave $workspace/build first: $workspace is a relative path)
+cd ../.. && rm -rf "$workspace"
echo "Goodbye world";
diff --git a/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh b/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh
index 4f5708d0..c0ebe9b5 100644
--- a/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh
+++ b/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh
@@ -6,4 +6,5 @@ echo "Installing build packages";
sudo apt -y update | apt -y update
sudo apt -y install cmake | apt -y install cmake
sudo apt -y install g++ | apt -y install g++
+sudo apt -y install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 || apt -y install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64
diff --git a/src/LanguageIdentification.CLD2/CLD2Detector.cs b/src/LanguageIdentification.CLD2/CLD2Detector.cs
index 4f257853..881451a3 100644
--- a/src/LanguageIdentification.CLD2/CLD2Detector.cs
+++ b/src/LanguageIdentification.CLD2/CLD2Detector.cs
@@ -13,7 +13,7 @@ public class CLD2Detector : IDisposable
{
public CLD2Detector()
{
- if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ if (!IsSupported())
{
throw new NotSupportedException(
$"{nameof(CLD2Detector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}"
@@ -21,6 +21,27 @@ public CLD2Detector()
}
}
+ /// <summary>
+ /// Reports whether the current process runs on an OS/architecture pair accepted by this detector.
+ /// </summary>
+ /// <remarks>
+ /// The check uses the process architecture rather than the OS architecture: the native
+ /// library is loaded into the process, so e.g. a 32-bit process on 64-bit Windows
+ /// has to be reported as unsupported.
+ /// </remarks>
+ /// <returns><c>true</c> for x64 on Linux, Windows and macOS and for arm64 on macOS; otherwise <c>false</c>.</returns>
+ public static bool IsSupported()
+ {
+ return RuntimeInformation.ProcessArchitecture switch
+ {
+ Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true,
+ Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Windows) => true,
+ Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true,
+ Architecture.Arm64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true,
+ _ => false,
+ };
+ }
+
///
/// Produces a prediction for 'text'
///
diff --git a/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs b/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs
index 3ef06497..21897b92 100644
--- a/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs
+++ b/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs
@@ -7,7 +7,10 @@ namespace Panlingo.LanguageIdentification.CLD2.Internal
internal static class CLD2DetectorWrapper
{
[DllImport(CLD2NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern IntPtr PredictLanguage(string text, out int resultCount);
+ public static extern IntPtr PredictLanguage(
+ [MarshalAs(UnmanagedType.LPUTF8Str)] string text,
+ out int resultCount
+ );
[DllImport(CLD2NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
public static extern void FreeResults(IntPtr results, int count);
diff --git a/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj b/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj
index 71ca49e8..189b9e60 100644
--- a/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj
+++ b/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj
@@ -2,7 +2,7 @@
net5.0;net6.0;net7.0;net8.0
- 0.0.0.21
+ 0.1.0
Panlingo.LanguageIdentification.CLD2
Panlingo.LanguageIdentification.CLD2
Panlingo.LanguageIdentification.CLD2
@@ -15,7 +15,11 @@
nlp lid language-identification language-detection cld2
README_CLD2.md
- - Initial release
+0.1.0.0
+- Windows and MacOS support
+
+0.0.0.1
+- Initial release
This is a .NET wrapper for the Chrome Language Detection (CLD2) library by Google Inc.
diff --git a/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs b/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs
index 63710c75..ec55d826 100644
--- a/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs
+++ b/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs
@@ -6,7 +6,7 @@ static void Main(string[] args)
{
using var cld3 = new CLD3Detector(minNumBytes: 0, maxNumBytes: 512);
- var text = "Hello, how are you? Привіт, як справи? Привет, как дела?";
+ var text = "Привіт, як справи?";
var singlePrediction = cld3.PredictLanguage(text);
diff --git a/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json b/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json
index 47d960e0..1f434ec6 100644
--- a/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json
+++ b/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json
@@ -6,6 +6,9 @@
"WSL": {
"commandName": "WSL2",
"distributionName": ""
+ },
+ "Project": {
+ "commandName": "Project"
}
}
}
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD3.Native/.gitignore b/src/LanguageIdentification.CLD3.Native/.gitignore
index 60230aec..2af070e4 100644
--- a/src/LanguageIdentification.CLD3.Native/.gitignore
+++ b/src/LanguageIdentification.CLD3.Native/.gitignore
@@ -1,2 +1,4 @@
libcld3.so
+libcld3.dll
+libcld3.dylib
build_temp/**
diff --git a/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs b/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs
index c84346a0..5c5835d8 100644
--- a/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs
+++ b/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs
@@ -5,6 +5,6 @@ public static class CLD3NativeLibrary
///
/// Name of native binary
///
- public const string Name = "libcld3.so";
+ public const string Name = "libcld3";
}
}
diff --git a/src/LanguageIdentification.CLD3.Native/Dockerfile b/src/LanguageIdentification.CLD3.Native/Dockerfile
index a575ae8f..d249b1b3 100644
--- a/src/LanguageIdentification.CLD3.Native/Dockerfile
+++ b/src/LanguageIdentification.CLD3.Native/Dockerfile
@@ -1,5 +1,4 @@
-FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
-ARG BUILD_CONFIGURATION=Release
+FROM debian:bullseye-slim AS build
WORKDIR /repo
COPY ["src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj", "src/LanguageIdentification.CLD3.Native/"]
diff --git a/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj b/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj
index 71bd96dc..f70f6e96 100644
--- a/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj
+++ b/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj
@@ -2,7 +2,7 @@
netstandard2.1
- 0.0.0.21
+ 0.1.0
Panlingo.LanguageIdentification.CLD3.Native
Panlingo.LanguageIdentification.CLD3.Native
Panlingo.LanguageIdentification.CLD3.Native
@@ -61,6 +61,36 @@
false
+
+
+
+ PreserveNewest
+ true
+ runtimes/win-x64/native
+ true
+ false
+
+
+
+
+
+ PreserveNewest
+ true
+ runtimes/osx-x64/native/libcld3.dylib
+ true
+ false
+
+
+
+
+
+ PreserveNewest
+ true
+ runtimes/osx-arm64/native/libcld3.dylib
+ true
+ false
+
+
diff --git a/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt b/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt
index 6b3d4cde..82c9ea70 100644
--- a/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt
+++ b/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt
@@ -1,45 +1,80 @@
-project(cld3)
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR) # now declared before project(), as CMake expects
-cmake_minimum_required(VERSION 3.9)
+project(cld3)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD_REQUIRED ON)
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") # GCC: native Linux build or MinGW cross build
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -lpthread")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") # silence narrowing-conversion diagnostics
+ elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") # reached via toolchain-mingw.cmake, which sets CMAKE_SYSTEM_NAME to Windows
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++") # link the GCC runtimes statically into the DLL
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--start-group -lwinpthread -Wl,--end-group")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-allow-multiple-definition")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
+ endif()
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") # regex match, so "AppleClang" is covered too
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
+
+ if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") # architecture is passed in by run-build.macos.sh
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64 -target arm64-apple-macos11")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64 -target arm64-apple-macos11")
+ elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15")
+ endif()
+endif()
-add_definitions(-fPIC) # Position Independent Code
-add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+set(CMAKE_MACOSX_RPATH 1)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON) # takes over from the removed add_definitions(-fPIC)
+
+message(STATUS "System name: ${CMAKE_SYSTEM_NAME}") # the messages below only echo the resolved configuration into the build log
+message(STATUS "CMake version: ${CMAKE_VERSION}")
+message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER_ID}")
+message(STATUS "C compiler: ${CMAKE_C_COMPILER_ID}")
+message(STATUS "CXX flags: ${CMAKE_CXX_FLAGS}")
+message(STATUS "C flags: ${CMAKE_C_FLAGS}")
add_library(${PROJECT_NAME} SHARED
- src/base.cc
- src/embedding_feature_extractor.cc
- src/embedding_network.cc
- src/feature_extractor.cc
- src/feature_extractor.h
- src/feature_types.cc
- src/fml_parser.cc
- src/language_identifier_features.cc
- src/lang_id_nn_params.cc
- src/nnet_language_identifier.cc
- src/registry.cc
- src/relevant_script_feature.cc
- src/sentence_features.cc
- src/task_context.cc
- src/task_context_params.cc
- src/unicodetext.cc
- src/utils.cc
- src/workspace.cc
-
- src/script_span/generated_entities.cc
- src/script_span/getonescriptspan.cc
- src/script_span/getonescriptspan.h
- src/script_span/getonescriptspan_test.cc
- src/script_span/utf8statetable.cc
- src/script_span/offsetmap.cc
- src/script_span/text_processing.cc
- src/script_span/text_processing.h
- src/script_span/fixunicodevalue.cc
-
- # bindings
+ src/base.cc
+ src/embedding_feature_extractor.cc
+ src/embedding_network.cc
+ src/feature_extractor.cc
+ src/feature_extractor.h
+ src/feature_types.cc
+ src/fml_parser.cc
+ src/language_identifier_features.cc
+ src/lang_id_nn_params.cc
+ src/nnet_language_identifier.cc
+ src/registry.cc
+ src/relevant_script_feature.cc
+ src/sentence_features.cc
+ src/task_context.cc
+ src/task_context_params.cc
+ src/unicodetext.cc
+ src/utils.cc
+ src/workspace.cc
+
+ src/script_span/generated_entities.cc
+ src/script_span/getonescriptspan.cc
+ src/script_span/getonescriptspan.h
+ src/script_span/getonescriptspan_test.cc
+ src/script_span/utf8statetable.cc
+ src/script_span/offsetmap.cc
+ src/script_span/text_processing.cc
+ src/script_span/text_processing.h
+ src/script_span/fixunicodevalue.cc
+
+ # bindings
src/binding.cc
src/binding.h
src/fake_protobuf.h
@@ -47,12 +82,35 @@ add_library(${PROJECT_NAME} SHARED
set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "src/binding.h")
-# unit tests exec:
-add_executable(language_identifier_main src/language_identifier_main.cc)
-target_link_libraries(language_identifier_main cld3)
+if (APPLE) # macOS: link libc++ / libc++abi statically from the Homebrew LLVM install
+ if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ set(CLANG_LIB_DIR /opt/homebrew/opt/llvm/lib/c++)
+ elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+ set(CLANG_LIB_DIR /usr/local/opt/llvm/lib/c++)
+ endif() # NOTE(review): CLANG_LIB_DIR stays unset when CMAKE_OSX_ARCHITECTURES matches neither value
+
+ target_link_libraries(${PROJECT_NAME}
+ -nostdlib++
+ -Wl,${CLANG_LIB_DIR}/libc++.a
+ -Wl,${CLANG_LIB_DIR}/libc++abi.a)
+else()
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # nop
+ elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+ target_link_libraries(${PROJECT_NAME} -static-libgcc -static-libstdc++)
+ endif()
+endif()
+
+# Build unit tests
+if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") # test executables are only defined for native Linux GCC builds
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ add_executable(language_identifier_main src/language_identifier_main.cc)
+ target_link_libraries(language_identifier_main PRIVATE cld3)
-add_executable(getonescriptspan_test src/script_span/getonescriptspan_test.cc)
-target_link_libraries(getonescriptspan_test cld3)
+ add_executable(getonescriptspan_test src/script_span/getonescriptspan_test.cc)
+ target_link_libraries(getonescriptspan_test PRIVATE cld3)
-add_executable(language_identifier_features_test src/language_identifier_features_test.cc)
-target_link_libraries(language_identifier_features_test cld3)
+ add_executable(language_identifier_features_test src/language_identifier_features_test.cc)
+ target_link_libraries(language_identifier_features_test PRIVATE cld3)
+ endif()
+endif()
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc b/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc
index 807ca6f9..860d1ec7 100644
--- a/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc
+++ b/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc
@@ -3,44 +3,48 @@
using namespace chrome_lang_id;
-void* CreateIdentifier(int minNumBytes, int maxNumBytes) {
- return new NNetLanguageIdentifier(minNumBytes, maxNumBytes);
-}
+extern "C" { // C linkage: the managed wrapper binds to these exact, unmangled names
+ EXPORT void* create_cld3(int minNumBytes, int maxNumBytes) { // heap-allocates an identifier; release it with destroy_cld3
+ return new NNetLanguageIdentifier(minNumBytes, maxNumBytes);
+ }
-void FreeIdentifier(void* identifier) {
- delete static_cast<NNetLanguageIdentifier*>(identifier);
-}
+ EXPORT void destroy_cld3(void* identifier) { // deletes a handle returned by create_cld3
+ delete static_cast<NNetLanguageIdentifier*>(identifier);
+ }
-PredictionResult FindLanguage(void* identifier, const char* text) {
- NNetLanguageIdentifier* nativeIdentifier = static_cast<NNetLanguageIdentifier*>(identifier);
- auto nativeResult = nativeIdentifier->FindLanguage(text);
+ EXPORT PredictionResult* cld3_find_language(void* identifier, const char* text, int* resultCount) { // returns a one-element array; free it with cld3_destroy_prediction_result
+ NNetLanguageIdentifier* nativeIdentifier = static_cast<NNetLanguageIdentifier*>(identifier);
+ auto nativeResult = nativeIdentifier->FindLanguage(text);
+
+ *resultCount = 1; // same array-plus-count shape as cld3_find_languages, so one destroy routine serves both
+ PredictionResult* result = new PredictionResult[*resultCount];
+ result[0].language = strdup(nativeResult.language.c_str()); // malloc'ed copy; released with free() in the destroy routine
+ result[0].probability = nativeResult.probability;
+ result[0].is_reliable = nativeResult.is_reliable;
+ result[0].proportion = nativeResult.proportion;
+ return result;
+ }
- PredictionResult result;
- result.language = strdup(nativeResult.language.c_str());
- result.probability = nativeResult.probability;
- result.is_reliable = nativeResult.is_reliable;
- result.proportion = nativeResult.proportion;
- return result;
-}
+ EXPORT PredictionResult* cld3_find_languages(void* identifier, const char* text, int numLangs, int* resultCount) { // returns *resultCount entries; free them with cld3_destroy_prediction_result
+ NNetLanguageIdentifier* nativeIdentifier = static_cast<NNetLanguageIdentifier*>(identifier);
+ auto nativeResults = nativeIdentifier->FindTopNMostFreqLangs(text, numLangs);
+
+ *resultCount = static_cast<int>(nativeResults.size());
+ PredictionResult* result = new PredictionResult[*resultCount];
+ for (int i = 0; i < *resultCount; ++i) {
+ result[i].language = strdup(nativeResults[i].language.c_str());
+ result[i].probability = nativeResults[i].probability;
+ result[i].is_reliable = nativeResults[i].is_reliable;
+ result[i].proportion = nativeResults[i].proportion;
+ }
+ return result;
+ }
-PredictionResult* FindLanguages(void* identifier, const char* text, int numLangs, int* resultCount) {
- NNetLanguageIdentifier* nativeIdentifier = static_cast<NNetLanguageIdentifier*>(identifier);
- auto nativeResults = nativeIdentifier->FindTopNMostFreqLangs(text, numLangs);
-
- *resultCount = static_cast<int>(nativeResults.size());
- PredictionResult* result = new PredictionResult[*resultCount];
- for (int i = 0; i < *resultCount; ++i) {
- result[i].language = strdup(nativeResults[i].language.c_str());
- result[i].probability = nativeResults[i].probability;
- result[i].is_reliable = nativeResults[i].is_reliable;
- result[i].proportion = nativeResults[i].proportion;
+ EXPORT void cld3_destroy_prediction_result(PredictionResult* results, int count) { // frees each strdup'ed language string, then the array itself
+ for (int i = 0; i < count; ++i) {
+ free((void*)results[i].language);
+ }
+ delete[] results;
}
- return result;
}
-void FreeResults(PredictionResult* results, int count) {
- for (int i = 0; i < count; ++i) {
- free((void*)results[i].language);
- }
- delete[] results;
-}
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD3.Native/Native/src/binding.h b/src/LanguageIdentification.CLD3.Native/Native/src/binding.h
index 7e2ba7a6..0abea8b8 100644
--- a/src/LanguageIdentification.CLD3.Native/Native/src/binding.h
+++ b/src/LanguageIdentification.CLD3.Native/Native/src/binding.h
@@ -1,4 +1,4 @@
-#pragma once
+#pragma once
#include "base.h"
#include "nnet_language_identifier.h"
@@ -6,18 +6,17 @@
using namespace std;
#ifndef EXPORT
-# ifdef __linux__
+# if defined(_WIN32) || defined(_WIN64) // Windows is tested first so any compiler targeting it uses dllexport
+# define EXPORT __declspec(dllexport)
+# elif defined(__GNUC__) || defined(__clang__) // GCC / Clang on other platforms: mark the symbol as default-visible
# define EXPORT __attribute__((visibility("default")))
# else
-# if defined(_MSC_VER)
-# define EXPORT __declspec(dllexport)
-# else
-# define EXPORT __attribute__((visibility("default")))
-# endif
+# define EXPORT // any other compiler: no export decoration
# endif
#endif
-extern "C" {
+extern "C"
+{
struct PredictionResult {
const char* language;
double probability;
@@ -25,9 +24,9 @@ extern "C" {
double proportion;
};
- EXPORT void* CreateIdentifier(int minNumBytes, int maxNumBytes);
- EXPORT void FreeIdentifier(void* identifier);
- EXPORT PredictionResult FindLanguage(void* identifier, const char* text);
- EXPORT PredictionResult* FindLanguages(void* identifier, const char* text, int numLangs, int* resultCount);
- EXPORT void FreeResults(PredictionResult* results, int count);
+ EXPORT void* create_cld3(int minNumBytes, int maxNumBytes); // creates an identifier handle
+ EXPORT void destroy_cld3(void* identifier); // releases a handle from create_cld3
+ EXPORT PredictionResult* cld3_find_language(void* identifier, const char* text, int* resultCount); // now returns an array plus count instead of a struct by value
+ EXPORT PredictionResult* cld3_find_languages(void* identifier, const char* text, int numLangs, int* resultCount); // number of returned entries is written to *resultCount
+ EXPORT void cld3_destroy_prediction_result(PredictionResult* results, int count); // releases an array returned by either find function
}
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD3.Native/Native/toolchain-mingw.cmake b/src/LanguageIdentification.CLD3.Native/Native/toolchain-mingw.cmake
new file mode 100644
index 00000000..94e97c6b
--- /dev/null
+++ b/src/LanguageIdentification.CLD3.Native/Native/toolchain-mingw.cmake
@@ -0,0 +1,16 @@
+set(CMAKE_SYSTEM_NAME Windows) # target OS; setting it explicitly puts CMake into cross-compiling mode
+set(TOOLCHAIN_PREFIX x86_64-w64-mingw32) # prefix shared by the compiler names and the find-root path below
+
+# cross compilers to use for C, C++ and Fortran
+set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc)
+set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}-g++)
+set(CMAKE_Fortran_COMPILER ${TOOLCHAIN_PREFIX}-gfortran) # NOTE(review): no Fortran sources are visible in this project - confirm this is needed
+set(CMAKE_RC_COMPILER ${TOOLCHAIN_PREFIX}-windres)
+
+# target environment on the build host system
+set(CMAKE_FIND_ROOT_PATH /usr/${TOOLCHAIN_PREFIX})
+
+# modify default behavior of FIND_XXX() commands
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) # programs are looked up on the build host only
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) # libraries are looked up under CMAKE_FIND_ROOT_PATH only
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) # headers are looked up under CMAKE_FIND_ROOT_PATH only
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD3.Native/Native/toolchain-osxcross.cmake b/src/LanguageIdentification.CLD3.Native/Native/toolchain-osxcross.cmake
new file mode 100644
index 00000000..6e1223e1
--- /dev/null
+++ b/src/LanguageIdentification.CLD3.Native/Native/toolchain-osxcross.cmake
@@ -0,0 +1,7 @@
+set(CMAKE_SYSTEM_NAME Darwin) # target OS is macOS; setting it explicitly puts CMake into cross-compiling mode
+set(CMAKE_SYSTEM_VERSION 1)
+
+# Path to the osxcross toolchain binaries
+set(CMAKE_OSX_SYSROOT /usr/local/osxcross/target) # NOTE(review): points at the osxcross target root rather than a specific SDK directory - confirm
+set(CMAKE_C_COMPILER /usr/local/osxcross/bin/o64-clang)
+set(CMAKE_CXX_COMPILER /usr/local/osxcross/bin/o64-clang++)
\ No newline at end of file
diff --git a/src/LanguageIdentification.CLD3.Native/Scripts/run-build.macos.sh b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.macos.sh
new file mode 100644
index 00000000..cd169515
--- /dev/null
+++ b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.macos.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+set -e
+# Builds libcld3.dylib for one macOS architecture. Expects to be started from the directory that contains Native/.
+if [ -z "$1" ]; then
+ echo "Error: No architecture specified." >&2
+ echo "Usage: $0 <x86_64|arm64>" >&2
+ exit 1
+fi
+
+ARCH=$1
+
+if [[ "$ARCH" != "x86_64" && "$ARCH" != "arm64" ]]; then
+ echo "Error: Invalid architecture specified. Use 'x86_64' or 'arm64'." >&2
+ exit 1
+fi
+
+echo "Hello world $ARCH";
+
+brew install llvm
+npm install -g zx
+
+workspace="build_temp"
+# Options must precede operands: BSD mkdir (macOS) would treat a trailing "-p" as a directory name.
+mkdir -p "$workspace"
+cp -a ../../third_party/cld3/. "$workspace/."
+cp -a Native/. "$workspace"
+
+ls -R .
+
+cd "$workspace"
+
+zx ../Native/monkey-patch.mjs
+# -p keeps a leftover build directory from aborting the script under `set -e`.
+mkdir -p build
+cd build
+
+echo "Build for MacOS on $ARCH";
+rm -rf ./*
+cmake -DCMAKE_OSX_ARCHITECTURES="$ARCH" ..
+make -j "$(sysctl -n hw.logicalcpu)"
+
+ls -R
+
+otool -L libcld3.dylib
+cp libcld3.dylib "../../libcld3.$ARCH.dylib"
+
+# Clean up: we are in $workspace/build, so step back out before deleting the relative path.
+cd ../.. && rm -rf "$workspace"
+echo "Goodbye world";
diff --git a/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh
index facae322..ef0e887b 100644
--- a/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh
+++ b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh
@@ -13,25 +13,41 @@ ls -R .
cd "$workspace"
+curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash
+export NVM_DIR="$HOME/.nvm"
+[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
+nvm install 22
+nvm use 22
+npm install -g zx
+# zx (installed above through nvm-managed Node 22) runs the patch script below.
zx ../Native/monkey-patch.mjs
mkdir build
cd build
+
+# Build for Linux
+rm -rf *
cmake ..
make -j $(nproc) # make
./language_identifier_main # run tests
./language_identifier_features_test # run tests
-cd ..
-
-echo $(pwd)
-ls -R build
-cd ..
+ls -R
-find "$workspace/build" -name "libcld3.so" -exec cp {} libcld3.so \;
-rm -rf "$workspace"
ldd libcld3.so
+cp libcld3.so ../../libcld3.so
+# Build for Windows
+rm -rf *
+cmake .. -DCMAKE_TOOLCHAIN_FILE=./toolchain-mingw.cmake
+make -j $(nproc) # make
+
+ls -R
+
+cp libcld3.dll ../../libcld3.dll
+
+# Clean up: still inside $workspace/build here, so step back out before deleting the workspace.
+cd ../.. && rm -rf "$workspace"
echo "Goodbye world";
diff --git a/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh b/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh
index f54c7eb6..324f3894 100644
--- a/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh
+++ b/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh
@@ -3,14 +3,11 @@ set -e
echo "Installing build packages";
-apt-get update && apt-get install -y ca-certificates curl gnupg && \
- curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
- echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
-
apt -y update
+apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates # curl is what run-build.sh uses to fetch the nvm installer
apt -y install cmake
apt -y install g++
-apt -y install nodejs
+apt -y install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 # x86_64-w64-mingw32 cross compilers named in toolchain-mingw.cmake
+
-npm install -g zx
diff --git a/src/LanguageIdentification.CLD3/CLD3Detector.cs b/src/LanguageIdentification.CLD3/CLD3Detector.cs
index 5b5a1d9e..195c80d6 100644
--- a/src/LanguageIdentification.CLD3/CLD3Detector.cs
+++ b/src/LanguageIdentification.CLD3/CLD3Detector.cs
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
+using System.Drawing;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading;
@@ -17,17 +18,29 @@ public class CLD3Detector : IDisposable
public CLD3Detector(int minNumBytes, int maxNumBytes)
{
- if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ if (!IsSupported()) // platform gate now lives in IsSupported() instead of a Linux-only check
{
throw new NotSupportedException(
$"{nameof(CLD3Detector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}"
);
}
- _identifier = CLD3DetectorWrapper.CreateIdentifier(minNumBytes, maxNumBytes);
+ _identifier = CLD3DetectorWrapper.CreateCLD3(minNumBytes, maxNumBytes); // native handle; released through DestroyCLD3 in Dispose()
_semaphore = new(1, 1);
}
+ public static bool IsSupported()
+ {
+ // Only x64 Linux and x64 Windows are enabled; the macOS (x64 / arm64) cases are not switched on yet.
+ if (RuntimeInformation.OSArchitecture != Architecture.X64)
+ {
+ return false;
+ }
+
+ var isLinux = RuntimeInformation.IsOSPlatform(OSPlatform.Linux);
+ return isLinux || RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+ }
+
public void Dispose()
{
GC.SuppressFinalize(this);
@@ -35,7 +48,7 @@ public void Dispose()
try
{
_semaphore.Wait();
- CLD3DetectorWrapper.FreeIdentifier(_identifier);
+ CLD3DetectorWrapper.DestroyCLD3(_identifier);
}
finally
{
@@ -50,8 +63,29 @@ public void Dispose()
/// List of language predictions
public CLD3Prediction PredictLanguage(string text)
{
- var result = CLD3DetectorWrapper.FindLanguage(_identifier, text);
- return new CLD3Prediction(result);
+ var resultPtr = CLD3DetectorWrapper.PredictLanguage(
+ identifier: _identifier,
+ text: text,
+ resultCount: out var resultCount
+ );
+ // The native call hands back an unmanaged array; copy it into managed structs, then free it in `finally`.
+ try
+ {
+ var nativeResult = new CLD3PredictionResult[resultCount];
+ var structSize = Marshal.SizeOf(typeof(CLD3PredictionResult));
+
+ for (var i = 0; i < resultCount; i++)
+ {
+ nativeResult[i] = Marshal.PtrToStructure<CLD3PredictionResult>(resultPtr + i * structSize);
+ }
+
+ var firstItem = nativeResult.First();
+ return new CLD3Prediction(firstItem);
+ }
+ finally
+ {
+ CLD3DetectorWrapper.DestroyPredictionResult(resultPtr, resultCount);
+ }
}
///
@@ -65,7 +99,7 @@ public IEnumerable PredictLanguages(
int count
)
{
- var resultPtr = CLD3DetectorWrapper.FindLanguages(
+ var resultPtr = CLD3DetectorWrapper.PredictLanguages(
identifier: _identifier,
text: text,
numLangs: count,
@@ -74,22 +108,22 @@ int count
try
{
- var result = new CLD3PredictionResult[resultCount];
+ var nativeResult = new CLD3PredictionResult[resultCount]; // managed copies of the unmanaged result array
var structSize = Marshal.SizeOf(typeof(CLD3PredictionResult));
for (var i = 0; i < resultCount; i++)
{
- result[i] = Marshal.PtrToStructure<CLD3PredictionResult>(resultPtr + i * structSize);
+ nativeResult[i] = Marshal.PtrToStructure<CLD3PredictionResult>(resultPtr + i * structSize);
}
- return result
+ return nativeResult
.OrderByDescending(x => x.Probability)
.Select(x => new CLD3Prediction(x))
.ToArray();
}
finally
{
- CLD3DetectorWrapper.FreeResults(resultPtr, resultCount);
+ CLD3DetectorWrapper.DestroyPredictionResult(resultPtr, resultCount); // always hand the array back to the native side
}
}
}
diff --git a/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs b/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs
index 1d856609..9fc9e0de 100644
--- a/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs
+++ b/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs
@@ -6,20 +6,29 @@ namespace Panlingo.LanguageIdentification.CLD3.Internal
{
internal static class CLD3DetectorWrapper
{
- [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern IntPtr CreateIdentifier(int minNumBytes, int maxNumBytes);
+ [DllImport(CLD3NativeLibrary.Name, EntryPoint = "create_cld3", CallingConvention = CallingConvention.Cdecl)] // EntryPoint maps the C# name onto the renamed native export
+ public static extern IntPtr CreateCLD3(int minNumBytes, int maxNumBytes);
- [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern void FreeIdentifier(IntPtr identifier);
+ [DllImport(CLD3NativeLibrary.Name, EntryPoint = "destroy_cld3", CallingConvention = CallingConvention.Cdecl)]
+ public static extern void DestroyCLD3(IntPtr identifier);
- [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern CLD3PredictionResult FindLanguage(IntPtr identifier, string text);
+ [DllImport(CLD3NativeLibrary.Name, EntryPoint = "cld3_find_language", CallingConvention = CallingConvention.Cdecl)]
+ public static extern IntPtr PredictLanguage( // returns a pointer to a native result array; release it with DestroyPredictionResult
+ IntPtr identifier,
+ [MarshalAs(UnmanagedType.LPUTF8Str)] string text, // text is marshalled as a UTF-8 C string
+ out int resultCount
+ );
- [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern IntPtr FindLanguages(IntPtr identifier, string text, int numLangs, out int resultCount);
+ [DllImport(CLD3NativeLibrary.Name, EntryPoint = "cld3_find_languages", CallingConvention = CallingConvention.Cdecl)]
+ public static extern IntPtr PredictLanguages( // returns a pointer to a native result array; release it with DestroyPredictionResult
+ IntPtr identifier,
+ [MarshalAs(UnmanagedType.LPUTF8Str)] string text, // text is marshalled as a UTF-8 C string
+ int numLangs,
+ out int resultCount
+ );
- [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern void FreeResults(IntPtr results, int count);
+ [DllImport(CLD3NativeLibrary.Name, EntryPoint = "cld3_destroy_prediction_result", CallingConvention = CallingConvention.Cdecl)]
+ public static extern void DestroyPredictionResult(IntPtr results, int count); // count must be the resultCount that came with the array
}
}
diff --git a/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj b/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj
index 4e6828bc..2f715fe8 100644
--- a/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj
+++ b/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj
@@ -2,7 +2,7 @@
net5.0;net6.0;net7.0;net8.0
- 0.0.0.21
+ 0.1.0
Panlingo.LanguageIdentification.CLD3
Panlingo.LanguageIdentification.CLD3
Panlingo.LanguageIdentification.CLD3
@@ -15,6 +15,9 @@
nlp lid language-identification language-detection cld3
README_CLD3.md
+0.1.0:
+- Windows support
+
0.0.0.20:
- Protobuf is not required now!
diff --git a/src/LanguageIdentification.FastText.ConsoleTest/Program.cs b/src/LanguageIdentification.FastText.ConsoleTest/Program.cs
index fc018ec3..2e294170 100644
--- a/src/LanguageIdentification.FastText.ConsoleTest/Program.cs
+++ b/src/LanguageIdentification.FastText.ConsoleTest/Program.cs
@@ -4,7 +4,7 @@ internal class Program
{
static void Main(string[] args)
{
- var text = "Hello, how are you? Привіт, як справи? Привет, как дела?";
+ var text = "Привіт, як справи?";
using var fastText = new FastTextDetector();
diff --git a/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json b/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json
index 65b8965a..1f434ec6 100644
--- a/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json
+++ b/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json
@@ -2,6 +2,13 @@
"profiles": {
"Docker": {
"commandName": "Docker"
+ },
+ "WSL": {
+ "commandName": "WSL2",
+ "distributionName": ""
+ },
+ "Project": {
+ "commandName": "Project"
}
}
}
\ No newline at end of file
diff --git a/src/LanguageIdentification.FastText.Native/.gitignore b/src/LanguageIdentification.FastText.Native/.gitignore
index 01a24561..d15bc74b 100644
--- a/src/LanguageIdentification.FastText.Native/.gitignore
+++ b/src/LanguageIdentification.FastText.Native/.gitignore
@@ -1,2 +1,5 @@
libfasttext.so
+fasttext.dll
+libfasttext.arm64.dylib
+libfasttext.x86_64.dylib
build_temp/**
diff --git a/src/LanguageIdentification.FastText.Native/Dockerfile b/src/LanguageIdentification.FastText.Native/Dockerfile
index 2948215c..2a0e5655 100644
--- a/src/LanguageIdentification.FastText.Native/Dockerfile
+++ b/src/LanguageIdentification.FastText.Native/Dockerfile
@@ -1,5 +1,4 @@
-FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
-ARG BUILD_CONFIGURATION=Release
+FROM debian:bullseye-slim AS build
WORKDIR /repo
COPY ["src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj", "src/LanguageIdentification.FastText.Native/"]
diff --git a/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs b/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs
index a0688250..064f5cae 100644
--- a/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs
+++ b/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs
@@ -5,7 +5,7 @@ public class FastTextNativeLibrary
///
/// Name of native binary
///
- public const string Name = "libfasttext.so";
+ public const string Name = "fasttext"; // bare library name without the Linux-only "lib" prefix and ".so" suffix; the packaged binaries are fasttext.dll, libfasttext.so and libfasttext.dylib
}
}
diff --git a/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj b/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj
index 375f561b..197a8b72 100644
--- a/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj
+++ b/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj
@@ -2,7 +2,7 @@
netstandard2.1
- 0.0.0.21
+ 0.1.0
Panlingo.LanguageIdentification.FastText.Native
Panlingo.LanguageIdentification.FastText.Native
Panlingo.LanguageIdentification.FastText.Native
@@ -62,6 +62,36 @@
+
+
+ PreserveNewest
+ true
+ runtimes/win-x64/native
+ true
+ false
+
+
+
+
+
+ PreserveNewest
+ true
+ runtimes/osx-x64/native/libfasttext.dylib
+ true
+ false
+
+
+
+
+
+ PreserveNewest
+ true
+ runtimes/osx-arm64/native/libfasttext.dylib
+ true
+ false
+
+
+
diff --git a/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt b/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt
index 0fe25023..9dfc83c8 100644
--- a/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt
+++ b/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt
@@ -1,15 +1,41 @@
-cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
-project(fasttext_bridge)
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
-if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+project(fasttext) # the project name is reused as the shared-library target name via ${PROJECT_NAME}
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD_REQUIRED ON)
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") # -O3 is now applied unconditionally (it used to be skipped for Debug builds)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -lpthread")
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") # regex match, so "AppleClang" is covered too
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3")
+
+ if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") # pin the architecture and minimum macOS version per target
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64 -target arm64-apple-macos11")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64 -target arm64-apple-macos11")
+ elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15")
+ endif()
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") # NOTE(review): /arch:AVX2 allows AVX2 instructions in the DLL - confirm every target CPU supports them
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /W3 /O2 /fp:precise /arch:AVX2")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W3 /O2 /fp:precise /arch:AVX2")
endif()
set(CMAKE_MACOSX_RPATH 1)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+message(STATUS "CMake version: ${CMAKE_VERSION}") # the messages below only echo the resolved configuration into the build log
+message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER_ID}")
+message(STATUS "C compiler: ${CMAKE_C_COMPILER_ID}")
+message(STATUS "CXX flags: ${CMAKE_CXX_FLAGS}")
+message(STATUS "C flags: ${CMAKE_C_FLAGS}")
+
add_subdirectory(fasttext EXCLUDE_FROM_ALL)
include_directories(
@@ -21,7 +47,24 @@ set(SOURCES ${PROJECT_SOURCE_DIR}/binding.cc)
add_library(objlib OBJECT ${SOURCES})
-add_library(fasttext SHARED $<TARGET_OBJECTS:objlib>)
-set_target_properties(fasttext PROPERTIES PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/binding.h)
-target_link_libraries(fasttext fasttext-static_pic)
+add_library(${PROJECT_NAME} SHARED $<TARGET_OBJECTS:objlib>)
+set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/binding.h)
+
+if (APPLE) # macOS: link libc++ / libc++abi statically from the Homebrew LLVM install
+ if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ set(CLANG_LIB_DIR /opt/homebrew/opt/llvm/lib/c++)
+ elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+ set(CLANG_LIB_DIR /usr/local/opt/llvm/lib/c++)
+ endif()
+ target_link_libraries(${PROJECT_NAME} fasttext-static_pic
+ -nostdlib++
+ -Wl,${CLANG_LIB_DIR}/libc++.a
+ -Wl,${CLANG_LIB_DIR}/libc++abi.a)
+else()
+ target_link_libraries(${PROJECT_NAME} fasttext-static_pic)
+ if(MINGW)
+ # GCC-only driver flags: keep them away from MSVC's linker, which does not understand them.
+ target_link_libraries(${PROJECT_NAME} -static-libgcc -static-libstdc++)
+ endif()
+endif()
diff --git a/src/LanguageIdentification.FastText.Native/Native/binding.cc b/src/LanguageIdentification.FastText.Native/Native/binding.cc
index c2f269ba..af7f58e6 100644
--- a/src/LanguageIdentification.FastText.Native/Native/binding.cc
+++ b/src/LanguageIdentification.FastText.Native/Native/binding.cc
@@ -18,30 +18,31 @@ extern "C" {
*err_ptr = strdup(e.what());
}
- void DestroyString(char* s) {
+ EXPORT void DestroyString(char* s) {
if (s != nullptr) {
free(s);
}
}
- fasttext_t* CreateFastText(void) {
+ EXPORT fasttext_t* CreateFastText(void) {
return (fasttext_t*)(new FastTextExtension());
}
- void DestroyFastText(fasttext_t* handle) {
+ EXPORT void DestroyFastText(fasttext_t* handle) {
FastTextExtension* x = (FastTextExtension*)handle;
delete x;
}
- void FastTextLoadModel(fasttext_t* handle, const char* filename, char** err_ptr) {
+ EXPORT void FastTextLoadModel(fasttext_t* handle, const char* filename, char** err_ptr) {
try {
((FastTextExtension*)handle)->loadModel(filename);
- } catch (const std::invalid_argument& e) {
+ }
+ catch (const std::invalid_argument& e) {
save_error(err_ptr, e);
}
}
- void FastTextLoadModelData(fasttext_t* handle, const char* buffer, size_t buffer_length, char** err_ptr) {
+ EXPORT void FastTextLoadModelData(fasttext_t* handle, const char* buffer, size_t buffer_length, char** err_ptr) {
try {
((FastTextExtension*)handle)->loadModelData(buffer, buffer_length);
}
@@ -50,16 +51,17 @@ extern "C" {
}
}
- int FastTextGetModelDimensions(fasttext_t* handle) {
+ EXPORT int FastTextGetModelDimensions(fasttext_t* handle) {
return ((FastTextExtension*)handle)->getDimension();
}
- fasttext_predictions_t* FastTextPredict(fasttext_t* handle, const char* text, int32_t k, float threshold, char** err_ptr) {
+ EXPORT fasttext_predictions_t* FastTextPredict(fasttext_t* handle, const char* text, int32_t k, float threshold, char** err_ptr) {
std::vector> predictions;
std::stringstream ioss(text);
try {
((FastTextExtension*)handle)->predictLine(ioss, predictions, k, threshold);
- } catch (const std::invalid_argument& e) {
+ }
+ catch (const std::invalid_argument& e) {
save_error(err_ptr, e);
return nullptr;
}
@@ -75,7 +77,7 @@ extern "C" {
return ret;
}
- void DestroyPredictions(fasttext_predictions_t* predictions) {
+ EXPORT void DestroyPredictions(fasttext_predictions_t* predictions) {
if (predictions == nullptr) {
return;
}
@@ -87,7 +89,7 @@ extern "C" {
free(predictions);
}
- fasttext_labels_t* FastTextGetLabels(fasttext_t* handle) {
+ EXPORT fasttext_labels_t* FastTextGetLabels(fasttext_t* handle) {
std::shared_ptr d = ((FastTextExtension*)handle)->getDictionary();
std::vector labels_freq = d->getCounts(fasttext::entry_type::label);
size_t len = labels_freq.size();
@@ -106,7 +108,7 @@ extern "C" {
return ret;
}
- void DestroyLabels(fasttext_labels_t* labels) {
+ EXPORT void DestroyLabels(fasttext_labels_t* labels) {
if (labels == nullptr) {
return;
}
@@ -118,18 +120,18 @@ extern "C" {
free(labels);
}
- void FastTextAbort(fasttext_t* handle) {
+ EXPORT void FastTextAbort(fasttext_t* handle) {
((FastTextExtension*)handle)->abort();
}
- fasttext_tokens_t* FastTextTokenize(fasttext_t* handle, const char* text) {
+ EXPORT fasttext_tokens_t* FastTextTokenize(fasttext_t* handle, const char* text) {
std::vector text_split;
std::shared_ptr d = ((FastTextExtension*)handle)->getDictionary();
std::stringstream ioss(text);
std::string token;
while (!ioss.eof()) {
while (d->readWord(ioss, token)) {
- text_split.push_back(token);
+ text_split.push_back(token);
}
}
size_t len = text_split.size();
@@ -143,7 +145,7 @@ extern "C" {
return ret;
}
- void DestroyTokens(fasttext_tokens_t* tokens) {
+ EXPORT void DestroyTokens(fasttext_tokens_t* tokens) {
for (size_t i = 0; i < tokens->length; i++) {
free(tokens->tokens[i]);
}
diff --git a/src/LanguageIdentification.FastText.Native/Native/binding.h b/src/LanguageIdentification.FastText.Native/Native/binding.h
index ad4909f6..a9170377 100644
--- a/src/LanguageIdentification.FastText.Native/Native/binding.h
+++ b/src/LanguageIdentification.FastText.Native/Native/binding.h
@@ -6,14 +6,16 @@
#ifndef EXPORT
# if defined(_WIN32) || defined(_WIN64)
-# define EXPORT __declspec(dllimport)
+# define EXPORT __declspec(dllexport)
+# elif defined(__GNUC__) || defined(__clang__)
+# define EXPORT __attribute__((visibility("default")))
# else
-# define EXPORT extern
+# define EXPORT
# endif
#endif
-extern "C" {
-
+extern "C"
+{
typedef struct fasttext_t fasttext_t;
typedef struct {
diff --git a/src/LanguageIdentification.FastText.Native/Scripts/run-build.macos.sh b/src/LanguageIdentification.FastText.Native/Scripts/run-build.macos.sh
new file mode 100644
index 00000000..dcf0d7b0
--- /dev/null
+++ b/src/LanguageIdentification.FastText.Native/Scripts/run-build.macos.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Builds libfasttext.dylib for the requested macOS architecture and copies it
+# into the project directory as libfasttext.<arch>.dylib.
+# Usage: run-build.macos.sh <x86_64|arm64>
+set -e
+
+if [ -z "$1" ]; then
+    echo "Error: No architecture specified."
+    echo "Usage: $0 <x86_64|arm64>"
+    exit 1
+fi
+
+ARCH=$1
+
+if [[ "$ARCH" != "x86_64" && "$ARCH" != "arm64" ]]; then
+    echo "Error: Invalid architecture specified. Use 'x86_64' or 'arm64'."
+    exit 1
+fi
+
+echo "Hello world $ARCH";
+
+brew install llvm
+
+workspace="build_temp"
+
+# BSD mkdir (macOS) stops option parsing at the first operand, so -p must come first.
+mkdir -p "$workspace"
+cp -a ../../third_party/fastText/. "$workspace/fasttext"
+cp -a Native/. "$workspace"
+
+ls -R .
+
+cd "$workspace"
+
+mkdir -p build
+cd build
+
+echo "Build for MacOS on $ARCH";
+rm -rf *
+cmake -DCMAKE_OSX_ARCHITECTURES="$ARCH" ..
+make -j "$(sysctl -n hw.logicalcpu)"
+
+ls -R
+
+otool -L libfasttext.dylib
+cp libfasttext.dylib "../../libfasttext.$ARCH.dylib"
+
+# Clean up: $workspace is relative to the project directory, so step back out
+# of $workspace/build before removing it.
+cd ../..
+rm -rf "$workspace"
+echo "Goodbye world";
diff --git a/src/LanguageIdentification.FastText.Native/Scripts/run-build.ps1 b/src/LanguageIdentification.FastText.Native/Scripts/run-build.ps1
new file mode 100644
index 00000000..c34c510c
--- /dev/null
+++ b/src/LanguageIdentification.FastText.Native/Scripts/run-build.ps1
@@ -0,0 +1,49 @@
+# Builds fasttext.dll on Windows with CMake and copies it into the project directory.
+# Stop on any PowerShell (cmdlet) error.
+$ErrorActionPreference = "Stop"
+
+# A non-zero exit code of a native executable is not a PowerShell error by default,
+# so the exit code of every cmake invocation is checked explicitly.
+function Assert-LastExitCode([string]$step) {
+    if ($LASTEXITCODE -ne 0) {
+        throw "$step failed with exit code $LASTEXITCODE"
+    }
+}
+
+Write-Output "Hello world"
+
+$workspace = "build_temp"
+
+# Create the workspace directories (-Force makes this a no-op when they already exist)
+New-Item -Path "$workspace/fasttext" -ItemType Directory -Force | Out-Null
+
+# Copy the fastText sources and the native binding into the workspace
+Copy-Item -Path "../../third_party/fastText/*" -Destination "$workspace/fasttext" -Recurse -Force
+Copy-Item -Path "Native/*" -Destination $workspace -Recurse -Force
+
+# List directory contents recursively
+Get-ChildItem -Recurse -Path .
+
+Set-Location $workspace
+
+# Create and enter build directory
+New-Item -Path "build" -ItemType Directory -Force | Out-Null
+Set-Location "build"
+
+# Build for Windows. Multi-config generators build Debug unless told otherwise, and a
+# Debug binary depends on the non-redistributable debug CRT, so Release is requested.
+cmake ..
+Assert-LastExitCode "CMake configure"
+cmake --build . --config Release
+Assert-LastExitCode "CMake build"
+
+# List directory contents recursively
+Get-ChildItem -Recurse -Path .
+
+# Copy the built library into the project directory
+Copy-Item -Path ".\Release\fasttext.dll" -Destination "../../fasttext.dll"
+
+# Clean up
+Set-Location ../..
+Remove-Item -Path $workspace -Recurse -Force
+Write-Output "Goodbye world"
diff --git a/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh b/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh
index 6e001a81..e069b375 100644
--- a/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh
+++ b/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh
@@ -15,17 +15,18 @@ cd "$workspace"
 mkdir build
 cd build
+
+# Build for Linux
+rm -rf *
 cmake ..
 make -j $(nproc) # make
-cd ..
-echo $(pwd)
-ls -R build
-cd ..
+ls -R
-find "$workspace/build" -name "libfasttext.so" -exec cp {} libfasttext.so \;
-rm -rf "$workspace"
 ldd libfasttext.so
+cp libfasttext.so ../../libfasttext.so
+# Clean up: step back out of $workspace/build first, otherwise a relative $workspace does not resolve
+cd ../..
+rm -rf "$workspace"
 echo "Goodbye world";
-
diff --git a/src/LanguageIdentification.FastText/FastTextDetector.cs b/src/LanguageIdentification.FastText/FastTextDetector.cs
index abdce527..5c7f4181 100644
--- a/src/LanguageIdentification.FastText/FastTextDetector.cs
+++ b/src/LanguageIdentification.FastText/FastTextDetector.cs
@@ -18,7 +18,7 @@ public class FastTextDetector : IDisposable
public FastTextDetector()
{
- if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ if (!IsSupported())
{
throw new NotSupportedException(
$"{nameof(FastTextDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}"
@@ -29,6 +29,29 @@ public FastTextDetector()
             _semaphore = new SemaphoreSlim(1, 1);
         }
+        /// <summary>
+        /// Checks whether the native FastText library is available for the current process.
+        /// </summary>
+        /// <remarks>
+        /// The process architecture is checked rather than the OS architecture, because the
+        /// native library is loaded into the process: a 32-bit process on a 64-bit OS cannot
+        /// load a 64-bit binary.
+        /// </remarks>
+        /// <returns>
+        /// <c>true</c> on Linux x64, Windows x64, macOS x64 and macOS arm64; otherwise <c>false</c>.
+        /// </returns>
+        public static bool IsSupported()
+        {
+            return RuntimeInformation.ProcessArchitecture switch
+            {
+                Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true,
+                Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Windows) => true,
+                Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true,
+                Architecture.Arm64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true,
+                _ => false,
+            };
+        }
+
public string ModelPath { get; private set; } = string.Empty;
///
@@ -153,10 +165,11 @@ ref errPtr
var predictions = Marshal.PtrToStructure(predictionPtr);
var result = new List();
- for (ulong i = 0; i < predictions.Length; i++)
+ var structSize = Marshal.SizeOf();
+
+ for (var i = 0; i < (int)predictions.Length; i++)
{
- IntPtr elementPtr = new IntPtr(predictions.Predictions.ToInt64() + (long)(i * (uint)Marshal.SizeOf()));
- var prediction = Marshal.PtrToStructure(elementPtr);
+ var prediction = Marshal.PtrToStructure(predictions.Predictions + i * structSize);
var label = DecodeString(prediction.Label);
result.Add(new FastTextPrediction(
diff --git a/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs b/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs
index f8815d77..3f4756b1 100644
--- a/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs
+++ b/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs
@@ -14,16 +14,22 @@ internal static class FastTextDetectorWrapper
public static extern void DestroyFastText(IntPtr handle);
[DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern void FastTextLoadModel(IntPtr handle, string filename, ref IntPtr errptr);
+ public static extern void FastTextLoadModel(IntPtr handle, string filename, ref IntPtr errPtr);
[DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern void FastTextLoadModelData(IntPtr handle, IntPtr buffer, uint bufferLength, ref IntPtr errptr);
+ public static extern void FastTextLoadModelData(IntPtr handle, IntPtr buffer, uint bufferLength, ref IntPtr errPtr);
[DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
public static extern int FastTextGetModelDimensions(IntPtr handle);
[DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
- public static extern IntPtr FastTextPredict(IntPtr handle, string text, int k, float threshold, ref IntPtr errptr);
+ public static extern IntPtr FastTextPredict(
+ IntPtr handle,
+ [MarshalAs(UnmanagedType.LPUTF8Str)] string text,
+ int k,
+ float threshold,
+ ref IntPtr errPtr
+ );
[DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)]
public static extern void DestroyPredictions(IntPtr predictions);
diff --git a/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj b/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj
index 089e5159..486577bb 100644
--- a/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj
+++ b/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj
@@ -2,7 +2,7 @@
net5.0;net6.0;net7.0;net8.0
- 0.0.0.21
+ 0.1.0
Panlingo.LanguageIdentification.FastText
Panlingo.LanguageIdentification.FastText
Panlingo.LanguageIdentification.FastText
@@ -15,6 +15,9 @@
nlp lid language-identification language-detection fasttext
README_FASTTEXT.md
+0.1.0:
+- Windows and MacOS support
+
0.0.0.21:
- Default FastText model is included in NuGet package
diff --git a/src/LanguageIdentification.Lingua.Native/Dockerfile b/src/LanguageIdentification.Lingua.Native/Dockerfile
index f60462e0..b0bbaabc 100644
--- a/src/LanguageIdentification.Lingua.Native/Dockerfile
+++ b/src/LanguageIdentification.Lingua.Native/Dockerfile
@@ -1,5 +1,4 @@
FROM ubuntu:22.04 AS build
-ARG BUILD_CONFIGURATION=Release
WORKDIR /repo
COPY ["src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj", "src/LanguageIdentification.Lingua.Native/"]
diff --git a/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj b/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj
index 317961a2..a2fa47e8 100644
--- a/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj
+++ b/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj
@@ -2,7 +2,7 @@
netstandard2.1
- 0.0.0.21
+ 0.0.0.23
Panlingo.LanguageIdentification.Lingua.Native
Panlingo.LanguageIdentification.Lingua.Native
Panlingo.LanguageIdentification.Lingua.Native
diff --git a/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs b/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs
index 4019366a..7c6326bb 100644
--- a/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs
+++ b/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs
@@ -5,6 +5,6 @@ public static class LinguaNativeLibrary
///
/// Name of native binary
///
- public const string Name = "liblingua.so";
+ public const string Name = "lingua";
}
}
diff --git a/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj b/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj
index 16c4a33b..60341491 100644
--- a/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj
+++ b/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj
@@ -2,7 +2,7 @@
net5.0;net6.0;net7.0;net8.0
- 0.0.0.21
+ 0.0.0.23
Panlingo.LanguageIdentification.Lingua
Panlingo.LanguageIdentification.Lingua
Panlingo.LanguageIdentification.Lingua
diff --git a/src/LanguageIdentification.Lingua/LinguaDetector.cs b/src/LanguageIdentification.Lingua/LinguaDetector.cs
index 9e39e1f5..3a1808e4 100644
--- a/src/LanguageIdentification.Lingua/LinguaDetector.cs
+++ b/src/LanguageIdentification.Lingua/LinguaDetector.cs
@@ -16,7 +16,7 @@ public class LinguaDetector : IDisposable
internal LinguaDetector(LinguaDetectorBuilder builder)
{
- if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ if (!IsSupported())
{
throw new NotSupportedException(
$"{nameof(LinguaDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}"
@@ -30,6 +30,26 @@ internal LinguaDetector(LinguaDetectorBuilder builder)
             }
         }
+        /// <summary>
+        /// Checks whether the native Lingua library is available for the current process.
+        /// </summary>
+        /// <remarks>
+        /// The process architecture is checked rather than the OS architecture, because the
+        /// native library is loaded into the process: a 32-bit process on a 64-bit OS cannot
+        /// load a 64-bit binary.
+        /// </remarks>
+        /// <returns>
+        /// <c>true</c> on Linux x64; otherwise <c>false</c>.
+        /// </returns>
+        public static bool IsSupported()
+        {
+            return RuntimeInformation.ProcessArchitecture switch
+            {
+                Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true,
+                _ => false,
+            };
+        }
+
///
/// Produces a prediction for 'text'
///
diff --git a/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs b/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs
index 2e1624ce..70f1221f 100644
--- a/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs
+++ b/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs
@@ -14,7 +14,7 @@ public class LinguaDetectorBuilder : IDisposable
public LinguaDetectorBuilder(LinguaLanguage[] languages)
{
- if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ if (!LinguaDetector.IsSupported())
{
throw new NotSupportedException(
$"{nameof(LinguaDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}"
diff --git a/src/LanguageIdentification.MediaPipe.Native/Dockerfile b/src/LanguageIdentification.MediaPipe.Native/Dockerfile
index 078d583f..5fa55755 100644
--- a/src/LanguageIdentification.MediaPipe.Native/Dockerfile
+++ b/src/LanguageIdentification.MediaPipe.Native/Dockerfile
@@ -1,5 +1,4 @@
FROM ubuntu:22.04 AS build
-ARG BUILD_CONFIGURATION=Release
WORKDIR /repo
COPY ["src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj", "src/LanguageIdentification.MediaPipe.Native/"]
diff --git a/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj b/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj
index 7cee9456..1c747b5b 100644
--- a/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj
+++ b/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj
@@ -2,7 +2,7 @@
netstandard2.1
- 0.0.0.21
+ 0.0.0.23
Panlingo.LanguageIdentification.MediaPipe.Native
Panlingo.LanguageIdentification.MediaPipe.Native
Panlingo.LanguageIdentification.MediaPipe.Native
diff --git a/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs b/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs
index 44f627e6..74c0cf9a 100644
--- a/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs
+++ b/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs
@@ -5,7 +5,7 @@ public class MediaPipeNativeLibrary
///
/// Name of native binary
///
- public const string Name = "liblanguage_detector.so";
+ public const string Name = "language_detector";
///
/// Name of model
///
diff --git a/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj b/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj
index e296f9e9..f4d48028 100644
--- a/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj
+++ b/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj
@@ -2,7 +2,7 @@
net5.0;net6.0;net7.0;net8.0
- 0.0.0.21
+ 0.0.0.23
Panlingo.LanguageIdentification.MediaPipe
Panlingo.LanguageIdentification.MediaPipe
Panlingo.LanguageIdentification.MediaPipe
diff --git a/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs b/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs
index df87aa36..7a1a8610 100644
--- a/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs
+++ b/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs
@@ -25,7 +25,7 @@ public MediaPipeDetector(int resultCount = -1, float scoreThreshold = 0.0f, stri
public MediaPipeDetector(MediaPipeOptions options)
{
- if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ if (!IsSupported())
{
throw new NotSupportedException(
$"{nameof(MediaPipeDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}"
@@ -91,6 +91,26 @@ public MediaPipeDetector(MediaPipeOptions options)
             _semaphore = new SemaphoreSlim(1, 1);
         }
+        /// <summary>
+        /// Checks whether the native MediaPipe library is available for the current process.
+        /// </summary>
+        /// <remarks>
+        /// The process architecture is checked rather than the OS architecture, because the
+        /// native library is loaded into the process: a 32-bit process on a 64-bit OS cannot
+        /// load a 64-bit binary.
+        /// </remarks>
+        /// <returns>
+        /// <c>true</c> on Linux x64; otherwise <c>false</c>.
+        /// </returns>
+        public static bool IsSupported()
+        {
+            return RuntimeInformation.ProcessArchitecture switch
+            {
+                Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true,
+                _ => false,
+            };
+        }
+
public IEnumerable PredictLanguages(string text)
{
var nativeResult = new LanguageDetectorResult();
diff --git a/src/LanguageIdentification.Tests/CLD2Tests.cs b/src/LanguageIdentification.Tests/CLD2Tests.cs
index 870bee44..220b30ab 100644
--- a/src/LanguageIdentification.Tests/CLD2Tests.cs
+++ b/src/LanguageIdentification.Tests/CLD2Tests.cs
@@ -5,12 +5,14 @@ namespace Panlingo.LanguageIdentification.Tests;
public class CLD2Tests
{
- [Theory]
+ [SkippableTheory]
[InlineData("en", Constants.PHRASE_ENG_1, 0.9999)]
[InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)]
- [InlineData("un", Constants.PHRASE_RUS_1, 0.9999)]
+ [InlineData("un", Constants.PHRASE_RUS_1, 0)]
public void CLD2SingleLanguage(string languageCode, string text, double score)
{
+ Skip.IfNot(CLD2Detector.IsSupported());
+
using var cld2 = new CLD2Detector();
var predictions = cld2.PredictLanguage(text);
diff --git a/src/LanguageIdentification.Tests/CLD3Tests.cs b/src/LanguageIdentification.Tests/CLD3Tests.cs
index 10c0e662..7f7769c0 100644
--- a/src/LanguageIdentification.Tests/CLD3Tests.cs
+++ b/src/LanguageIdentification.Tests/CLD3Tests.cs
@@ -5,24 +5,46 @@ namespace Panlingo.LanguageIdentification.Tests;
public class CLD3Tests
{
- [Theory]
+ [SkippableTheory]
[InlineData("en", Constants.PHRASE_ENG_1, 0.9985)]
[InlineData("uk", Constants.PHRASE_UKR_1, 0.9992)]
[InlineData("ru", Constants.PHRASE_RUS_1, 0.9770)]
public void CLD3SingleLanguage(string languageCode, string text, double score)
{
+ Skip.IfNot(CLD3Detector.IsSupported());
+
using var cld3 = new CLD3Detector(0, 512);
var prediction = cld3.PredictLanguage(text: text);
var predictions = cld3.PredictLanguages(text: text, count: 3);
- var mainLanguage = predictions.FirstOrDefault();
- if (prediction is null || mainLanguage is null)
+ if (prediction is null)
{
throw new NullReferenceException();
}
Assert.Equal(languageCode, prediction.Language);
+ Assert.Equal(score, prediction.Probability, Constants.EPSILON);
+ }
+
+ [SkippableTheory]
+ [InlineData("en", Constants.PHRASE_ENG_1, 0.9985)]
+ [InlineData("uk", Constants.PHRASE_UKR_1, 0.9992)]
+ [InlineData("ru", Constants.PHRASE_RUS_1, 0.9770)]
+ public void CLD3MixedLanguage(string languageCode, string text, double score)
+ {
+ Skip.IfNot(CLD3Detector.IsSupported());
+
+ using var cld3 = new CLD3Detector(0, 512);
+
+ var predictions = cld3.PredictLanguages(text: text, count: 3);
+ var mainLanguage = predictions.FirstOrDefault();
+
+ if (mainLanguage is null)
+ {
+ throw new NullReferenceException();
+ }
+
Assert.Equal(languageCode, mainLanguage.Language);
Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON);
}
diff --git a/src/LanguageIdentification.Tests/FastTextTests.cs b/src/LanguageIdentification.Tests/FastTextTests.cs
index f7862221..1d6a6f78 100644
--- a/src/LanguageIdentification.Tests/FastTextTests.cs
+++ b/src/LanguageIdentification.Tests/FastTextTests.cs
@@ -1,20 +1,24 @@
-using Panlingo.LanguageIdentification.FastText;
+using System.Runtime.InteropServices;
+using Panlingo.LanguageIdentification.FastText;
using Panlingo.LanguageIdentification.Tests.Helpers;
namespace Panlingo.LanguageIdentification.Tests;
-public class FastTextTests
+public class FastTextTests : IAsyncLifetime
{
- [Theory]
+ private readonly string _modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models/fasttext176.bin");
+
+ [SkippableTheory]
[InlineData("__label__en", Constants.PHRASE_ENG_1, 0.9955)]
[InlineData("__label__uk", Constants.PHRASE_UKR_1, 0.9900)]
[InlineData("__label__ru", Constants.PHRASE_RUS_1, 0.9983)]
public void FastTextFileSingleLanguage(string languageCode, string text, double score)
{
+ Skip.IfNot(FastTextDetector.IsSupported());
+
using var fastText = new FastTextDetector();
- var modelPath = "/models/fasttext176.bin";
- fastText.LoadModel(modelPath);
+ fastText.LoadModel(_modelPath);
var predictions = fastText.Predict(text: text, count: 10);
var mainLanguage = predictions.FirstOrDefault();
@@ -28,16 +32,17 @@ public void FastTextFileSingleLanguage(string languageCode, string text, double
Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON);
}
- [Theory]
+ [SkippableTheory]
[InlineData("__label__en", Constants.PHRASE_ENG_1, 0.9955)]
[InlineData("__label__uk", Constants.PHRASE_UKR_1, 0.9900)]
[InlineData("__label__ru", Constants.PHRASE_RUS_1, 0.9983)]
public void FastTextStreamSingleLanguage(string languageCode, string text, double score)
{
+ Skip.IfNot(FastTextDetector.IsSupported());
+
using var fastText = new FastTextDetector();
- var modelPath = "/models/fasttext176.bin";
- using var stream = File.Open(modelPath, FileMode.Open);
+ using var stream = File.Open(_modelPath, FileMode.Open);
fastText.LoadModel(stream);
@@ -53,12 +58,14 @@ public void FastTextStreamSingleLanguage(string languageCode, string text, doubl
Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON);
}
- [Theory]
+ [SkippableTheory]
[InlineData("__label__en", Constants.PHRASE_ENG_1, 1.0000)]
[InlineData("__label__uk", Constants.PHRASE_UKR_1, 0.8511)]
[InlineData("__label__ru", Constants.PHRASE_RUS_1, 0.9693)]
public void FastTextContainedSingleLanguage(string languageCode, string text, double score)
{
+ Skip.IfNot(FastTextDetector.IsSupported());
+
using var fastText = new FastTextDetector();
fastText.LoadDefaultModel();
@@ -74,13 +81,14 @@ public void FastTextContainedSingleLanguage(string languageCode, string text, do
Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON);
}
- [Fact]
+ [SkippableFact]
public void FastTextLabels()
{
+ Skip.IfNot(FastTextDetector.IsSupported());
+
using var fastText = new FastTextDetector();
- var modelPath = "/models/fasttext176.bin";
- fastText.LoadModel(modelPath);
+ fastText.LoadModel(_modelPath);
var labels = fastText.GetLabels();
@@ -88,4 +96,31 @@ public void FastTextLabels()
         Assert.Contains(labels, x => x.Label == "__label__uk");
         Assert.Contains(labels, x => x.Label == "__label__ru");
     }
+
+    /// <summary>Downloads the FastText model file that the file and stream tests load.</summary>
+    public async Task InitializeAsync()
+    {
+        // Every test in this class is skipped on unsupported platforms, so the download would be wasted there.
+        if (!FastTextDetector.IsSupported())
+        {
+            return;
+        }
+
+        var url = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin";
+        await FileHelper.DownloadAsync(
+            path: _modelPath,
+            url: url
+        );
+    }
+
+    /// <summary>Deletes the downloaded model file, if it exists.</summary>
+    public async Task DisposeAsync()
+    {
+        if (File.Exists(_modelPath))
+        {
+            File.Delete(_modelPath);
+        }
+
+        await Task.CompletedTask;
+    }
 }
diff --git a/src/LanguageIdentification.Tests/Helpers/FileHelper.cs b/src/LanguageIdentification.Tests/Helpers/FileHelper.cs
new file mode 100644
index 00000000..5c20991c
--- /dev/null
+++ b/src/LanguageIdentification.Tests/Helpers/FileHelper.cs
@@ -0,0 +1,52 @@
+namespace Panlingo.LanguageIdentification.Tests.Helpers;
+
+/// <summary>
+/// File utilities for tests.
+/// </summary>
+public class FileHelper
+{
+    /// <summary>
+    /// Downloads <paramref name="url"/> to <paramref name="path"/> unless the file already exists.
+    /// </summary>
+    /// <param name="path">Destination file path; a missing parent directory is created.</param>
+    /// <param name="url">Address of the file to download.</param>
+    /// <remarks>
+    /// The content is written to a temporary file next to the destination and moved into
+    /// place only after the download has completed, so an interrupted download never leaves
+    /// a truncated file that a later call would mistake for a complete one.
+    /// </remarks>
+    public static async Task DownloadAsync(string path, string url)
+    {
+        if (File.Exists(path))
+        {
+            return;
+        }
+
+        var directory = Path.GetDirectoryName(path) ?? throw new Exception("No directory");
+        if (!string.IsNullOrEmpty(directory))
+        {
+            Directory.CreateDirectory(directory);
+        }
+
+        var temporaryPath = $"{path}.{Guid.NewGuid():N}.tmp";
+
+        try
+        {
+            using var client = new HttpClient();
+            using (var stream = await client.GetStreamAsync(url))
+            using (var file = new FileStream(temporaryPath, FileMode.Create, FileAccess.Write))
+            {
+                await stream.CopyToAsync(file);
+            }
+
+            File.Move(temporaryPath, path, overwrite: true);
+        }
+        finally
+        {
+            if (File.Exists(temporaryPath))
+            {
+                File.Delete(temporaryPath);
+            }
+        }
+    }
+}
diff --git a/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj b/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj
index 501548f8..8ce25e43 100644
--- a/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj
+++ b/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj
@@ -14,10 +14,18 @@
-
-
-
-
+
+ all
+ runtime; build; native; contentfiles; analyzers; buildtransitive
+
+
+
+
+
+ all
+ runtime; build; native; contentfiles; analyzers; buildtransitive
+
+
diff --git a/src/LanguageIdentification.Tests/LinguaTests.cs b/src/LanguageIdentification.Tests/LinguaTests.cs
index 6544a915..8c142e1e 100644
--- a/src/LanguageIdentification.Tests/LinguaTests.cs
+++ b/src/LanguageIdentification.Tests/LinguaTests.cs
@@ -5,12 +5,14 @@ namespace Panlingo.LanguageIdentification.Tests;
public class LinguaTests
{
- [Theory]
+ [SkippableTheory]
[InlineData(LinguaLanguage.English, Constants.PHRASE_ENG_1, 0.1666)]
[InlineData(LinguaLanguage.Ukrainian, Constants.PHRASE_UKR_1, 0.8228)]
[InlineData(LinguaLanguage.Russian, Constants.PHRASE_RUS_1, 0.3502)]
public void LinguaSingleLanguage(LinguaLanguage languageCode, string text, double score)
{
+ Skip.IfNot(LinguaDetector.IsSupported());
+
using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues());
using var lingua = linguaBuilder.Build();
@@ -26,12 +28,14 @@ public void LinguaSingleLanguage(LinguaLanguage languageCode, string text, doubl
Assert.Equal(score, mainLanguage.Confidence, Constants.EPSILON);
}
- [Theory]
+ [SkippableTheory]
[InlineData(LinguaLanguage.English, Constants.PHRASE_ENG_1, 0.1666)]
[InlineData(LinguaLanguage.Ukrainian, Constants.PHRASE_UKR_1, 0.8228)]
[InlineData(LinguaLanguage.Russian, Constants.PHRASE_RUS_1, 0.3502)]
public void LinguaMixedLanguage(LinguaLanguage languageCode, string text, double score)
{
+ Skip.IfNot(LinguaDetector.IsSupported());
+
using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues());
using var lingua = linguaBuilder.Build();
@@ -47,7 +51,7 @@ public void LinguaMixedLanguage(LinguaLanguage languageCode, string text, double
Assert.Equal(score, mainLanguage.Confidence, Constants.EPSILON);
}
- [Theory]
+ [SkippableTheory]
[InlineData(LinguaLanguage.Ukrainian, LinguaLanguageCode.Alpha2, "uk")]
[InlineData(LinguaLanguage.Ukrainian, LinguaLanguageCode.Alpha3, "ukr")]
[InlineData(LinguaLanguage.Hebrew, LinguaLanguageCode.Alpha2, "he")]
@@ -56,6 +60,8 @@ public void LinguaMixedLanguage(LinguaLanguage languageCode, string text, double
[InlineData(LinguaLanguage.Serbian, LinguaLanguageCode.Alpha3, "srp")]
public void LinguaGetLanguageCode(LinguaLanguage language, LinguaLanguageCode type, string code)
{
+ Skip.IfNot(LinguaDetector.IsSupported());
+
using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues());
using var lingua = linguaBuilder.Build();
@@ -63,9 +69,11 @@ public void LinguaGetLanguageCode(LinguaLanguage language, LinguaLanguageCode ty
Assert.Equal(code, languageCode);
}
- [Fact]
+ [SkippableFact]
public void LinguaBuilderReuse()
{
+ Skip.IfNot(LinguaDetector.IsSupported());
+
using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues());
using var lingua1 = linguaBuilder.Build();
using var lingua2 = linguaBuilder.Build();
diff --git a/src/LanguageIdentification.Tests/MainTests.cs b/src/LanguageIdentification.Tests/MainTests.cs
index 12eb4f88..ea1cdb08 100644
--- a/src/LanguageIdentification.Tests/MainTests.cs
+++ b/src/LanguageIdentification.Tests/MainTests.cs
@@ -1,14 +1,104 @@
-namespace Panlingo.LanguageIdentification.Tests;
+using Microsoft.Build.Construction;
+
+namespace Panlingo.LanguageIdentification.Tests;
 public class MainTests
 {
-    ///
-    /// Checks the current OS and container environment
-    ///
+    /// <summary>
+    /// Verifies that every Panlingo assembly referenced by the tests has the version declared
+    /// by the <c>Version</c> property of the project file with the same <c>AssemblyName</c>.
+    /// </summary>
     [Fact]
-    public void CheckPlatform()
+    public void CheckPackageVersion()
     {
-        Assert.Equal(PlatformID.Unix, Environment.OSVersion.Platform);
-        Assert.Equal("true", Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER"));
+        Type[] types = [
+            typeof(Panlingo.LanguageIdentification.CLD2.CLD2Detector),
+            typeof(Panlingo.LanguageIdentification.CLD3.CLD3Detector),
+            typeof(Panlingo.LanguageIdentification.FastText.FastTextDetector),
+            typeof(Panlingo.LanguageIdentification.Lingua.LinguaDetector),
+            typeof(Panlingo.LanguageIdentification.MediaPipe.MediaPipeDetector),
+            typeof(Panlingo.LanguageIdentification.Whatlang.WhatlangDetector),
+            typeof(Panlingo.LanguageIdentification.CLD2.Native.CLD2NativeLibrary),
+            typeof(Panlingo.LanguageIdentification.CLD3.Native.CLD3NativeLibrary),
+            typeof(Panlingo.LanguageIdentification.FastText.Native.FastTextNativeLibrary),
+            typeof(Panlingo.LanguageIdentification.Lingua.Native.LinguaNativeLibrary),
+            typeof(Panlingo.LanguageIdentification.MediaPipe.Native.MediaPipeNativeLibrary),
+            typeof(Panlingo.LanguageIdentification.Whatlang.Native.WhatlangNativeLibrary),
+        ];
+
+        // Walk up from the test output directory to the enclosing "src" directory.
+        var src = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory);
+        while (src is not null && src.Name != "src")
+        {
+            src = src.Parent;
+        }
+
+        // Fail fast instead of recursively scanning the file system from its root.
+        if (src is null)
+        {
+            throw new Exception("Directory 'src' is not found");
+        }
+
+        var projectFiles = Directory.GetFiles(src.FullName, "*.csproj", SearchOption.AllDirectories);
+
+        // Assembly name -> version declared in the project file.
+        var packageProjects = new Dictionary<string, string>();
+
+        foreach (var projectFile in projectFiles)
+        {
+            var projectRootElement = ProjectRootElement.Open(projectFile);
+            var assemblyName = projectRootElement.Properties.FirstOrDefault(x => x.Name == "AssemblyName");
+            var version = projectRootElement.Properties.FirstOrDefault(x => x.Name == "Version");
+
+            if (assemblyName is null || version is null)
+            {
+                continue;
+            }
+
+            if (string.IsNullOrEmpty(assemblyName.Value) || string.IsNullOrEmpty(version.Value))
+            {
+                continue;
+            }
+
+            packageProjects[assemblyName.Value] = version.Value;
+        }
+
+        if (packageProjects.Count == 0)
+        {
+            throw new Exception("Projects are not found");
+        }
+
+        // The listed types pin the assemblies under test; taking the assembly from each
+        // type avoids calling GetTypes() on every assembly loaded into the AppDomain.
+        var assemblies = types
+            .Select(x => x.Assembly)
+            .Distinct()
+            .ToArray();
+
+        var assemblyNames = new List<string>();
+        var packageNames = new List<string>();
+
+        foreach (var assembly in assemblies)
+        {
+            var assemblyName = assembly.GetName();
+
+            assemblyNames.Add($"{assemblyName.Name} {assemblyName.Version}");
+            if (assemblyName.Name != null && packageProjects.TryGetValue(assemblyName.Name, out var packageVersion))
+            {
+                // Version.Parse reports components missing from the string as -1;
+                // they are replaced with 0 so both sides are formatted with four components.
+                var a = Version.Parse(packageVersion);
+                var b = new Version(
+                    major: a.Major != -1 ? a.Major : 0,
+                    minor: a.Minor != -1 ? a.Minor : 0,
+                    build: a.Build != -1 ? a.Build : 0,
+                    revision: a.Revision != -1 ? a.Revision : 0
+                );
+
+                packageNames.Add($"{assemblyName.Name} {b}");
+            }
+        }
+
+        Assert.Equal(packageNames, assemblyNames);
     }
 }
diff --git a/src/LanguageIdentification.Tests/MediaPipeTests.cs b/src/LanguageIdentification.Tests/MediaPipeTests.cs
index c9f698d5..0aaf4a1d 100644
--- a/src/LanguageIdentification.Tests/MediaPipeTests.cs
+++ b/src/LanguageIdentification.Tests/MediaPipeTests.cs
@@ -1,20 +1,22 @@
-using System.IO;
-using Panlingo.LanguageIdentification.MediaPipe;
+using Panlingo.LanguageIdentification.MediaPipe;
using Panlingo.LanguageIdentification.Tests.Helpers;
namespace Panlingo.LanguageIdentification.Tests;
-public class MediaPipeTests
+public class MediaPipeTests : IAsyncLifetime
{
- [Theory]
+ private readonly string _modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models/mediapipe_language_detector.tflite");
+
+ [SkippableTheory]
[InlineData("en", Constants.PHRASE_ENG_1, 0.9994)]
[InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)]
[InlineData("ru", Constants.PHRASE_RUS_1, 0.9999)]
public void MediaPipeFileSingleLanguage(string languageCode, string text, double score)
{
- var modelPath = "/models/mediapipe_language_detector.tflite";
+ Skip.IfNot(MediaPipeDetector.IsSupported());
+
using var mediaPipe = new MediaPipeDetector(
- options: MediaPipeOptions.FromFile(modelPath).WithResultCount(10)
+ options: MediaPipeOptions.FromFile(_modelPath).WithResultCount(10)
);
var predictions = mediaPipe.PredictLanguages(text: text);
@@ -28,15 +30,16 @@ public void MediaPipeFileSingleLanguage(string languageCode, string text, double
Assert.Equal(languageCode, mainLanguage.Language);
Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON);
}
-
- [Theory]
+
+ [SkippableTheory]
[InlineData("en", Constants.PHRASE_ENG_1, 0.9994)]
[InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)]
[InlineData("ru", Constants.PHRASE_RUS_1, 0.9999)]
public void MediaPipeStreamSingleLanguage(string languageCode, string text, double score)
{
- var modelPath = "/models/mediapipe_language_detector.tflite";
- using var stream = File.Open(modelPath, FileMode.Open);
+ Skip.IfNot(MediaPipeDetector.IsSupported());
+
+ using var stream = File.Open(_modelPath, FileMode.Open);
using var mediaPipe = new MediaPipeDetector(
options: MediaPipeOptions.FromStream(stream).WithResultCount(10)
@@ -54,14 +57,15 @@ public void MediaPipeStreamSingleLanguage(string languageCode, string text, doub
Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON);
}
- [Theory]
+ [SkippableTheory]
[InlineData("en", Constants.PHRASE_ENG_1, 0.9994)]
[InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)]
[InlineData("ru", Constants.PHRASE_RUS_1, 0.9999)]
public void MediaPipeContainedSingleLanguage(string languageCode, string text, double score)
{
- var modelPath = "/models/mediapipe_language_detector.tflite";
- using var stream = File.Open(modelPath, FileMode.Open);
+ Skip.IfNot(MediaPipeDetector.IsSupported());
+
+ using var stream = File.Open(_modelPath, FileMode.Open);
using var mediaPipe = new MediaPipeDetector(
options: MediaPipeOptions.FromDefault().WithResultCount(10)
@@ -78,4 +82,23 @@ public void MediaPipeContainedSingleLanguage(string languageCode, string text, d
Assert.Equal(languageCode, mainLanguage.Language);
Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON);
}
+
+ // IAsyncLifetime setup hook: hands the MediaPipe model URL and _modelPath to
+ // FileHelper.DownloadAsync (presumably it saves the model there - confirm).
+ public async Task InitializeAsync()
+ {
+     const string modelUrl = "https://storage.googleapis.com/mediapipe-models/language_detector/language_detector/float32/1/language_detector.tflite";
+
+     await FileHelper.DownloadAsync(url: modelUrl, path: _modelPath);
+ }
+
+ // IAsyncLifetime teardown hook: deletes the file at _modelPath when one exists.
+ // Nothing is really awaited here; the completed task only satisfies `async`.
+ public async Task DisposeAsync()
+ {
+     var modelIsPresent = File.Exists(_modelPath);
+     if (modelIsPresent) File.Delete(_modelPath);
+
+     await Task.CompletedTask;
+ }
}
diff --git a/src/LanguageIdentification.Tests/WhatlangTests.cs b/src/LanguageIdentification.Tests/WhatlangTests.cs
index 5ea352d6..74207c2b 100644
--- a/src/LanguageIdentification.Tests/WhatlangTests.cs
+++ b/src/LanguageIdentification.Tests/WhatlangTests.cs
@@ -5,12 +5,14 @@ namespace Panlingo.LanguageIdentification.Tests;
public class WhatlangTests
{
- [Theory]
+ [SkippableTheory]
[InlineData(WhatlangLanguage.Ron, Constants.PHRASE_ENG_1, 0.0274)]
[InlineData(WhatlangLanguage.Ukr, Constants.PHRASE_UKR_1, 0.9999)]
[InlineData(WhatlangLanguage.Rus, Constants.PHRASE_RUS_1, 0.2308)]
public void WhatlangSingleLanguage(WhatlangLanguage languageCode, string text, double score)
{
+ Skip.IfNot(WhatlangDetector.IsSupported());
+
using var whatlang = new WhatlangDetector();
var prediction = whatlang.PredictLanguage(text: text);
@@ -24,13 +26,15 @@ public void WhatlangSingleLanguage(WhatlangLanguage languageCode, string text, d
Assert.Equal(score, prediction.Confidence, Constants.EPSILON);
}
- [Theory]
+ [SkippableTheory]
[InlineData(WhatlangLanguage.Ukr, "ukr")]
[InlineData(WhatlangLanguage.Uzb, "uzb")]
[InlineData(WhatlangLanguage.Heb, "heb")]
[InlineData(WhatlangLanguage.Srp, "srp")]
public void WhatlangGetLanguageCode(WhatlangLanguage language, string code)
{
+ Skip.IfNot(WhatlangDetector.IsSupported());
+
using var whatlang = new WhatlangDetector();
var languageCode = whatlang.GetLanguageCode(language);
diff --git a/src/LanguageIdentification.Whatlang.Native/Dockerfile b/src/LanguageIdentification.Whatlang.Native/Dockerfile
index 9c1f75c8..80c722c2 100644
--- a/src/LanguageIdentification.Whatlang.Native/Dockerfile
+++ b/src/LanguageIdentification.Whatlang.Native/Dockerfile
@@ -1,5 +1,4 @@
-FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
-ARG BUILD_CONFIGURATION=Release
+FROM ubuntu:22.04 AS build
WORKDIR /repo
COPY ["src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj", "src/LanguageIdentification.Whatlang.Native/"]
diff --git a/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj b/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj
index 95a71f99..ab0966ce 100644
--- a/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj
+++ b/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj
@@ -2,7 +2,7 @@
netstandard2.1
- 0.0.0.21
+ 0.0.0.23
Panlingo.LanguageIdentification.Whatlang.Native
Panlingo.LanguageIdentification.Whatlang.Native
Panlingo.LanguageIdentification.Whatlang.Native
diff --git a/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs b/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs
index 2ec7e231..836f41f6 100644
--- a/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs
+++ b/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs
@@ -5,6 +5,6 @@ public static class WhatlangNativeLibrary
///
/// Name of native binary
///
- public const string Name = "libwhatlang.so";
+ public const string Name = "whatlang";
}
}
diff --git a/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj b/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj
index 7f6fef1a..13fc8084 100644
--- a/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj
+++ b/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj
@@ -2,7 +2,7 @@
net5.0;net6.0;net7.0;net8.0
- 0.0.0.21
+ 0.0.0.23
Panlingo.LanguageIdentification.Whatlang
Panlingo.LanguageIdentification.Whatlang
Panlingo.LanguageIdentification.Whatlang
diff --git a/src/LanguageIdentification.Whatlang/WhatlangDetector.cs b/src/LanguageIdentification.Whatlang/WhatlangDetector.cs
index fd3076eb..00b9c8b0 100644
--- a/src/LanguageIdentification.Whatlang/WhatlangDetector.cs
+++ b/src/LanguageIdentification.Whatlang/WhatlangDetector.cs
@@ -12,7 +12,7 @@ public class WhatlangDetector : IDisposable
{
public WhatlangDetector()
{
- if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ if (!IsSupported())
{
throw new NotSupportedException(
$"{nameof(WhatlangDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}"
@@ -20,6 +20,15 @@ public WhatlangDetector()
}
}
+ /// <summary>
+ /// Tells whether <see cref="WhatlangDetector"/> can be created on this machine:
+ /// true only when the OS is Linux and the OS architecture is x64.
+ /// </summary>
+ public static bool IsSupported()
+ {
+     return RuntimeInformation.OSArchitecture == Architecture.X64 && RuntimeInformation.IsOSPlatform(OSPlatform.Linux);
+ }
+
///
/// Produces a prediction for 'text'
///
diff --git a/src/test-ci.Dockerfile b/src/test-ci.Dockerfile
index 27d7a2ab..bc46a1d6 100644
--- a/src/test-ci.Dockerfile
+++ b/src/test-ci.Dockerfile
@@ -3,7 +3,7 @@
WORKDIR /src
COPY . .
-RUN dotnet nuget add source /src/local-nugets
+RUN dotnet nuget add source /src/local-packages
RUN ls -R
diff --git a/src/test.Dockerfile b/src/test.Dockerfile
index 8fafba69..9e6935da 100644
--- a/src/test.Dockerfile
+++ b/src/test.Dockerfile
@@ -3,18 +3,3 @@
RUN wget https://aka.ms/getvsdbgsh && \
sh getvsdbgsh -v latest -l /vsdbg
-### FastText
-RUN apt -y update
-RUN apt -y install curl
-RUN mkdir /models -p
-RUN curl --location -o /models/fasttext176.bin https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin
-# RUN curl --location -o /models/fasttext217.bin https://huggingface.co/facebook/fasttext-language-identification/resolve/main/model.bin?download=true
-###
-
-### MediaPipe
-RUN apt -y update
-RUN apt -y install curl
-RUN curl --location -o /models/mediapipe_language_detector.tflite https://storage.googleapis.com/mediapipe-models/language_detector/language_detector/float32/1/language_detector.tflite
-###
-
-