diff --git a/.github/workflows/github-ci.yml b/.github/workflows/github-ci.yml index 374a4945..c8624c30 100644 --- a/.github/workflows/github-ci.yml +++ b/.github/workflows/github-ci.yml @@ -8,20 +8,20 @@ on: workflow_dispatch: jobs: - build-nugets: - name: 🚀 Pack ${{matrix.projectName}} - runs-on: ubuntu-latest + build-native: + name: 💾 Build ${{matrix.task.title}} for ${{matrix.task.os}} (${{matrix.task.arch}}) + runs-on: ${{matrix.task.os}} strategy: fail-fast: false matrix: - projectName: - - LanguageIdentification.CLD2 - - LanguageIdentification.CLD3 - - LanguageIdentification.FastText - - LanguageIdentification.Whatlang - - LanguageIdentification.MediaPipe - - LanguageIdentification.Lingua - - LanguageCode + task: + - { title: FastText, projectName: LanguageIdentification.FastText.Native, os: windows-latest, arch: x86_64, script: ./Scripts/run-build.ps1, artifact: fasttext.dll } + - { title: FastText, projectName: LanguageIdentification.FastText.Native, os: macos-13, arch: x86_64, script: ./Scripts/run-build.macos.sh, artifact: libfasttext.x86_64.dylib } + - { title: FastText, projectName: LanguageIdentification.FastText.Native, os: macos-14, arch: arm64, script: ./Scripts/run-build.macos.sh, artifact: libfasttext.arm64.dylib } + - { title: CLD2, projectName: LanguageIdentification.CLD2.Native, os: macos-13, arch: x86_64, script: ./Scripts/run-build.macos.sh, artifact: libcld2.x86_64.dylib } + - { title: CLD2, projectName: LanguageIdentification.CLD2.Native, os: macos-14, arch: arm64, script: ./Scripts/run-build.macos.sh, artifact: libcld2.arm64.dylib } + - { title: CLD3, projectName: LanguageIdentification.CLD3.Native, os: macos-13, arch: x86_64, script: ./Scripts/run-build.macos.sh, artifact: libcld3.x86_64.dylib } + - { title: CLD3, projectName: LanguageIdentification.CLD3.Native, os: macos-14, arch: arm64, script: ./Scripts/run-build.macos.sh, artifact: libcld3.arm64.dylib } permissions: contents: read packages: write @@ -38,14 +38,79 @@ jobs: with: filters: | src: - - '${{env.ROOT}}/${{matrix.projectName}}/**' + - '${{env.ROOT}}/${{matrix.task.projectName}}/**' + + - uses: dorny/paths-filter@v3 + id: changes-native + with: + filters: | + src: + - '${{env.ROOT}}/${{matrix.task.projectName}}.Native/**' + + - name: 🚀 Build ${{matrix.task.projectName}} + if: steps.changes.outputs.src == 'true' + working-directory: ${{env.ROOT}}/${{matrix.task.projectName}} + run: | + echo $(pwd) + git update-index --chmod=+x ${{matrix.task.script}} + chmod +x ${{matrix.task.script}} + ${{matrix.task.script}} ${{matrix.task.arch}} + + - uses: actions/upload-artifact@v4 + if: steps.changes.outputs.src == 'true' + with: + name: native-${{matrix.task.projectName}}-${{matrix.task.os}} + path: ${{env.ROOT}}/${{matrix.task.projectName}}/${{matrix.task.artifact}} + retention-days: 1 + overwrite: 'true' + compression-level: 0 + if-no-files-found: 'error' + + build-package: + name: 🚀 Pack ${{matrix.task.title}} + runs-on: ${{matrix.task.os}} + needs: build-native + strategy: + fail-fast: false + matrix: + task: + - { title: CLD2, projectName: LanguageIdentification.CLD2, os: ubuntu-latest } + - { title: CLD3, projectName: LanguageIdentification.CLD3, os: ubuntu-latest } + - { title: FastText, projectName: LanguageIdentification.FastText, os: ubuntu-latest } + - { title: Whatlang, projectName: LanguageIdentification.Whatlang, os: ubuntu-latest } + - { title: MediaPipe, projectName: LanguageIdentification.MediaPipe, os: ubuntu-latest } + - { title: Lingua, projectName: LanguageIdentification.Lingua, os: ubuntu-latest } + - { title: LanguageCode, projectName: LanguageCode, os: ubuntu-latest } + permissions: + contents: read + packages: write + env: + ROOT: ./src + NUGET_AUTH_TOKEN: ${{secrets.GITHUB_TOKEN}} + steps: + - uses: actions/checkout@v4 + with: + submodules: 'true' + + - uses: actions/download-artifact@v4 + with: + pattern: native-${{matrix.task.projectName}}.Native-* + merge-multiple: true + path: ${{env.ROOT}}/${{matrix.task.projectName}}.Native/ + + - uses: dorny/paths-filter@v3 + id: changes + with: + filters: | + src: + - '${{env.ROOT}}/${{matrix.task.projectName}}/**' - uses: dorny/paths-filter@v3 id: changes-native with: filters: | src: - - '${{env.ROOT}}/${{matrix.projectName}}.Native/**' + - '${{env.ROOT}}/${{matrix.task.projectName}}.Native/**' - name: 📂 Setup .NET Core uses: actions/setup-dotnet@v4 @@ -57,21 +122,21 @@ jobs: 8.0.x source-url: https://nuget.pkg.github.com/${{github.repository_owner}}/index.json - - name: 🚀 Pack ${{matrix.projectName}}.Native + - name: 🚀 Pack ${{matrix.task.projectName}}.Native if: steps.changes-native.outputs.src == 'true' - working-directory: ${{env.ROOT}}/${{matrix.projectName}}.Native + working-directory: ${{env.ROOT}}/${{matrix.task.projectName}}.Native run: dotnet pack -c Release -o out - - name: 🚀 Pack ${{matrix.projectName}} + - name: 🚀 Pack ${{matrix.task.projectName}} if: steps.changes.outputs.src == 'true' - working-directory: ${{env.ROOT}}/${{matrix.projectName}} + working-directory: ${{env.ROOT}}/${{matrix.task.projectName}} run: dotnet pack -c Release -o out - uses: actions/upload-artifact@v4 if: steps.changes-native.outputs.src == 'true' with: - name: build-${{matrix.projectName}}.Native - path: ${{env.ROOT}}/${{matrix.projectName}}.Native/out + name: build-${{matrix.task.projectName}}.Native + path: ${{env.ROOT}}/${{matrix.task.projectName}}.Native/out retention-days: 1 overwrite: 'true' compression-level: 0 @@ -79,16 +144,29 @@ jobs: - uses: actions/upload-artifact@v4 if: steps.changes.outputs.src == 'true' with: - name: build-${{matrix.projectName}} - path: ${{env.ROOT}}/${{matrix.projectName}}/out + name: build-${{matrix.task.projectName}} + path: ${{env.ROOT}}/${{matrix.task.projectName}}/out retention-days: 1 overwrite: 'true' compression-level: 0 - test-nugets: - name: 🧪 Test ${{matrix.projectName}} - runs-on: ubuntu-latest - needs: build-nugets + test-package: + name: 🧪 Test on ${{matrix.task.os}} + runs-on: ${{matrix.task.os}} + needs: build-package + strategy: + fail-fast: false + # https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#standard-github-hosted-runners-for--private-repositories + matrix: + task: + - { os: ubuntu-24.04 } + - { os: ubuntu-22.04 } + # - { os: ubuntu-20.04 } + - { os: windows-2022 } + - { os: windows-2019 } + - { os: macos-13 } + - { os: macos-14 } + - { os: macos-15 } permissions: contents: read packages: write @@ -100,36 +178,48 @@ jobs: - name: 📂 Files working-directory: ${{env.ROOT}} - run: mkdir -p local-nugets + run: mkdir -p local-packages - uses: actions/download-artifact@v4 with: pattern: build-* merge-multiple: true - path: ${{env.ROOT}}/local-nugets + path: ${{env.ROOT}}/local-packages - name: 📂 Files - working-directory: ${{env.ROOT}} + working-directory: ${{env.ROOT}}/local-packages run: ls -R - - name: 📂 Build Docker Image - working-directory: ${{env.ROOT}} + - name: 📂 Use local NuGet (Linux) + if: ${{ startsWith(matrix.task.os, 'ubuntu') }} run: | - docker build -f ./test.Dockerfile -t langunage-identification-test-runner:latest . - docker build -f ./test-ci.Dockerfile -t langunage-identification-test-runner-ci:latest . + path=$(realpath "${{env.ROOT}}/local-packages") + dotnet nuget add source $path - - name: 🧪 Run Tests in Docker - working-directory: ${{env.ROOT}} - run: docker run --rm -v $(pwd):/src langunage-identification-test-runner-ci:latest + - name: 📂 Use local NuGet (Windows) + if: ${{ startsWith(matrix.task.os, 'windows') }} + run: | + $path = [System.IO.Path]::GetFullPath('${{env.ROOT}}/local-packages') + dotnet nuget add source $path - - name: 🧪 Run Test for LanguageCode - working-directory: ${{env.ROOT}}/LanguageCode.Tests + - name: 📂 Use local NuGet (OSX) + if: ${{ startsWith(matrix.task.os, 'macos') }} + run: | + path=$(realpath "${{env.ROOT}}/local-packages") + dotnet nuget add source $path + + - name: 🧪 Run Tests for LanguageIdentification + working-directory: ${{env.ROOT}}/LanguageIdentification.Tests run: dotnet test -c CI - deploy-nugets: + - name: 🧪 Run Tests for LanguageCode + working-directory: ${{env.ROOT}}/LanguageCode.Tests + run: dotnet test + + deploy-package: name: 🚚 Push ${{matrix.projectName}} runs-on: ubuntu-latest - needs: test-nugets + needs: test-package if: github.ref == 'refs/heads/master' strategy: fail-fast: false diff --git a/README.md b/README.md index c9cb57d3..73e52774 100644 --- a/README.md +++ b/README.md @@ -89,14 +89,14 @@ functionality into their applications. ## Platform support -| Model | Linux | Windows | macOS | Blazor WASM | -| :-------- | :----------------: | :--------------: | :----: | :------------: | -| CLD2 | :white_check_mark: | :construction: | :x: | :x: | -| CLD3 | :white_check_mark: | :construction: | :x: | :x: | -| FastText | :white_check_mark: | :construction: | :x: | :x: | -| Whatlang | :white_check_mark: | :construction: | :x: | :x: | -| MediaPipe | :white_check_mark: | :construction: | :x: | :x: | -| Lingua | :white_check_mark: | :construction: | :x: | :x: | +| Model | Linux | Windows | macOS | Blazor WASM | +| :-------- | :----------------: | :----------------: | :----------------: | :------------: | +| CLD2 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: | +| CLD3 | :white_check_mark: | :white_check_mark: | :construction: | :x: | +| FastText | :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: | +| Whatlang | :white_check_mark: | :construction: | :construction: | :x: | +| MediaPipe | :white_check_mark: | :construction: | :construction: | :x: | +| Lingua | :white_check_mark: | :construction: | :construction: | :x: | :white_check_mark: — Full support | :x: — No support | diff --git a/README_CLD2.md b/README_CLD2.md index 3a1b62df..785c614d 100644 --- a/README_CLD2.md +++ b/README_CLD2.md @@ -5,8 +5,8 @@ Welcome to **Panlingo.LanguageIdentification.CLD2**, a .NET wrapper for the Chro ## Requirements - Runtime: **.NET >= 5.0** -- OS: **Linux** -- Arch: **AMD64** +- OS: **Linux (Ubuntu, Debian)**, **Windows 10+** or **Windows Server 2019+**, **macOS** +- Arch: **AMD64** (or **ARM** for macOS) ## Installation diff --git a/README_CLD3.md b/README_CLD3.md index ed827ed8..ddfd5085 100644 --- a/README_CLD3.md +++ b/README_CLD3.md @@ -5,7 +5,7 @@ Welcome to **Panlingo.LanguageIdentification.CLD3**, a .NET wrapper for the Chro ## Requirements - Runtime: **.NET >= 5.0** -- OS: **Linux** +- OS: **Linux (Ubuntu, Debian)**, **Windows 10+** or **Windows Server 2019+** - Arch: **AMD64** ## Installation diff --git a/README_FASTTEXT.md b/README_FASTTEXT.md index bb37021c..2df9b77a 100644 --- a/README_FASTTEXT.md +++ b/README_FASTTEXT.md @@ -5,8 +5,8 @@ Welcome to **Panlingo.LanguageIdentification.FastText**, a .NET wrapper for the ## Requirements - Runtime: **.NET >= 5.0** -- OS: **Linux** -- Arch: **AMD64** +- OS: **Linux (Ubuntu, Debian)**, **Windows 10+** or **Windows Server 2019+**, **macOS** +- Arch: **AMD64** (or **ARM** for macOS) ## Installation diff --git a/src/LanguageCode.Tests/LanguageCode.Tests.csproj b/src/LanguageCode.Tests/LanguageCode.Tests.csproj index 205df9fa..310f39d6 100644 --- a/src/LanguageCode.Tests/LanguageCode.Tests.csproj +++ b/src/LanguageCode.Tests/LanguageCode.Tests.csproj @@ -7,6 +7,10 @@ false true + + Panlingo.LanguageCode.Tests + + Debug;Release;CI diff --git a/src/LanguageCode.Tests/LanguageCodeTests.cs b/src/LanguageCode.Tests/LanguageCodeTests.cs index 1e0a88a9..b03f19c1 100644 --- a/src/LanguageCode.Tests/LanguageCodeTests.cs +++ b/src/LanguageCode.Tests/LanguageCodeTests.cs @@ -1,7 +1,6 @@ -using Panlingo.LanguageCode; -using Panlingo.LanguageCode.Models; +using Panlingo.LanguageCode.Models; -namespace LanguageCode.Tests +namespace Panlingo.LanguageCode.Tests { public class LanguageCodeTests { diff --git a/src/LanguageCode/LanguageCode.csproj b/src/LanguageCode/LanguageCode.csproj index 8813ba77..15927afe 100644 --- a/src/LanguageCode/LanguageCode.csproj +++ b/src/LanguageCode/LanguageCode.csproj @@ -2,7 +2,7 @@ netstandard2.1;net5.0;net6.0;net7.0;net8.0 - 0.0.0.21 + 0.0.0.23 Panlingo.LanguageCode Panlingo.LanguageCode Panlingo.LanguageCode diff --git a/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs b/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs index b94ab372..cb3672c6 100644 --- a/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs +++ b/src/LanguageIdentification.CLD2.ConsoleTest/Program.cs @@ -6,7 +6,7 @@ static void Main(string[] args) { using var cld2 = new CLD2Detector(); - var text = "Hello, how are you? Привіт, як справи? Привет, как дела?"; + var text = "Привіт, як справи?"; var predictions = cld2.PredictLanguage(text); diff --git a/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json b/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json index 65b8965a..1f434ec6 100644 --- a/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json +++ b/src/LanguageIdentification.CLD2.ConsoleTest/Properties/launchSettings.json @@ -2,6 +2,13 @@ "profiles": { "Docker": { "commandName": "Docker" + }, + "WSL": { + "commandName": "WSL2", + "distributionName": "" + }, + "Project": { + "commandName": "Project" } } } \ No newline at end of file diff --git a/src/LanguageIdentification.CLD2.Native/.gitignore b/src/LanguageIdentification.CLD2.Native/.gitignore index 162500b1..585d5a3f 100644 --- a/src/LanguageIdentification.CLD2.Native/.gitignore +++ b/src/LanguageIdentification.CLD2.Native/.gitignore @@ -1,2 +1,3 @@ libcld2.so +libcld2.dll build_temp/** diff --git a/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs b/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs index 5b952e65..7afcdeba 100644 --- a/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs +++ b/src/LanguageIdentification.CLD2.Native/CLD2NativeLibrary.cs @@ -5,6 +5,6 @@ public static class CLD2NativeLibrary /// /// Name of native binary /// - public const string Name = "libcld2.so"; + public const string Name = "libcld2"; } } diff --git a/src/LanguageIdentification.CLD2.Native/Dockerfile b/src/LanguageIdentification.CLD2.Native/Dockerfile index 3e7e769a..e0d63a0e 100644 --- a/src/LanguageIdentification.CLD2.Native/Dockerfile +++ b/src/LanguageIdentification.CLD2.Native/Dockerfile @@ -1,5 +1,4 @@ -FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build -ARG BUILD_CONFIGURATION=Release +FROM debian:bullseye-slim AS build WORKDIR /repo COPY ["src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj", "src/LanguageIdentification.CLD2.Native/"] diff --git a/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj b/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj index fdec2604..1bdd177a 100644 --- a/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj +++ b/src/LanguageIdentification.CLD2.Native/LanguageIdentification.CLD2.Native.csproj @@ -2,7 +2,7 @@ netstandard2.1 - 0.0.0.21 + 0.1.0 Panlingo.LanguageIdentification.CLD2.Native Panlingo.LanguageIdentification.CLD2.Native Panlingo.LanguageIdentification.CLD2.Native @@ -62,6 +62,36 @@ + + + PreserveNewest + true + runtimes/win-x64/native + true + false + + + + + + PreserveNewest + true + runtimes/osx-x64/native/libcld2.dylib + true + false + + + + + + PreserveNewest + true + runtimes/osx-arm64/native/libcld2.dylib + true + false + + + diff --git a/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt b/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt index 4d6a56b7..d416d77b 100644 --- a/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt +++ b/src/LanguageIdentification.CLD2.Native/Native/CMakeLists.txt @@ -1,21 +1,48 @@ -cmake_minimum_required(VERSION 2.8 FATAL_ERROR) -project(cld2_bridge) +cmake_minimum_required(VERSION 3.10 FATAL_ERROR) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") -if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") -endif() +project(cld2) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD_REQUIRED ON) -if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") +if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -lpthread") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--start-group -lwinpthread -Wl,--end-group") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") + endif() +elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") -elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267") -endif() + if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64 -target arm64-apple-macos11") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64 -target arm64-apple-macos11") + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15") + endif() +endif() set(CMAKE_MACOSX_RPATH 1) set(CMAKE_POSITION_INDEPENDENT_CODE ON) +message(STATUS "System name: ${CMAKE_SYSTEM_NAME}") +message(STATUS "CMake version: ${CMAKE_VERSION}") +message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER_ID}") +message(STATUS "C compiler: ${CMAKE_C_COMPILER_ID}") +message(STATUS "CXX flags: ${CMAKE_CXX_FLAGS}") +message(STATUS "C flags: ${CMAKE_C_FLAGS}") + include_directories( ${PROJECT_SOURCE_DIR}/cld2/internal ${PROJECT_SOURCE_DIR}/cld2/public @@ -39,28 +66,28 @@ set(CLD2_SOURCES ${PROJECT_SOURCE_DIR}/cld2/internal/tote.cc ${PROJECT_SOURCE_DIR}/cld2/internal/utf8statetable.cc ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc - - ### Chrome (less perfect predictions) - # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc - # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc + + ### Chrome (less perfect predictions) + # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc + # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_delta_bi_4.cc - # ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc + # ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_quadchrome_2.cc - # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaoctachrome.cc + # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaoctachrome.cc # ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_distinctoctachrome.cc - # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_2.cc - ### + # ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_2.cc + ### - ### Full - ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc - ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc + ### Full + ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_uni_prop_80.cc + ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_cjk_compatible.cc ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_cjk_delta_bi_32.cc - ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc + ${PROJECT_SOURCE_DIR}/cld2/internal/generated_distinct_bi_0.cc ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_quad0122.cc - ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaocta0122.cc + ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_deltaocta0122.cc ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_generated_distinctocta0122.cc - ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_0122.cc - ### + ${PROJECT_SOURCE_DIR}/cld2/internal/cld_generated_score_quad_octa_0122.cc + ### ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_dynamic_compat.h ${PROJECT_SOURCE_DIR}/cld2/internal/cld2_dynamic_data_extractor.h @@ -101,6 +128,25 @@ set(CLD2_SOURCES add_library(objlib OBJECT ${CLD2_SOURCES}) -add_library(cld2 SHARED $) +add_library(${PROJECT_NAME} SHARED $) + +set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/cld2/binding.h") + +if (APPLE) + if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(CLANG_LIB_DIR /opt/homebrew/opt/llvm/lib/c++) + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(CLANG_LIB_DIR /usr/local/opt/llvm/lib/c++) + endif() -set_target_properties(cld2 PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/cld2/binding.h") \ No newline at end of file + target_link_libraries(${PROJECT_NAME} + -nostdlib++ + -Wl,${CLANG_LIB_DIR}/libc++.a + -Wl,${CLANG_LIB_DIR}/libc++abi.a) +else() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # nop + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + target_link_libraries(${PROJECT_NAME} -static-libgcc -static-libstdc++) + endif() +endif() \ No newline at end of file diff --git a/src/LanguageIdentification.CLD2.Native/Native/binding.cc b/src/LanguageIdentification.CLD2.Native/Native/binding.cc index 43207036..ee3bafe5 100644 --- a/src/LanguageIdentification.CLD2.Native/Native/binding.cc +++ b/src/LanguageIdentification.CLD2.Native/Native/binding.cc @@ -9,7 +9,7 @@ extern "C" { - PredictionResult* PredictLanguage(char *text, int* resultCount) + EXPORT PredictionResult* PredictLanguage(char *text, int* resultCount) { int textLength = strlen(text); @@ -62,7 +62,7 @@ extern "C" for (int i = 0; i < predictionCount; ++i) { CLD2::Language language = languages[i]; - double probability = scoreTotal > 0 ? scores[i] / (double)scoreTotal : 1.0; + double probability = scoreTotal > 0 ? scores[i] / (double)scoreTotal : 0; double proportion = percents[i] / 100.0; result[i].language = strdup(CLD2::LanguageCode(language)); @@ -75,7 +75,7 @@ extern "C" return result; } - void FreeResults(PredictionResult* results, int count) + EXPORT void FreeResults(PredictionResult* results, int count) { for (int i = 0; i < count; ++i) { free((void*)results[i].language); diff --git a/src/LanguageIdentification.CLD2.Native/Native/binding.h b/src/LanguageIdentification.CLD2.Native/Native/binding.h index f221d840..7feba150 100644 --- a/src/LanguageIdentification.CLD2.Native/Native/binding.h +++ b/src/LanguageIdentification.CLD2.Native/Native/binding.h @@ -5,11 +5,13 @@ #include "./cld2/public/compact_lang_det.h" #ifndef EXPORT -#if defined(_WIN32) || defined(_WIN64) -#define EXPORT __declspec(dllimport) -#else -#define EXPORT extern -#endif +# if defined(_WIN32) || defined(_WIN64) +# define EXPORT __declspec(dllexport) +# elif defined(__GNUC__) || defined(__clang__) +# define EXPORT __attribute__((visibility("default"))) +# else +# define EXPORT +# endif #endif extern "C" diff --git a/src/LanguageIdentification.CLD2.Native/Native/toolchain-mingw.cmake b/src/LanguageIdentification.CLD2.Native/Native/toolchain-mingw.cmake new file mode 100644 index 00000000..94e97c6b --- /dev/null +++ b/src/LanguageIdentification.CLD2.Native/Native/toolchain-mingw.cmake @@ -0,0 +1,16 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(TOOLCHAIN_PREFIX x86_64-w64-mingw32) + +# cross compilers to use for C, C++ and Fortran +set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc) +set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}-g++) +set(CMAKE_Fortran_COMPILER ${TOOLCHAIN_PREFIX}-gfortran) +set(CMAKE_RC_COMPILER ${TOOLCHAIN_PREFIX}-windres) + +# target environment on the build host system +set(CMAKE_FIND_ROOT_PATH /usr/${TOOLCHAIN_PREFIX}) + +# modify default behavior of FIND_XXX() commands +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) \ No newline at end of file diff --git a/src/LanguageIdentification.CLD2.Native/Native/toolchain-osxcross.cmake b/src/LanguageIdentification.CLD2.Native/Native/toolchain-osxcross.cmake new file mode 100644 index 00000000..6e1223e1 --- /dev/null +++ b/src/LanguageIdentification.CLD2.Native/Native/toolchain-osxcross.cmake @@ -0,0 +1,7 @@ +set(CMAKE_SYSTEM_NAME Darwin) +set(CMAKE_SYSTEM_VERSION 1) + +# Path to the osxcross toolchain binaries +set(CMAKE_OSX_SYSROOT /usr/local/osxcross/target) +set(CMAKE_C_COMPILER /usr/local/osxcross/bin/o64-clang) +set(CMAKE_CXX_COMPILER /usr/local/osxcross/bin/o64-clang++) \ No newline at end of file diff --git a/src/LanguageIdentification.CLD2.Native/Scripts/run-build.macos.sh b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.macos.sh new file mode 100644 index 00000000..dedf7bcb --- /dev/null +++ b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.macos.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -e + +if [ -z "$1" ]; then + echo "Error: No architecture specified." + echo "Usage: $0 " + exit 1 +fi + +ARCH=$1 + +if [[ "$ARCH" != "x86_64" && "$ARCH" != "arm64" ]]; then + echo "Error: Invalid architecture specified. Use 'x86_64' or 'arm64'." + exit 1 +fi + +echo "Hello world $ARCH"; + +brew install llvm + +workspace="build_temp" + +mkdir "$workspace" -p +cp -a ../../third_party/cld2/. $workspace/cld2 +cp -a Native/. $workspace + +ls -R . + +cd "$workspace" + +mkdir build +cd build + +echo "Build for MacOS on $ARCH"; +rm -rf * +cmake -DCMAKE_OSX_ARCHITECTURES=$ARCH .. +make -j $(sysctl -n hw.logicalcpu) + +ls -R + +otool -L libcld2.dylib +cp libcld2.dylib ../../libcld2.$ARCH.dylib + +# Clean up +rm -rf "$workspace" +echo "Goodbye world"; diff --git a/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh index f46c6d06..2b9571b6 100644 --- a/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh +++ b/src/LanguageIdentification.CLD2.Native/Scripts/run-build.sh @@ -15,17 +15,27 @@ cd "$workspace" mkdir build cd build + +# Build for Linux +rm -rf * cmake .. make -j $(nproc) # make -cd .. -echo $(pwd) -ls -R build -cd .. +ls -R -find "$workspace/build" -name "libcld2.so" -exec cp {} libcld2.so \; -rm -rf "$workspace" ldd libcld2.so +cp libcld2.so ../../libcld2.so + +# Build for Windows +rm -rf * +cmake .. -DCMAKE_TOOLCHAIN_FILE=./toolchain-mingw.cmake +make -j $(nproc) # make +ls -R + +cp libcld2.dll ../../libcld2.dll + +# Clean up +rm -rf "$workspace" echo "Goodbye world"; diff --git a/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh b/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh index 4f5708d0..c0ebe9b5 100644 --- a/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh +++ b/src/LanguageIdentification.CLD2.Native/Scripts/setup-build.sh @@ -6,4 +6,5 @@ echo "Installing build packages"; sudo apt -y update | apt -y update sudo apt -y install cmake | apt -y install cmake sudo apt -y install g++ | apt -y install g++ +sudo apt -y install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 | apt -y install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 diff --git a/src/LanguageIdentification.CLD2/CLD2Detector.cs b/src/LanguageIdentification.CLD2/CLD2Detector.cs index 4f257853..881451a3 100644 --- a/src/LanguageIdentification.CLD2/CLD2Detector.cs +++ b/src/LanguageIdentification.CLD2/CLD2Detector.cs @@ -13,7 +13,7 @@ public class CLD2Detector : IDisposable { public CLD2Detector() { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + if (!IsSupported()) { throw new NotSupportedException( $"{nameof(CLD2Detector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}" @@ -21,6 +21,18 @@ public CLD2Detector() } } + public static bool IsSupported() + { + return RuntimeInformation.OSArchitecture switch + { + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true, + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Windows) => true, + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true, + Architecture.Arm64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true, + _ => false, + }; + } + /// /// Produces a prediction for 'text' /// diff --git a/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs b/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs index 3ef06497..21897b92 100644 --- a/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs +++ b/src/LanguageIdentification.CLD2/Internal/CLD2DetectorWrapper.cs @@ -7,7 +7,10 @@ namespace Panlingo.LanguageIdentification.CLD2.Internal internal static class CLD2DetectorWrapper { [DllImport(CLD2NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr PredictLanguage(string text, out int resultCount); + public static extern IntPtr PredictLanguage( + [MarshalAs(UnmanagedType.LPUTF8Str)] string text, + out int resultCount + ); [DllImport(CLD2NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] public static extern void FreeResults(IntPtr results, int count); diff --git a/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj b/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj index 71ca49e8..189b9e60 100644 --- a/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj +++ b/src/LanguageIdentification.CLD2/LanguageIdentification.CLD2.csproj @@ -2,7 +2,7 @@ net5.0;net6.0;net7.0;net8.0 - 0.0.0.21 + 0.1.0 Panlingo.LanguageIdentification.CLD2 Panlingo.LanguageIdentification.CLD2 Panlingo.LanguageIdentification.CLD2 @@ -15,7 +15,11 @@ nlp lid language-identification language-detection cld2 README_CLD2.md - - Initial release +0.1.0.0 +- Windows and MacOS support + +0.0.0.1 +- Initial release This is a .NET wrapper for the Chrome Language Detection (CLD2) library by Google Inc. diff --git a/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs b/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs index 63710c75..ec55d826 100644 --- a/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs +++ b/src/LanguageIdentification.CLD3.ConsoleTest/Program.cs @@ -6,7 +6,7 @@ static void Main(string[] args) { using var cld3 = new CLD3Detector(minNumBytes: 0, maxNumBytes: 512); - var text = "Hello, how are you? Привіт, як справи? Привет, как дела?"; + var text = "Привіт, як справи?"; var singlePrediction = cld3.PredictLanguage(text); diff --git a/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json b/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json index 47d960e0..1f434ec6 100644 --- a/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json +++ b/src/LanguageIdentification.CLD3.ConsoleTest/Properties/launchSettings.json @@ -6,6 +6,9 @@ "WSL": { "commandName": "WSL2", "distributionName": "" + }, + "Project": { + "commandName": "Project" } } } \ No newline at end of file diff --git a/src/LanguageIdentification.CLD3.Native/.gitignore b/src/LanguageIdentification.CLD3.Native/.gitignore index 60230aec..2af070e4 100644 --- a/src/LanguageIdentification.CLD3.Native/.gitignore +++ b/src/LanguageIdentification.CLD3.Native/.gitignore @@ -1,2 +1,4 @@ libcld3.so +libcld3.dll +libcld3.dylib build_temp/** diff --git a/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs b/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs index c84346a0..5c5835d8 100644 --- a/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs +++ b/src/LanguageIdentification.CLD3.Native/CLD3NativeLibrary.cs @@ -5,6 +5,6 @@ public static class CLD3NativeLibrary /// /// Name of native binary /// - public const string Name = "libcld3.so"; + public const string Name = "libcld3"; } } diff --git a/src/LanguageIdentification.CLD3.Native/Dockerfile b/src/LanguageIdentification.CLD3.Native/Dockerfile index a575ae8f..d249b1b3 100644 --- a/src/LanguageIdentification.CLD3.Native/Dockerfile +++ b/src/LanguageIdentification.CLD3.Native/Dockerfile @@ -1,5 +1,4 @@ -FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build -ARG BUILD_CONFIGURATION=Release +FROM debian:bullseye-slim AS build WORKDIR /repo COPY ["src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj", "src/LanguageIdentification.CLD3.Native/"] diff --git a/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj b/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj index 71bd96dc..f70f6e96 100644 --- a/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj +++ b/src/LanguageIdentification.CLD3.Native/LanguageIdentification.CLD3.Native.csproj @@ -2,7 +2,7 @@ netstandard2.1 - 0.0.0.21 + 0.1.0 Panlingo.LanguageIdentification.CLD3.Native Panlingo.LanguageIdentification.CLD3.Native Panlingo.LanguageIdentification.CLD3.Native @@ -61,6 +61,36 @@ false + + + + PreserveNewest + true + runtimes/win-x64/native + true + false + + + + + + PreserveNewest + true + runtimes/osx-x64/native/libcld3.dylib + true + false + + + + + + PreserveNewest + true + runtimes/osx-arm64/native/libcld3.dylib + true + false + + diff --git a/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt b/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt index 6b3d4cde..82c9ea70 100644 --- a/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt +++ b/src/LanguageIdentification.CLD3.Native/Native/CMakeLists.txt @@ -1,45 +1,80 @@ -project(cld3) +cmake_minimum_required(VERSION 3.10 FATAL_ERROR) -cmake_minimum_required(VERSION 3.9) +project(cld3) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD_REQUIRED ON) + +if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -lpthread") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--start-group -lwinpthread -Wl,--end-group") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-allow-multiple-definition") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") + endif() +elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") + + if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64 -target arm64-apple-macos11") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64 -target arm64-apple-macos11") + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15") + endif() +endif() -add_definitions(-fPIC) # Position Independent Code -add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +set(CMAKE_MACOSX_RPATH 1) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +message(STATUS "System name: ${CMAKE_SYSTEM_NAME}") +message(STATUS "CMake version: ${CMAKE_VERSION}") +message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER_ID}") +message(STATUS "C compiler: ${CMAKE_C_COMPILER_ID}") +message(STATUS "CXX flags: ${CMAKE_CXX_FLAGS}") +message(STATUS "C flags: ${CMAKE_C_FLAGS}") add_library(${PROJECT_NAME} SHARED - src/base.cc - src/embedding_feature_extractor.cc - src/embedding_network.cc - src/feature_extractor.cc - src/feature_extractor.h - src/feature_types.cc - src/fml_parser.cc - src/language_identifier_features.cc - src/lang_id_nn_params.cc - src/nnet_language_identifier.cc - src/registry.cc - src/relevant_script_feature.cc - src/sentence_features.cc - src/task_context.cc - src/task_context_params.cc - src/unicodetext.cc - src/utils.cc - src/workspace.cc - - src/script_span/generated_entities.cc - src/script_span/getonescriptspan.cc - src/script_span/getonescriptspan.h - src/script_span/getonescriptspan_test.cc - src/script_span/utf8statetable.cc - src/script_span/offsetmap.cc - src/script_span/text_processing.cc - src/script_span/text_processing.h - src/script_span/fixunicodevalue.cc - - # bindings + src/base.cc + src/embedding_feature_extractor.cc + src/embedding_network.cc + src/feature_extractor.cc + src/feature_extractor.h + src/feature_types.cc + src/fml_parser.cc + src/language_identifier_features.cc + src/lang_id_nn_params.cc + src/nnet_language_identifier.cc + src/registry.cc + src/relevant_script_feature.cc + src/sentence_features.cc + src/task_context.cc + src/task_context_params.cc + src/unicodetext.cc + src/utils.cc + src/workspace.cc + + src/script_span/generated_entities.cc + src/script_span/getonescriptspan.cc + src/script_span/getonescriptspan.h + src/script_span/getonescriptspan_test.cc + src/script_span/utf8statetable.cc + src/script_span/offsetmap.cc + src/script_span/text_processing.cc + src/script_span/text_processing.h + src/script_span/fixunicodevalue.cc + + # bindings src/binding.cc src/binding.h src/fake_protobuf.h @@ -47,12 +82,35 @@ add_library(${PROJECT_NAME} SHARED set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "src/binding.h") -# unit tests exec: -add_executable(language_identifier_main src/language_identifier_main.cc) -target_link_libraries(language_identifier_main cld3) +if (APPLE) + if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(CLANG_LIB_DIR /opt/homebrew/opt/llvm/lib/c++) + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(CLANG_LIB_DIR /usr/local/opt/llvm/lib/c++) + endif() + + target_link_libraries(${PROJECT_NAME} + -nostdlib++ + -Wl,${CLANG_LIB_DIR}/libc++.a + -Wl,${CLANG_LIB_DIR}/libc++abi.a) +else() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # nop + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + target_link_libraries(${PROJECT_NAME} -static-libgcc -static-libstdc++) + endif() +endif() + +# Build unit tests +if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + add_executable(language_identifier_main src/language_identifier_main.cc) + target_link_libraries(language_identifier_main PRIVATE cld3) -add_executable(getonescriptspan_test src/script_span/getonescriptspan_test.cc) -target_link_libraries(getonescriptspan_test cld3) + add_executable(getonescriptspan_test src/script_span/getonescriptspan_test.cc) + target_link_libraries(getonescriptspan_test PRIVATE cld3) -add_executable(language_identifier_features_test src/language_identifier_features_test.cc) -target_link_libraries(language_identifier_features_test cld3) + add_executable(language_identifier_features_test src/language_identifier_features_test.cc) + target_link_libraries(language_identifier_features_test PRIVATE cld3) + endif() +endif() \ No newline at end of file diff --git a/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc b/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc index 807ca6f9..860d1ec7 100644 --- a/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc +++ b/src/LanguageIdentification.CLD3.Native/Native/src/binding.cc @@ -3,44 +3,48 @@ using namespace chrome_lang_id; -void* CreateIdentifier(int minNumBytes, int maxNumBytes) { - return new NNetLanguageIdentifier(minNumBytes, maxNumBytes); -} +extern "C" { + EXPORT void* create_cld3(int minNumBytes, int maxNumBytes) { + return new NNetLanguageIdentifier(minNumBytes, maxNumBytes); + } -void FreeIdentifier(void* identifier) { - delete static_cast(identifier); -} + EXPORT void destroy_cld3(void* identifier) { + delete static_cast(identifier); + } -PredictionResult FindLanguage(void* identifier, const char* text) { - NNetLanguageIdentifier* nativeIdentifier = static_cast(identifier); - auto nativeResult = nativeIdentifier->FindLanguage(text); + EXPORT PredictionResult* cld3_find_language(void* identifier, const char* text, int* resultCount) { + NNetLanguageIdentifier* nativeIdentifier = static_cast(identifier); + auto nativeResult = nativeIdentifier->FindLanguage(text); + + *resultCount = 1; + PredictionResult* result = new PredictionResult[*resultCount]; + result[0].language = strdup(nativeResult.language.c_str()); + result[0].probability = nativeResult.probability; + result[0].is_reliable = nativeResult.is_reliable; + result[0].proportion = nativeResult.proportion; + return result; + } - PredictionResult result; - result.language = strdup(nativeResult.language.c_str()); - result.probability = nativeResult.probability; - result.is_reliable = nativeResult.is_reliable; - result.proportion = nativeResult.proportion; - return result; -} + EXPORT PredictionResult* cld3_find_languages(void* identifier, const char* text, int numLangs, int* resultCount) { + NNetLanguageIdentifier* nativeIdentifier = static_cast(identifier); + auto nativeResults = nativeIdentifier->FindTopNMostFreqLangs(text, numLangs); + + *resultCount = static_cast(nativeResults.size()); + PredictionResult* result = new PredictionResult[*resultCount]; + for (int i = 0; i < *resultCount; ++i) { + result[i].language = strdup(nativeResults[i].language.c_str()); + result[i].probability = nativeResults[i].probability; + result[i].is_reliable = nativeResults[i].is_reliable; + result[i].proportion = nativeResults[i].proportion; + } + return result; + } -PredictionResult* FindLanguages(void* identifier, const char* text, int numLangs, int* resultCount) { - NNetLanguageIdentifier* nativeIdentifier = static_cast(identifier); - auto nativeResults = nativeIdentifier->FindTopNMostFreqLangs(text, numLangs); - - *resultCount = static_cast(nativeResults.size()); - PredictionResult* result = new PredictionResult[*resultCount]; - for (int i = 0; i < *resultCount; ++i) { - result[i].language = strdup(nativeResults[i].language.c_str()); - result[i].probability = nativeResults[i].probability; - result[i].is_reliable = nativeResults[i].is_reliable; - result[i].proportion = nativeResults[i].proportion; + EXPORT void cld3_destroy_prediction_result(PredictionResult* results, int count) { + for (int i = 0; i < count; ++i) { + free((void*)results[i].language); + } + delete[] results; } - return result; } -void FreeResults(PredictionResult* results, int count) { - for (int i = 0; i < count; ++i) { - free((void*)results[i].language); - } - delete[] results; -} \ No newline at end of file diff --git a/src/LanguageIdentification.CLD3.Native/Native/src/binding.h b/src/LanguageIdentification.CLD3.Native/Native/src/binding.h index 7e2ba7a6..0abea8b8 100644 --- a/src/LanguageIdentification.CLD3.Native/Native/src/binding.h +++ b/src/LanguageIdentification.CLD3.Native/Native/src/binding.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "base.h" #include "nnet_language_identifier.h" @@ -6,18 +6,17 @@ using namespace std; #ifndef EXPORT -# ifdef __linux__ +# if defined(_WIN32) || defined(_WIN64) +# define EXPORT __declspec(dllexport) +# elif defined(__GNUC__) || defined(__clang__) # define EXPORT __attribute__((visibility("default"))) # else -# if defined(_MSC_VER) -# define EXPORT __declspec(dllexport) -# else -# define EXPORT __attribute__((visibility("default"))) -# endif +# define EXPORT # endif #endif -extern "C" { +extern "C" +{ struct PredictionResult { const char* language; double probability; @@ -25,9 +24,9 @@ extern "C" { double proportion; }; - EXPORT void* CreateIdentifier(int minNumBytes, int maxNumBytes); - EXPORT void FreeIdentifier(void* identifier); - EXPORT PredictionResult FindLanguage(void* identifier, const char* text); - EXPORT PredictionResult* FindLanguages(void* identifier, const char* text, int numLangs, int* resultCount); - EXPORT void FreeResults(PredictionResult* results, int count); + EXPORT void* create_cld3(int minNumBytes, int maxNumBytes); + EXPORT void destroy_cld3(void* identifier); + EXPORT PredictionResult* cld3_find_language(void* identifier, const char* text, int* resultCount); + EXPORT PredictionResult* cld3_find_languages(void* identifier, const char* text, int numLangs, int* resultCount); + EXPORT void cld3_destroy_prediction_result(PredictionResult* results, int count); } \ No newline at end of file diff --git a/src/LanguageIdentification.CLD3.Native/Native/toolchain-mingw.cmake b/src/LanguageIdentification.CLD3.Native/Native/toolchain-mingw.cmake new file mode 100644 index 00000000..94e97c6b --- /dev/null +++ b/src/LanguageIdentification.CLD3.Native/Native/toolchain-mingw.cmake @@ -0,0 +1,16 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(TOOLCHAIN_PREFIX x86_64-w64-mingw32) + +# cross compilers to use for C, C++ and Fortran +set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc) +set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}-g++) +set(CMAKE_Fortran_COMPILER ${TOOLCHAIN_PREFIX}-gfortran) +set(CMAKE_RC_COMPILER ${TOOLCHAIN_PREFIX}-windres) + +# target environment on the build host system +set(CMAKE_FIND_ROOT_PATH /usr/${TOOLCHAIN_PREFIX}) + +# modify default behavior of FIND_XXX() commands +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) \ No newline at end of file diff --git a/src/LanguageIdentification.CLD3.Native/Native/toolchain-osxcross.cmake b/src/LanguageIdentification.CLD3.Native/Native/toolchain-osxcross.cmake new file mode 100644 index 00000000..6e1223e1 --- /dev/null +++ b/src/LanguageIdentification.CLD3.Native/Native/toolchain-osxcross.cmake @@ -0,0 +1,7 @@ +set(CMAKE_SYSTEM_NAME Darwin) +set(CMAKE_SYSTEM_VERSION 1) + +# Path to the osxcross toolchain binaries +set(CMAKE_OSX_SYSROOT /usr/local/osxcross/target) +set(CMAKE_C_COMPILER /usr/local/osxcross/bin/o64-clang) +set(CMAKE_CXX_COMPILER /usr/local/osxcross/bin/o64-clang++) \ No newline at end of file diff --git a/src/LanguageIdentification.CLD3.Native/Scripts/run-build.macos.sh b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.macos.sh new file mode 100644 index 00000000..cd169515 --- /dev/null +++ b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.macos.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -e + +if [ -z "$1" ]; then + echo "Error: No architecture specified." + echo "Usage: $0 " + exit 1 +fi + +ARCH=$1 + +if [[ "$ARCH" != "x86_64" && "$ARCH" != "arm64" ]]; then + echo "Error: Invalid architecture specified. Use 'x86_64' or 'arm64'." + exit 1 +fi + +echo "Hello world $ARCH"; + +brew install llvm +npm install -g zx + +workspace="build_temp" + +mkdir "$workspace" -p +cp -a ../../third_party/cld3/. $workspace/. +cp -a Native/. $workspace + +ls -R . + +cd "$workspace" + +zx ../Native/monkey-patch.mjs + +mkdir build +cd build + +echo "Build for MacOS on $ARCH"; +rm -rf * +cmake -DCMAKE_OSX_ARCHITECTURES=$ARCH .. +make -j $(sysctl -n hw.logicalcpu) + +ls -R + +otool -L libcld3.dylib +cp libcld3.dylib ../../libcld3.$ARCH.dylib + +# Clean up +rm -rf "$workspace" +echo "Goodbye world"; diff --git a/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh index facae322..ef0e887b 100644 --- a/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh +++ b/src/LanguageIdentification.CLD3.Native/Scripts/run-build.sh @@ -13,25 +13,41 @@ ls -R . cd "$workspace" +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash +export NVM_DIR="$HOME/.nvm" +[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" +nvm install 22 +nvm use 22 +npm install -g zx + zx ../Native/monkey-patch.mjs mkdir build cd build + +# Build for Linux +rm -rf * cmake .. make -j $(nproc) # make ./language_identifier_main # run tests ./language_identifier_features_test # run tests -cd .. - -echo $(pwd) -ls -R build -cd .. +ls -R -find "$workspace/build" -name "libcld3.so" -exec cp {} libcld3.so \; -rm -rf "$workspace" ldd libcld3.so +cp libcld3.so ../../libcld3.so +# Build for Windows +rm -rf * +cmake .. -DCMAKE_TOOLCHAIN_FILE=./toolchain-mingw.cmake +make -j $(nproc) # make + +ls -R + +cp libcld3.dll ../../libcld3.dll + +# Clean up +rm -rf "$workspace" echo "Goodbye world"; diff --git a/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh b/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh index f54c7eb6..324f3894 100644 --- a/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh +++ b/src/LanguageIdentification.CLD3.Native/Scripts/setup-build.sh @@ -3,14 +3,11 @@ set -e echo "Installing build packages"; -apt-get update && apt-get install -y ca-certificates curl gnupg && \ - curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ - echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list - apt -y update +apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates apt -y install cmake apt -y install g++ -apt -y install nodejs +apt -y install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 + -npm install -g zx diff --git a/src/LanguageIdentification.CLD3/CLD3Detector.cs b/src/LanguageIdentification.CLD3/CLD3Detector.cs index 5b5a1d9e..195c80d6 100644 --- a/src/LanguageIdentification.CLD3/CLD3Detector.cs +++ b/src/LanguageIdentification.CLD3/CLD3Detector.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Drawing; using System.Linq; using System.Runtime.InteropServices; using System.Threading; @@ -17,17 +18,29 @@ public class CLD3Detector : IDisposable public CLD3Detector(int minNumBytes, int maxNumBytes) { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + if (!IsSupported()) { throw new NotSupportedException( $"{nameof(CLD3Detector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}" ); } - _identifier = CLD3DetectorWrapper.CreateIdentifier(minNumBytes, maxNumBytes); + _identifier = CLD3DetectorWrapper.CreateCLD3(minNumBytes, maxNumBytes); _semaphore = new(1, 1); } + public static bool IsSupported() + { + return RuntimeInformation.OSArchitecture switch + { + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true, + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Windows) => true, + //Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true, + //Architecture.Arm64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true, + _ => false, + }; + } + public void Dispose() { GC.SuppressFinalize(this); @@ -35,7 +48,7 @@ public void Dispose() try { _semaphore.Wait(); - CLD3DetectorWrapper.FreeIdentifier(_identifier); + CLD3DetectorWrapper.DestroyCLD3(_identifier); } finally { @@ -50,8 +63,29 @@ public void Dispose() /// List of language predictions public CLD3Prediction PredictLanguage(string text) { - var result = CLD3DetectorWrapper.FindLanguage(_identifier, text); - return new CLD3Prediction(result); + var resultPtr = CLD3DetectorWrapper.PredictLanguage( + identifier: _identifier, + text: text, + resultCount: out var resultCount + ); + + try + { + var nativeResult = new CLD3PredictionResult[resultCount]; + var structSize = Marshal.SizeOf(typeof(CLD3PredictionResult)); + + for (var i = 0; i < resultCount; i++) + { + nativeResult[i] = Marshal.PtrToStructure(resultPtr + i * structSize); + } + + var firstItem = nativeResult.First(); + return new CLD3Prediction(firstItem); + } + finally + { + CLD3DetectorWrapper.DestroyPredictionResult(resultPtr, resultCount); + } } /// @@ -65,7 +99,7 @@ public IEnumerable PredictLanguages( int count ) { - var resultPtr = CLD3DetectorWrapper.FindLanguages( + var resultPtr = CLD3DetectorWrapper.PredictLanguages( identifier: _identifier, text: text, numLangs: count, @@ -74,22 +108,22 @@ int count try { - var result = new CLD3PredictionResult[resultCount]; + var nativeResult = new CLD3PredictionResult[resultCount]; var structSize = Marshal.SizeOf(typeof(CLD3PredictionResult)); for (var i = 0; i < resultCount; i++) { - result[i] = Marshal.PtrToStructure(resultPtr + i * structSize); + nativeResult[i] = Marshal.PtrToStructure(resultPtr + i * structSize); } - return result + return nativeResult .OrderByDescending(x => x.Probability) .Select(x => new CLD3Prediction(x)) .ToArray(); } finally { - CLD3DetectorWrapper.FreeResults(resultPtr, resultCount); + CLD3DetectorWrapper.DestroyPredictionResult(resultPtr, resultCount); } } } diff --git a/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs b/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs index 1d856609..9fc9e0de 100644 --- a/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs +++ b/src/LanguageIdentification.CLD3/Internal/CLD3DetectorWrapper.cs @@ -6,20 +6,29 @@ namespace Panlingo.LanguageIdentification.CLD3.Internal { internal static class CLD3DetectorWrapper { - [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr CreateIdentifier(int minNumBytes, int maxNumBytes); + [DllImport(CLD3NativeLibrary.Name, EntryPoint = "create_cld3", CallingConvention = CallingConvention.Cdecl)] + public static extern IntPtr CreateCLD3(int minNumBytes, int maxNumBytes); - [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern void FreeIdentifier(IntPtr identifier); + [DllImport(CLD3NativeLibrary.Name, EntryPoint = "destroy_cld3", CallingConvention = CallingConvention.Cdecl)] + public static extern void DestroyCLD3(IntPtr identifier); - [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern CLD3PredictionResult FindLanguage(IntPtr identifier, string text); + [DllImport(CLD3NativeLibrary.Name, EntryPoint = "cld3_find_language", CallingConvention = CallingConvention.Cdecl)] + public static extern IntPtr PredictLanguage( + IntPtr identifier, + [MarshalAs(UnmanagedType.LPUTF8Str)] string text, + out int resultCount + ); - [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr FindLanguages(IntPtr identifier, string text, int numLangs, out int resultCount); + [DllImport(CLD3NativeLibrary.Name, EntryPoint = "cld3_find_languages", CallingConvention = CallingConvention.Cdecl)] + public static extern IntPtr PredictLanguages( + IntPtr identifier, + [MarshalAs(UnmanagedType.LPUTF8Str)] string text, + int numLangs, + out int resultCount + ); - [DllImport(CLD3NativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern void FreeResults(IntPtr results, int count); + [DllImport(CLD3NativeLibrary.Name, EntryPoint = "cld3_destroy_prediction_result", CallingConvention = CallingConvention.Cdecl)] + public static extern void DestroyPredictionResult(IntPtr results, int count); } } diff --git a/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj b/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj index 4e6828bc..2f715fe8 100644 --- a/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj +++ b/src/LanguageIdentification.CLD3/LanguageIdentification.CLD3.csproj @@ -2,7 +2,7 @@ net5.0;net6.0;net7.0;net8.0 - 0.0.0.21 + 0.1.0 Panlingo.LanguageIdentification.CLD3 Panlingo.LanguageIdentification.CLD3 Panlingo.LanguageIdentification.CLD3 @@ -15,6 +15,9 @@ nlp lid language-identification language-detection cld3 README_CLD3.md +0.1.0.0 +- Windows support + 0.0.0.20: - Protobuf is not required now! diff --git a/src/LanguageIdentification.FastText.ConsoleTest/Program.cs b/src/LanguageIdentification.FastText.ConsoleTest/Program.cs index fc018ec3..2e294170 100644 --- a/src/LanguageIdentification.FastText.ConsoleTest/Program.cs +++ b/src/LanguageIdentification.FastText.ConsoleTest/Program.cs @@ -4,7 +4,7 @@ internal class Program { static void Main(string[] args) { - var text = "Hello, how are you? Привіт, як справи? Привет, как дела?"; + var text = "Привіт, як справи?"; using var fastText = new FastTextDetector(); diff --git a/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json b/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json index 65b8965a..1f434ec6 100644 --- a/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json +++ b/src/LanguageIdentification.FastText.ConsoleTest/Properties/launchSettings.json @@ -2,6 +2,13 @@ "profiles": { "Docker": { "commandName": "Docker" + }, + "WSL": { + "commandName": "WSL2", + "distributionName": "" + }, + "Project": { + "commandName": "Project" } } } \ No newline at end of file diff --git a/src/LanguageIdentification.FastText.Native/.gitignore b/src/LanguageIdentification.FastText.Native/.gitignore index 01a24561..d15bc74b 100644 --- a/src/LanguageIdentification.FastText.Native/.gitignore +++ b/src/LanguageIdentification.FastText.Native/.gitignore @@ -1,2 +1,5 @@ libfasttext.so +fasttext.dll +libfasttext.arm64.dylib +libfasttext.x86_64.dylib build_temp/** diff --git a/src/LanguageIdentification.FastText.Native/Dockerfile b/src/LanguageIdentification.FastText.Native/Dockerfile index 2948215c..2a0e5655 100644 --- a/src/LanguageIdentification.FastText.Native/Dockerfile +++ b/src/LanguageIdentification.FastText.Native/Dockerfile @@ -1,5 +1,4 @@ -FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build -ARG BUILD_CONFIGURATION=Release +FROM debian:bullseye-slim AS build WORKDIR /repo COPY ["src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj", "src/LanguageIdentification.FastText.Native/"] diff --git a/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs b/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs index a0688250..064f5cae 100644 --- a/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs +++ b/src/LanguageIdentification.FastText.Native/FastTextNativeLibrary.cs @@ -5,7 +5,7 @@ public class FastTextNativeLibrary /// /// Name of native binary /// - public const string Name = "libfasttext.so"; + public const string Name = "fasttext"; } } diff --git a/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj b/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj index 375f561b..197a8b72 100644 --- a/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj +++ b/src/LanguageIdentification.FastText.Native/LanguageIdentification.FastText.Native.csproj @@ -2,7 +2,7 @@ netstandard2.1 - 0.0.0.21 + 0.1.0 Panlingo.LanguageIdentification.FastText.Native Panlingo.LanguageIdentification.FastText.Native Panlingo.LanguageIdentification.FastText.Native @@ -62,6 +62,36 @@ + + + PreserveNewest + true + runtimes/win-x64/native + true + false + + + + + + PreserveNewest + true + runtimes/osx-x64/native/libfasttext.dylib + true + false + + + + + + PreserveNewest + true + runtimes/osx-arm64/native/libfasttext.dylib + true + false + + + diff --git a/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt b/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt index 0fe25023..9dfc83c8 100644 --- a/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt +++ b/src/LanguageIdentification.FastText.Native/Native/CMakeLists.txt @@ -1,15 +1,41 @@ -cmake_minimum_required(VERSION 2.8 FATAL_ERROR) -project(fasttext_bridge) +cmake_minimum_required(VERSION 3.10 FATAL_ERROR) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") -if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") +project(fasttext) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD_REQUIRED ON) + +if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -lpthread") +elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -pthread -funroll-loops -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread -funroll-loops -O3") + + if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch arm64 -target arm64-apple-macos11") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch arm64 -target arm64-apple-macos11") + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch x86_64 -target x86_64-apple-macos10.15") + endif() +elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /W3 /O2 /fp:precise /arch:AVX2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W3 /O2 /fp:precise /arch:AVX2") endif() set(CMAKE_MACOSX_RPATH 1) set(CMAKE_POSITION_INDEPENDENT_CODE ON) +message(STATUS "CMake version: ${CMAKE_VERSION}") +message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER_ID}") +message(STATUS "C compiler: ${CMAKE_C_COMPILER_ID}") +message(STATUS "CXX flags: ${CMAKE_CXX_FLAGS}") +message(STATUS "C flags: ${CMAKE_C_FLAGS}") + add_subdirectory(fasttext EXCLUDE_FROM_ALL) include_directories( @@ -21,7 +47,24 @@ set(SOURCES ${PROJECT_SOURCE_DIR}/binding.cc) add_library(objlib OBJECT ${SOURCES}) -add_library(fasttext SHARED $) -set_target_properties(fasttext PROPERTIES PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/binding.h) -target_link_libraries(fasttext fasttext-static_pic) +add_library(${PROJECT_NAME} SHARED $) +set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/binding.h) + +if (APPLE) + if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(CLANG_LIB_DIR /opt/homebrew/opt/llvm/lib/c++) + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(CLANG_LIB_DIR /usr/local/opt/llvm/lib/c++) + endif() + target_link_libraries(${PROJECT_NAME} fasttext-static_pic + -nostdlib++ + -Wl,${CLANG_LIB_DIR}/libc++.a + -Wl,${CLANG_LIB_DIR}/libc++abi.a) +else() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_libraries(${PROJECT_NAME} fasttext-static_pic) + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + target_link_libraries(${PROJECT_NAME} fasttext-static_pic -static-libgcc -static-libstdc++) + endif() +endif() diff --git a/src/LanguageIdentification.FastText.Native/Native/binding.cc b/src/LanguageIdentification.FastText.Native/Native/binding.cc index c2f269ba..af7f58e6 100644 --- a/src/LanguageIdentification.FastText.Native/Native/binding.cc +++ b/src/LanguageIdentification.FastText.Native/Native/binding.cc @@ -18,30 +18,31 @@ extern "C" { *err_ptr = strdup(e.what()); } - void DestroyString(char* s) { + EXPORT void DestroyString(char* s) { if (s != nullptr) { free(s); } } - fasttext_t* CreateFastText(void) { + EXPORT fasttext_t* CreateFastText(void) { return (fasttext_t*)(new FastTextExtension()); } - void DestroyFastText(fasttext_t* handle) { + EXPORT void DestroyFastText(fasttext_t* handle) { FastTextExtension* x = (FastTextExtension*)handle; delete x; } - void FastTextLoadModel(fasttext_t* handle, const char* filename, char** err_ptr) { + EXPORT void FastTextLoadModel(fasttext_t* handle, const char* filename, char** err_ptr) { try { ((FastTextExtension*)handle)->loadModel(filename); - } catch (const std::invalid_argument& e) { + } + catch (const std::invalid_argument& e) { save_error(err_ptr, e); } } - void FastTextLoadModelData(fasttext_t* handle, const char* buffer, size_t buffer_length, char** err_ptr) { + EXPORT void FastTextLoadModelData(fasttext_t* handle, const char* buffer, size_t buffer_length, char** err_ptr) { try { ((FastTextExtension*)handle)->loadModelData(buffer, buffer_length); } @@ -50,16 +51,17 @@ extern "C" { } } - int FastTextGetModelDimensions(fasttext_t* handle) { + EXPORT int FastTextGetModelDimensions(fasttext_t* handle) { return ((FastTextExtension*)handle)->getDimension(); } - fasttext_predictions_t* FastTextPredict(fasttext_t* handle, const char* text, int32_t k, float threshold, char** err_ptr) { + EXPORT fasttext_predictions_t* FastTextPredict(fasttext_t* handle, const char* text, int32_t k, float threshold, char** err_ptr) { std::vector> predictions; std::stringstream ioss(text); try { ((FastTextExtension*)handle)->predictLine(ioss, predictions, k, threshold); - } catch (const std::invalid_argument& e) { + } + catch (const std::invalid_argument& e) { save_error(err_ptr, e); return nullptr; } @@ -75,7 +77,7 @@ extern "C" { return ret; } - void DestroyPredictions(fasttext_predictions_t* predictions) { + EXPORT void DestroyPredictions(fasttext_predictions_t* predictions) { if (predictions == nullptr) { return; } @@ -87,7 +89,7 @@ extern "C" { free(predictions); } - fasttext_labels_t* FastTextGetLabels(fasttext_t* handle) { + EXPORT fasttext_labels_t* FastTextGetLabels(fasttext_t* handle) { std::shared_ptr d = ((FastTextExtension*)handle)->getDictionary(); std::vector labels_freq = d->getCounts(fasttext::entry_type::label); size_t len = labels_freq.size(); @@ -106,7 +108,7 @@ extern "C" { return ret; } - void DestroyLabels(fasttext_labels_t* labels) { + EXPORT void DestroyLabels(fasttext_labels_t* labels) { if (labels == nullptr) { return; } @@ -118,18 +120,18 @@ extern "C" { free(labels); } - void FastTextAbort(fasttext_t* handle) { + EXPORT void FastTextAbort(fasttext_t* handle) { ((FastTextExtension*)handle)->abort(); } - fasttext_tokens_t* FastTextTokenize(fasttext_t* handle, const char* text) { + EXPORT fasttext_tokens_t* FastTextTokenize(fasttext_t* handle, const char* text) { std::vector text_split; std::shared_ptr d = ((FastTextExtension*)handle)->getDictionary(); std::stringstream ioss(text); std::string token; while (!ioss.eof()) { while (d->readWord(ioss, token)) { - text_split.push_back(token); + text_split.push_back(token); } } size_t len = text_split.size(); @@ -143,7 +145,7 @@ extern "C" { return ret; } - void DestroyTokens(fasttext_tokens_t* tokens) { + EXPORT void DestroyTokens(fasttext_tokens_t* tokens) { for (size_t i = 0; i < tokens->length; i++) { free(tokens->tokens[i]); } diff --git a/src/LanguageIdentification.FastText.Native/Native/binding.h b/src/LanguageIdentification.FastText.Native/Native/binding.h index ad4909f6..a9170377 100644 --- a/src/LanguageIdentification.FastText.Native/Native/binding.h +++ b/src/LanguageIdentification.FastText.Native/Native/binding.h @@ -6,14 +6,16 @@ #ifndef EXPORT # if defined(_WIN32) || defined(_WIN64) -# define EXPORT __declspec(dllimport) +# define EXPORT __declspec(dllexport) +# elif defined(__GNUC__) || defined(__clang__) +# define EXPORT __attribute__((visibility("default"))) # else -# define EXPORT extern +# define EXPORT # endif #endif -extern "C" { - +extern "C" +{ typedef struct fasttext_t fasttext_t; typedef struct { diff --git a/src/LanguageIdentification.FastText.Native/Scripts/run-build.macos.sh b/src/LanguageIdentification.FastText.Native/Scripts/run-build.macos.sh new file mode 100644 index 00000000..dcf0d7b0 --- /dev/null +++ b/src/LanguageIdentification.FastText.Native/Scripts/run-build.macos.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -e + +if [ -z "$1" ]; then + echo "Error: No architecture specified." + echo "Usage: $0 " + exit 1 +fi + +ARCH=$1 + +if [[ "$ARCH" != "x86_64" && "$ARCH" != "arm64" ]]; then + echo "Error: Invalid architecture specified. Use 'x86_64' or 'arm64'." + exit 1 +fi + +echo "Hello world $ARCH"; + +brew install llvm + +workspace="build_temp" + +mkdir "$workspace" -p +cp -a ../../third_party/fastText/. $workspace/fasttext +cp -a Native/. $workspace + +ls -R . + +cd "$workspace" + +mkdir build +cd build + +echo "Build for MacOS on $ARCH"; +rm -rf * +cmake -DCMAKE_OSX_ARCHITECTURES=$ARCH .. +make -j $(sysctl -n hw.logicalcpu) + +ls -R + +otool -L libfasttext.dylib +cp libfasttext.dylib ../../libfasttext.$ARCH.dylib + +# Clean up +rm -rf "$workspace" +echo "Goodbye world"; diff --git a/src/LanguageIdentification.FastText.Native/Scripts/run-build.ps1 b/src/LanguageIdentification.FastText.Native/Scripts/run-build.ps1 new file mode 100644 index 00000000..c34c510c --- /dev/null +++ b/src/LanguageIdentification.FastText.Native/Scripts/run-build.ps1 @@ -0,0 +1,45 @@ +# PowerShell equivalent script +# Ensure script stops if any command fails +$ErrorActionPreference = "Stop" + +Write-Output "Hello world" + +$workspace = "build_temp" + +# Create directory if it doesn't exist +if (-Not (Test-Path $workspace)) { + New-Item -Path $workspace -ItemType Directory + New-Item -Path "$workspace/fasttext" -ItemType Directory +} + +# Copy directories +Copy-Item -Path "../../third_party/fastText/*" -Destination "$workspace/fasttext" -Recurse -Force +Copy-Item -Path "Native/*" -Destination $workspace -Recurse -Force + +# List directory contents recursively +Get-ChildItem -Recurse -Path . + +Set-Location $workspace + +# Create and enter build directory +if (-Not (Test-Path "build")) { + New-Item -Path "build" -ItemType Directory +} +Set-Location "build" + +# Build for Windows +cmake .. +cmake --build . + +# List directory contents recursively +Get-ChildItem -Recurse -Path . + +# Display shared library dependencies +Copy-Item -Path ".\Debug\fasttext.dll" -Destination "../../fasttext.dll" + +# List directory contents recursively +Get-ChildItem -Recurse -Path . + +# Clean up +cd ../.. +Write-Output "Goodbye world" \ No newline at end of file diff --git a/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh b/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh index 6e001a81..e069b375 100644 --- a/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh +++ b/src/LanguageIdentification.FastText.Native/Scripts/run-build.sh @@ -15,17 +15,17 @@ cd "$workspace" mkdir build cd build + +# Build for Linux +rm -rf * cmake .. make -j $(nproc) # make -cd .. -echo $(pwd) -ls -R build -cd .. +ls -R -find "$workspace/build" -name "libfasttext.so" -exec cp {} libfasttext.so \; -rm -rf "$workspace" ldd libfasttext.so +cp libfasttext.so ../../libfasttext.so +# Clean up +rm -rf "$workspace" echo "Goodbye world"; - diff --git a/src/LanguageIdentification.FastText/FastTextDetector.cs b/src/LanguageIdentification.FastText/FastTextDetector.cs index abdce527..5c7f4181 100644 --- a/src/LanguageIdentification.FastText/FastTextDetector.cs +++ b/src/LanguageIdentification.FastText/FastTextDetector.cs @@ -18,7 +18,7 @@ public class FastTextDetector : IDisposable public FastTextDetector() { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + if (!IsSupported()) { throw new NotSupportedException( $"{nameof(FastTextDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}" @@ -29,6 +29,18 @@ public FastTextDetector() _semaphore = new SemaphoreSlim(1, 1); } + public static bool IsSupported() + { + return RuntimeInformation.OSArchitecture switch + { + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true, + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Windows) => true, + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true, + Architecture.Arm64 when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => true, + _ => false, + }; + } + public string ModelPath { get; private set; } = string.Empty; /// @@ -153,10 +165,11 @@ ref errPtr var predictions = Marshal.PtrToStructure(predictionPtr); var result = new List(); - for (ulong i = 0; i < predictions.Length; i++) + var structSize = Marshal.SizeOf(); + + for (var i = 0; i < (int)predictions.Length; i++) { - IntPtr elementPtr = new IntPtr(predictions.Predictions.ToInt64() + (long)(i * (uint)Marshal.SizeOf())); - var prediction = Marshal.PtrToStructure(elementPtr); + var prediction = Marshal.PtrToStructure(predictions.Predictions + i * structSize); var label = DecodeString(prediction.Label); result.Add(new FastTextPrediction( diff --git a/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs b/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs index f8815d77..3f4756b1 100644 --- a/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs +++ b/src/LanguageIdentification.FastText/Internal/FastTextDetectorWrapper.cs @@ -14,16 +14,22 @@ internal static class FastTextDetectorWrapper public static extern void DestroyFastText(IntPtr handle); [DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern void FastTextLoadModel(IntPtr handle, string filename, ref IntPtr errptr); + public static extern void FastTextLoadModel(IntPtr handle, string filename, ref IntPtr errPtr); [DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern void FastTextLoadModelData(IntPtr handle, IntPtr buffer, uint bufferLength, ref IntPtr errptr); + public static extern void FastTextLoadModelData(IntPtr handle, IntPtr buffer, uint bufferLength, ref IntPtr errPtr); [DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] public static extern int FastTextGetModelDimensions(IntPtr handle); [DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr FastTextPredict(IntPtr handle, string text, int k, float threshold, ref IntPtr errptr); + public static extern IntPtr FastTextPredict( + IntPtr handle, + [MarshalAs(UnmanagedType.LPUTF8Str)] string text, + int k, + float threshold, + ref IntPtr errPtr + ); [DllImport(FastTextNativeLibrary.Name, CallingConvention = CallingConvention.Cdecl)] public static extern void DestroyPredictions(IntPtr predictions); diff --git a/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj b/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj index 089e5159..486577bb 100644 --- a/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj +++ b/src/LanguageIdentification.FastText/LanguageIdentification.FastText.csproj @@ -2,7 +2,7 @@ net5.0;net6.0;net7.0;net8.0 - 0.0.0.21 + 0.1.0 Panlingo.LanguageIdentification.FastText Panlingo.LanguageIdentification.FastText Panlingo.LanguageIdentification.FastText @@ -15,6 +15,9 @@ nlp lid language-identification language-detection fasttext README_FASTTEXT.md +0.1.0.0 +- Windows and MacOS support + 0.0.0.21: - Default FastText model is included in NuGet package diff --git a/src/LanguageIdentification.Lingua.Native/Dockerfile b/src/LanguageIdentification.Lingua.Native/Dockerfile index f60462e0..b0bbaabc 100644 --- a/src/LanguageIdentification.Lingua.Native/Dockerfile +++ b/src/LanguageIdentification.Lingua.Native/Dockerfile @@ -1,5 +1,4 @@ FROM ubuntu:22.04 AS build -ARG BUILD_CONFIGURATION=Release WORKDIR /repo COPY ["src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj", "src/LanguageIdentification.Lingua.Native/"] diff --git a/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj b/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj index 317961a2..a2fa47e8 100644 --- a/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj +++ b/src/LanguageIdentification.Lingua.Native/LanguageIdentification.Lingua.Native.csproj @@ -2,7 +2,7 @@ netstandard2.1 - 0.0.0.21 + 0.0.0.23 Panlingo.LanguageIdentification.Lingua.Native Panlingo.LanguageIdentification.Lingua.Native Panlingo.LanguageIdentification.Lingua.Native diff --git a/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs b/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs index 4019366a..7c6326bb 100644 --- a/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs +++ b/src/LanguageIdentification.Lingua.Native/LinguaNativeLibrary.cs @@ -5,6 +5,6 @@ public static class LinguaNativeLibrary /// /// Name of native binary /// - public const string Name = "liblingua.so"; + public const string Name = "lingua"; } } diff --git a/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj b/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj index 16c4a33b..60341491 100644 --- a/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj +++ b/src/LanguageIdentification.Lingua/LanguageIdentification.Lingua.csproj @@ -2,7 +2,7 @@ net5.0;net6.0;net7.0;net8.0 - 0.0.0.21 + 0.0.0.23 Panlingo.LanguageIdentification.Lingua Panlingo.LanguageIdentification.Lingua Panlingo.LanguageIdentification.Lingua diff --git a/src/LanguageIdentification.Lingua/LinguaDetector.cs b/src/LanguageIdentification.Lingua/LinguaDetector.cs index 9e39e1f5..3a1808e4 100644 --- a/src/LanguageIdentification.Lingua/LinguaDetector.cs +++ b/src/LanguageIdentification.Lingua/LinguaDetector.cs @@ -16,7 +16,7 @@ public class LinguaDetector : IDisposable internal LinguaDetector(LinguaDetectorBuilder builder) { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + if (!IsSupported()) { throw new NotSupportedException( $"{nameof(LinguaDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}" @@ -30,6 +30,15 @@ internal LinguaDetector(LinguaDetectorBuilder builder) } } + public static bool IsSupported() + { + return RuntimeInformation.OSArchitecture switch + { + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true, + _ => false, + }; + } + /// /// Produces a prediction for 'text' /// diff --git a/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs b/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs index 2e1624ce..70f1221f 100644 --- a/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs +++ b/src/LanguageIdentification.Lingua/LinguaDetectorBuilder.cs @@ -14,7 +14,7 @@ public class LinguaDetectorBuilder : IDisposable public LinguaDetectorBuilder(LinguaLanguage[] languages) { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + if (!LinguaDetector.IsSupported()) { throw new NotSupportedException( $"{nameof(LinguaDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}" diff --git a/src/LanguageIdentification.MediaPipe.Native/Dockerfile b/src/LanguageIdentification.MediaPipe.Native/Dockerfile index 078d583f..5fa55755 100644 --- a/src/LanguageIdentification.MediaPipe.Native/Dockerfile +++ b/src/LanguageIdentification.MediaPipe.Native/Dockerfile @@ -1,5 +1,4 @@ FROM ubuntu:22.04 AS build -ARG BUILD_CONFIGURATION=Release WORKDIR /repo COPY ["src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj", "src/LanguageIdentification.MediaPipe.Native/"] diff --git a/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj b/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj index 7cee9456..1c747b5b 100644 --- a/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj +++ b/src/LanguageIdentification.MediaPipe.Native/LanguageIdentification.MediaPipe.Native.csproj @@ -2,7 +2,7 @@ netstandard2.1 - 0.0.0.21 + 0.0.0.23 Panlingo.LanguageIdentification.MediaPipe.Native Panlingo.LanguageIdentification.MediaPipe.Native Panlingo.LanguageIdentification.MediaPipe.Native diff --git a/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs b/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs index 44f627e6..74c0cf9a 100644 --- a/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs +++ b/src/LanguageIdentification.MediaPipe.Native/MediaPipeNativeLibrary.cs @@ -5,7 +5,7 @@ public class MediaPipeNativeLibrary /// /// Name of native binary /// - public const string Name = "liblanguage_detector.so"; + public const string Name = "language_detector"; /// /// Name of model /// diff --git a/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj b/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj index e296f9e9..f4d48028 100644 --- a/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj +++ b/src/LanguageIdentification.MediaPipe/LanguageIdentification.MediaPipe.csproj @@ -2,7 +2,7 @@ net5.0;net6.0;net7.0;net8.0 - 0.0.0.21 + 0.0.0.23 Panlingo.LanguageIdentification.MediaPipe Panlingo.LanguageIdentification.MediaPipe Panlingo.LanguageIdentification.MediaPipe diff --git a/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs b/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs index df87aa36..7a1a8610 100644 --- a/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs +++ b/src/LanguageIdentification.MediaPipe/MediaPipeDetector.cs @@ -25,7 +25,7 @@ public MediaPipeDetector(int resultCount = -1, float scoreThreshold = 0.0f, stri public MediaPipeDetector(MediaPipeOptions options) { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + if (!IsSupported()) { throw new NotSupportedException( $"{nameof(MediaPipeDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}" @@ -91,6 +91,15 @@ public MediaPipeDetector(MediaPipeOptions options) _semaphore = new SemaphoreSlim(1, 1); } + public static bool IsSupported() + { + return RuntimeInformation.OSArchitecture switch + { + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true, + _ => false, + }; + } + public IEnumerable PredictLanguages(string text) { var nativeResult = new LanguageDetectorResult(); diff --git a/src/LanguageIdentification.Tests/CLD2Tests.cs b/src/LanguageIdentification.Tests/CLD2Tests.cs index 870bee44..220b30ab 100644 --- a/src/LanguageIdentification.Tests/CLD2Tests.cs +++ b/src/LanguageIdentification.Tests/CLD2Tests.cs @@ -5,12 +5,14 @@ namespace Panlingo.LanguageIdentification.Tests; public class CLD2Tests { - [Theory] + [SkippableTheory] [InlineData("en", Constants.PHRASE_ENG_1, 0.9999)] [InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)] - [InlineData("un", Constants.PHRASE_RUS_1, 0.9999)] + [InlineData("un", Constants.PHRASE_RUS_1, 0)] public void CLD2SingleLanguage(string languageCode, string text, double score) { + Skip.IfNot(CLD2Detector.IsSupported()); + using var cld2 = new CLD2Detector(); var predictions = cld2.PredictLanguage(text); diff --git a/src/LanguageIdentification.Tests/CLD3Tests.cs b/src/LanguageIdentification.Tests/CLD3Tests.cs index 10c0e662..7f7769c0 100644 --- a/src/LanguageIdentification.Tests/CLD3Tests.cs +++ b/src/LanguageIdentification.Tests/CLD3Tests.cs @@ -5,24 +5,46 @@ namespace Panlingo.LanguageIdentification.Tests; public class CLD3Tests { - [Theory] + [SkippableTheory] [InlineData("en", Constants.PHRASE_ENG_1, 0.9985)] [InlineData("uk", Constants.PHRASE_UKR_1, 0.9992)] [InlineData("ru", Constants.PHRASE_RUS_1, 0.9770)] public void CLD3SingleLanguage(string languageCode, string text, double score) { + Skip.IfNot(CLD3Detector.IsSupported()); + using var cld3 = new CLD3Detector(0, 512); var prediction = cld3.PredictLanguage(text: text); var predictions = cld3.PredictLanguages(text: text, count: 3); - var mainLanguage = predictions.FirstOrDefault(); - if (prediction is null || mainLanguage is null) + if (prediction is null) { throw new NullReferenceException(); } Assert.Equal(languageCode, prediction.Language); + Assert.Equal(score, prediction.Probability, Constants.EPSILON); + } + + [SkippableTheory] + [InlineData("en", Constants.PHRASE_ENG_1, 0.9985)] + [InlineData("uk", Constants.PHRASE_UKR_1, 0.9992)] + [InlineData("ru", Constants.PHRASE_RUS_1, 0.9770)] + public void CLD3MixedLanguage(string languageCode, string text, double score) + { + Skip.IfNot(CLD3Detector.IsSupported()); + + using var cld3 = new CLD3Detector(0, 512); + + var predictions = cld3.PredictLanguages(text: text, count: 3); + var mainLanguage = predictions.FirstOrDefault(); + + if (mainLanguage is null) + { + throw new NullReferenceException(); + } + Assert.Equal(languageCode, mainLanguage.Language); Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON); } diff --git a/src/LanguageIdentification.Tests/FastTextTests.cs b/src/LanguageIdentification.Tests/FastTextTests.cs index f7862221..1d6a6f78 100644 --- a/src/LanguageIdentification.Tests/FastTextTests.cs +++ b/src/LanguageIdentification.Tests/FastTextTests.cs @@ -1,20 +1,24 @@ -using Panlingo.LanguageIdentification.FastText; +using System.Runtime.InteropServices; +using Panlingo.LanguageIdentification.FastText; using Panlingo.LanguageIdentification.Tests.Helpers; namespace Panlingo.LanguageIdentification.Tests; -public class FastTextTests +public class FastTextTests : IAsyncLifetime { - [Theory] + private readonly string _modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models/fasttext176.bin"); + + [SkippableTheory] [InlineData("__label__en", Constants.PHRASE_ENG_1, 0.9955)] [InlineData("__label__uk", Constants.PHRASE_UKR_1, 0.9900)] [InlineData("__label__ru", Constants.PHRASE_RUS_1, 0.9983)] public void FastTextFileSingleLanguage(string languageCode, string text, double score) { + Skip.IfNot(FastTextDetector.IsSupported()); + using var fastText = new FastTextDetector(); - var modelPath = "/models/fasttext176.bin"; - fastText.LoadModel(modelPath); + fastText.LoadModel(_modelPath); var predictions = fastText.Predict(text: text, count: 10); var mainLanguage = predictions.FirstOrDefault(); @@ -28,16 +32,17 @@ public void FastTextFileSingleLanguage(string languageCode, string text, double Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON); } - [Theory] + [SkippableTheory] [InlineData("__label__en", Constants.PHRASE_ENG_1, 0.9955)] [InlineData("__label__uk", Constants.PHRASE_UKR_1, 0.9900)] [InlineData("__label__ru", Constants.PHRASE_RUS_1, 0.9983)] public void FastTextStreamSingleLanguage(string languageCode, string text, double score) { + Skip.IfNot(FastTextDetector.IsSupported()); + using var fastText = new FastTextDetector(); - var modelPath = "/models/fasttext176.bin"; - using var stream = File.Open(modelPath, FileMode.Open); + using var stream = File.Open(_modelPath, FileMode.Open); fastText.LoadModel(stream); @@ -53,12 +58,14 @@ public void FastTextStreamSingleLanguage(string languageCode, string text, doubl Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON); } - [Theory] + [SkippableTheory] [InlineData("__label__en", Constants.PHRASE_ENG_1, 1.0000)] [InlineData("__label__uk", Constants.PHRASE_UKR_1, 0.8511)] [InlineData("__label__ru", Constants.PHRASE_RUS_1, 0.9693)] public void FastTextContainedSingleLanguage(string languageCode, string text, double score) { + Skip.IfNot(FastTextDetector.IsSupported()); + using var fastText = new FastTextDetector(); fastText.LoadDefaultModel(); @@ -74,13 +81,14 @@ public void FastTextContainedSingleLanguage(string languageCode, string text, do Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON); } - [Fact] + [SkippableFact] public void FastTextLabels() { + Skip.IfNot(FastTextDetector.IsSupported()); + using var fastText = new FastTextDetector(); - var modelPath = "/models/fasttext176.bin"; - fastText.LoadModel(modelPath); + fastText.LoadModel(_modelPath); var labels = fastText.GetLabels(); @@ -88,4 +96,23 @@ public void FastTextLabels() Assert.Contains(labels, x => x.Label == "__label__uk"); Assert.Contains(labels, x => x.Label == "__label__ru"); } + + public async Task InitializeAsync() + { + var url = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"; + await FileHelper.DownloadAsync( + path: _modelPath, + url: url + ); + } + + public async Task DisposeAsync() + { + if (File.Exists(_modelPath)) + { + File.Delete(_modelPath); + } + + await Task.CompletedTask; + } } diff --git a/src/LanguageIdentification.Tests/Helpers/FileHelper.cs b/src/LanguageIdentification.Tests/Helpers/FileHelper.cs new file mode 100644 index 00000000..5c20991c --- /dev/null +++ b/src/LanguageIdentification.Tests/Helpers/FileHelper.cs @@ -0,0 +1,24 @@ +namespace Panlingo.LanguageIdentification.Tests.Helpers; + +public class FileHelper +{ + public static async Task DownloadAsync(string path, string url) + { + if (File.Exists(path)) + { + return; + } + + using var client = new HttpClient(); + using var stream = await client.GetStreamAsync(url); + + var directory = Path.GetDirectoryName(path) ?? throw new Exception("No directory"); + if (!Directory.Exists(directory)) + { + Directory.CreateDirectory(directory); + } + + using var file = new FileStream(path, FileMode.OpenOrCreate); + await stream.CopyToAsync(file); + } +} diff --git a/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj b/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj index 501548f8..8ce25e43 100644 --- a/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj +++ b/src/LanguageIdentification.Tests/LanguageIdentification.Tests.csproj @@ -14,10 +14,18 @@ - - - - + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + diff --git a/src/LanguageIdentification.Tests/LinguaTests.cs b/src/LanguageIdentification.Tests/LinguaTests.cs index 6544a915..8c142e1e 100644 --- a/src/LanguageIdentification.Tests/LinguaTests.cs +++ b/src/LanguageIdentification.Tests/LinguaTests.cs @@ -5,12 +5,14 @@ namespace Panlingo.LanguageIdentification.Tests; public class LinguaTests { - [Theory] + [SkippableTheory] [InlineData(LinguaLanguage.English, Constants.PHRASE_ENG_1, 0.1666)] [InlineData(LinguaLanguage.Ukrainian, Constants.PHRASE_UKR_1, 0.8228)] [InlineData(LinguaLanguage.Russian, Constants.PHRASE_RUS_1, 0.3502)] public void LinguaSingleLanguage(LinguaLanguage languageCode, string text, double score) { + Skip.IfNot(LinguaDetector.IsSupported()); + using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues()); using var lingua = linguaBuilder.Build(); @@ -26,12 +28,14 @@ public void LinguaSingleLanguage(LinguaLanguage languageCode, string text, doubl Assert.Equal(score, mainLanguage.Confidence, Constants.EPSILON); } - [Theory] + [SkippableTheory] [InlineData(LinguaLanguage.English, Constants.PHRASE_ENG_1, 0.1666)] [InlineData(LinguaLanguage.Ukrainian, Constants.PHRASE_UKR_1, 0.8228)] [InlineData(LinguaLanguage.Russian, Constants.PHRASE_RUS_1, 0.3502)] public void LinguaMixedLanguage(LinguaLanguage languageCode, string text, double score) { + Skip.IfNot(LinguaDetector.IsSupported()); + using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues()); using var lingua = linguaBuilder.Build(); @@ -47,7 +51,7 @@ public void LinguaMixedLanguage(LinguaLanguage languageCode, string text, double Assert.Equal(score, mainLanguage.Confidence, Constants.EPSILON); } - [Theory] + [SkippableTheory] [InlineData(LinguaLanguage.Ukrainian, LinguaLanguageCode.Alpha2, "uk")] [InlineData(LinguaLanguage.Ukrainian, LinguaLanguageCode.Alpha3, "ukr")] [InlineData(LinguaLanguage.Hebrew, LinguaLanguageCode.Alpha2, "he")] @@ -56,6 +60,8 @@ public void LinguaMixedLanguage(LinguaLanguage languageCode, string text, double [InlineData(LinguaLanguage.Serbian, LinguaLanguageCode.Alpha3, "srp")] public void LinguaGetLanguageCode(LinguaLanguage language, LinguaLanguageCode type, string code) { + Skip.IfNot(LinguaDetector.IsSupported()); + using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues()); using var lingua = linguaBuilder.Build(); @@ -63,9 +69,11 @@ public void LinguaGetLanguageCode(LinguaLanguage language, LinguaLanguageCode ty Assert.Equal(code, languageCode); } - [Fact] + [SkippableFact] public void LinguaBuilderReuse() { + Skip.IfNot(LinguaDetector.IsSupported()); + using var linguaBuilder = new LinguaDetectorBuilder(Enum.GetValues()); using var lingua1 = linguaBuilder.Build(); using var lingua2 = linguaBuilder.Build(); diff --git a/src/LanguageIdentification.Tests/MainTests.cs b/src/LanguageIdentification.Tests/MainTests.cs index 12eb4f88..ea1cdb08 100644 --- a/src/LanguageIdentification.Tests/MainTests.cs +++ b/src/LanguageIdentification.Tests/MainTests.cs @@ -1,14 +1,105 @@ -namespace Panlingo.LanguageIdentification.Tests; +using Microsoft.Build.Construction; + +namespace Panlingo.LanguageIdentification.Tests; public class MainTests { - /// - /// Checks the current OS and container environment - /// [Fact] - public void CheckPlatform() + public void CheckPackageVersion() { - Assert.Equal(PlatformID.Unix, Environment.OSVersion.Platform); - Assert.Equal("true", Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER")); + Type[] types = [ + typeof(Panlingo.LanguageIdentification.CLD2.CLD2Detector), + typeof(Panlingo.LanguageIdentification.CLD3.CLD3Detector), + typeof(Panlingo.LanguageIdentification.FastText.FastTextDetector), + typeof(Panlingo.LanguageIdentification.Lingua.LinguaDetector), + typeof(Panlingo.LanguageIdentification.MediaPipe.MediaPipeDetector), + typeof(Panlingo.LanguageIdentification.Whatlang.WhatlangDetector), + typeof(Panlingo.LanguageIdentification.CLD2.Native.CLD2NativeLibrary), + typeof(Panlingo.LanguageIdentification.CLD3.Native.CLD3NativeLibrary), + typeof(Panlingo.LanguageIdentification.FastText.Native.FastTextNativeLibrary), + typeof(Panlingo.LanguageIdentification.Lingua.Native.LinguaNativeLibrary), + typeof(Panlingo.LanguageIdentification.MediaPipe.Native.MediaPipeNativeLibrary), + typeof(Panlingo.LanguageIdentification.Whatlang.Native.WhatlangNativeLibrary), + ]; + + var root = AppDomain.CurrentDomain.BaseDirectory; + + var src = root; + while (src != "/") + { + if (Path.GetFileName(src) == "src") + { + break; + } + + src = Path.GetDirectoryName(src) ?? "/"; + } + + var projectFiles = Directory.GetFiles(src, "*.csproj", SearchOption.AllDirectories); + + var packageProjects = new Dictionary(); + + foreach (var projectFile in projectFiles) + { + var projectRootElement = ProjectRootElement.Open(projectFile); + var assemblyName = projectRootElement.Properties.FirstOrDefault(x => x.Name == "AssemblyName"); + var version = projectRootElement.Properties.FirstOrDefault(x => x.Name == "Version"); + + if (assemblyName is null || version is null) + { + continue; + } + + if (string.IsNullOrEmpty(assemblyName.Value) || string.IsNullOrEmpty(version.Value)) + { + continue; + } + + packageProjects[assemblyName.Value] = version.Value; + } + + if (packageProjects.Count == 0) + { + throw new Exception("Projects are not found"); + } + + var assemblies = AppDomain.CurrentDomain.GetAssemblies() + .Where(x => + { + foreach (var type in x.GetTypes()) + { + if (types.Contains(type)) + { + return true; + } + } + + return false; + }) + .ToArray(); + + var assemblyNames = new List(); + var packageNames = new List(); + + foreach (var assembly in assemblies) + { + var assemblyName = assembly.GetName(); + + assemblyNames.Add($"{assemblyName.Name} {assemblyName.Version}"); + if (assemblyName.Name != null && packageProjects.TryGetValue(assemblyName.Name, out var packageVersion)) + { + var a = Version.Parse(packageVersion); + var b = new Version( + major: a.Major != -1 ? a.Major : 0, + minor: a.Minor != -1 ? a.Minor : 0, + build: a.Build != -1 ? a.Build : 0, + revision: a.Revision != -1 ? a.Revision : 0 + ); + + packageNames.Add($"{assemblyName.Name} {b}"); + } + } + + Assert.Equal(packageNames, assemblyNames); } } diff --git a/src/LanguageIdentification.Tests/MediaPipeTests.cs b/src/LanguageIdentification.Tests/MediaPipeTests.cs index c9f698d5..0aaf4a1d 100644 --- a/src/LanguageIdentification.Tests/MediaPipeTests.cs +++ b/src/LanguageIdentification.Tests/MediaPipeTests.cs @@ -1,20 +1,22 @@ -using System.IO; -using Panlingo.LanguageIdentification.MediaPipe; +using Panlingo.LanguageIdentification.MediaPipe; using Panlingo.LanguageIdentification.Tests.Helpers; namespace Panlingo.LanguageIdentification.Tests; -public class MediaPipeTests +public class MediaPipeTests : IAsyncLifetime { - [Theory] + private readonly string _modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models/mediapipe_language_detector.tflite"); + + [SkippableTheory] [InlineData("en", Constants.PHRASE_ENG_1, 0.9994)] [InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)] [InlineData("ru", Constants.PHRASE_RUS_1, 0.9999)] public void MediaPipeFileSingleLanguage(string languageCode, string text, double score) { - var modelPath = "/models/mediapipe_language_detector.tflite"; + Skip.IfNot(MediaPipeDetector.IsSupported()); + using var mediaPipe = new MediaPipeDetector( - options: MediaPipeOptions.FromFile(modelPath).WithResultCount(10) + options: MediaPipeOptions.FromFile(_modelPath).WithResultCount(10) ); var predictions = mediaPipe.PredictLanguages(text: text); @@ -28,15 +30,16 @@ public void MediaPipeFileSingleLanguage(string languageCode, string text, double Assert.Equal(languageCode, mainLanguage.Language); Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON); } - - [Theory] + + [SkippableTheory] [InlineData("en", Constants.PHRASE_ENG_1, 0.9994)] [InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)] [InlineData("ru", Constants.PHRASE_RUS_1, 0.9999)] public void MediaPipeStreamSingleLanguage(string languageCode, string text, double score) { - var modelPath = "/models/mediapipe_language_detector.tflite"; - using var stream = File.Open(modelPath, FileMode.Open); + Skip.IfNot(MediaPipeDetector.IsSupported()); + + using var stream = File.Open(_modelPath, FileMode.Open); using var mediaPipe = new MediaPipeDetector( options: MediaPipeOptions.FromStream(stream).WithResultCount(10) @@ -54,14 +57,15 @@ public void MediaPipeStreamSingleLanguage(string languageCode, string text, doub Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON); } - [Theory] + [SkippableTheory] [InlineData("en", Constants.PHRASE_ENG_1, 0.9994)] [InlineData("uk", Constants.PHRASE_UKR_1, 0.9999)] [InlineData("ru", Constants.PHRASE_RUS_1, 0.9999)] public void MediaPipeContainedSingleLanguage(string languageCode, string text, double score) { - var modelPath = "/models/mediapipe_language_detector.tflite"; - using var stream = File.Open(modelPath, FileMode.Open); + Skip.IfNot(MediaPipeDetector.IsSupported()); + + using var stream = File.Open(_modelPath, FileMode.Open); using var mediaPipe = new MediaPipeDetector( options: MediaPipeOptions.FromDefault().WithResultCount(10) @@ -78,4 +82,23 @@ public void MediaPipeContainedSingleLanguage(string languageCode, string text, d Assert.Equal(languageCode, mainLanguage.Language); Assert.Equal(score, mainLanguage.Probability, Constants.EPSILON); } + + public async Task InitializeAsync() + { + var url = "https://storage.googleapis.com/mediapipe-models/language_detector/language_detector/float32/1/language_detector.tflite"; + await FileHelper.DownloadAsync( + path: _modelPath, + url: url + ); + } + + public async Task DisposeAsync() + { + if (File.Exists(_modelPath)) + { + File.Delete(_modelPath); + } + + await Task.CompletedTask; + } } diff --git a/src/LanguageIdentification.Tests/WhatlangTests.cs b/src/LanguageIdentification.Tests/WhatlangTests.cs index 5ea352d6..74207c2b 100644 --- a/src/LanguageIdentification.Tests/WhatlangTests.cs +++ b/src/LanguageIdentification.Tests/WhatlangTests.cs @@ -5,12 +5,14 @@ namespace Panlingo.LanguageIdentification.Tests; public class WhatlangTests { - [Theory] + [SkippableTheory] [InlineData(WhatlangLanguage.Ron, Constants.PHRASE_ENG_1, 0.0274)] [InlineData(WhatlangLanguage.Ukr, Constants.PHRASE_UKR_1, 0.9999)] [InlineData(WhatlangLanguage.Rus, Constants.PHRASE_RUS_1, 0.2308)] public void WhatlangSingleLanguage(WhatlangLanguage languageCode, string text, double score) { + Skip.IfNot(WhatlangDetector.IsSupported()); + using var whatlang = new WhatlangDetector(); var prediction = whatlang.PredictLanguage(text: text); @@ -24,13 +26,15 @@ public void WhatlangSingleLanguage(WhatlangLanguage languageCode, string text, d Assert.Equal(score, prediction.Confidence, Constants.EPSILON); } - [Theory] + [SkippableTheory] [InlineData(WhatlangLanguage.Ukr, "ukr")] [InlineData(WhatlangLanguage.Uzb, "uzb")] [InlineData(WhatlangLanguage.Heb, "heb")] [InlineData(WhatlangLanguage.Srp, "srp")] public void WhatlangGetLanguageCode(WhatlangLanguage language, string code) { + Skip.IfNot(WhatlangDetector.IsSupported()); + using var whatlang = new WhatlangDetector(); var languageCode = whatlang.GetLanguageCode(language); diff --git a/src/LanguageIdentification.Whatlang.Native/Dockerfile b/src/LanguageIdentification.Whatlang.Native/Dockerfile index 9c1f75c8..80c722c2 100644 --- a/src/LanguageIdentification.Whatlang.Native/Dockerfile +++ b/src/LanguageIdentification.Whatlang.Native/Dockerfile @@ -1,5 +1,4 @@ -FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build -ARG BUILD_CONFIGURATION=Release +FROM ubuntu:22.04 AS build WORKDIR /repo COPY ["src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj", "src/LanguageIdentification.Whatlang.Native/"] diff --git a/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj b/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj index 95a71f99..ab0966ce 100644 --- a/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj +++ b/src/LanguageIdentification.Whatlang.Native/LanguageIdentification.Whatlang.Native.csproj @@ -2,7 +2,7 @@ netstandard2.1 - 0.0.0.21 + 0.0.0.23 Panlingo.LanguageIdentification.Whatlang.Native Panlingo.LanguageIdentification.Whatlang.Native Panlingo.LanguageIdentification.Whatlang.Native diff --git a/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs b/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs index 2ec7e231..836f41f6 100644 --- a/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs +++ b/src/LanguageIdentification.Whatlang.Native/WhatlangNativeLibrary.cs @@ -5,6 +5,6 @@ public static class WhatlangNativeLibrary /// /// Name of native binary /// - public const string Name = "libwhatlang.so"; + public const string Name = "whatlang"; } } diff --git a/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj b/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj index 7f6fef1a..13fc8084 100644 --- a/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj +++ b/src/LanguageIdentification.Whatlang/LanguageIdentification.Whatlang.csproj @@ -2,7 +2,7 @@ net5.0;net6.0;net7.0;net8.0 - 0.0.0.21 + 0.0.0.23 Panlingo.LanguageIdentification.Whatlang Panlingo.LanguageIdentification.Whatlang Panlingo.LanguageIdentification.Whatlang diff --git a/src/LanguageIdentification.Whatlang/WhatlangDetector.cs b/src/LanguageIdentification.Whatlang/WhatlangDetector.cs index fd3076eb..00b9c8b0 100644 --- a/src/LanguageIdentification.Whatlang/WhatlangDetector.cs +++ b/src/LanguageIdentification.Whatlang/WhatlangDetector.cs @@ -12,7 +12,7 @@ public class WhatlangDetector : IDisposable { public WhatlangDetector() { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + if (!IsSupported()) { throw new NotSupportedException( $"{nameof(WhatlangDetector)} is not yet supported on {RuntimeInformation.RuntimeIdentifier}" @@ -20,6 +20,15 @@ public WhatlangDetector() } } + public static bool IsSupported() + { + return RuntimeInformation.OSArchitecture switch + { + Architecture.X64 when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => true, + _ => false, + }; + } + /// /// Produces a prediction for 'text' /// diff --git a/src/test-ci.Dockerfile b/src/test-ci.Dockerfile index 27d7a2ab..bc46a1d6 100644 --- a/src/test-ci.Dockerfile +++ b/src/test-ci.Dockerfile @@ -3,7 +3,7 @@ WORKDIR /src COPY . . -RUN dotnet nuget add source /src/local-nugets +RUN dotnet nuget add source /src/local-packages RUN ls -R diff --git a/src/test.Dockerfile b/src/test.Dockerfile index 8fafba69..9e6935da 100644 --- a/src/test.Dockerfile +++ b/src/test.Dockerfile @@ -3,18 +3,3 @@ RUN wget https://aka.ms/getvsdbgsh && \ sh getvsdbgsh -v latest -l /vsdbg -### FastText -RUN apt -y update -RUN apt -y install curl -RUN mkdir /models -p -RUN curl --location -o /models/fasttext176.bin https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin -# RUN curl --location -o /models/fasttext217.bin https://huggingface.co/facebook/fasttext-language-identification/resolve/main/model.bin?download=true -### - -### MediaPipe -RUN apt -y update -RUN apt -y install curl -RUN curl --location -o /models/mediapipe_language_detector.tflite https://storage.googleapis.com/mediapipe-models/language_detector/language_detector/float32/1/language_detector.tflite -### - -