From 11552d0dc02fb9880f61f46e46115b6a50dada32 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 31 May 2022 21:26:39 -0500 Subject: [PATCH] v1.3 (#590) --- .github/workflows/build.yml | 8 +- CMakeLists.txt | 12 +- INSTALL.md | 124 +++++++ LICENSE | 2 +- README.md | 268 +++++--------- cmake/ExportAndPackageConfig.cmake | 14 +- cmake/FindDPCPP.cmake | 7 + ...ccaConfig.cmake.in => OCCAConfig.cmake.in} | 2 +- configure-cmake.sh | 43 +++ docs/LICENSE | 2 +- .../cpp/30_device_function/CMakeLists.txt | 5 + examples/cpp/30_device_function/Makefile | 30 ++ examples/cpp/30_device_function/README.md | 28 ++ .../cpp/30_device_function/addVectors.okl | 28 ++ examples/cpp/30_device_function/main.cpp | 75 ++++ examples/cpp/CMakeLists.txt | 3 +- include/occa/defines/windows.hpp | 22 -- modulefiles/occa | 15 + src/core/base.cpp | 11 +- src/mpi.cpp | 5 +- src/occa/internal/lang/modes/dpcpp.cpp | 2 +- src/occa/internal/lang/modes/opencl.cpp | 127 ++----- src/occa/internal/lang/modes/opencl.hpp | 12 +- src/occa/internal/lang/modes/withLauncher.cpp | 5 +- src/occa/internal/lang/qualifier.cpp | 4 +- src/occa/internal/modes/cuda/device.cpp | 37 +- src/occa/internal/modes/dpcpp/device.cpp | 14 +- src/occa/internal/modes/dpcpp/polyfill.hpp | 57 +++ .../internal/modes/dpcpp/registration.cpp | 3 +- src/occa/internal/modes/opencl/device.cpp | 16 +- src/occa/internal/modes/opencl/polyfill.hpp | 23 +- .../internal/modes/opencl/registration.cpp | 37 +- src/occa/internal/modes/opencl/utils.cpp | 184 +++++---- src/occa/internal/modes/opencl/utils.hpp | 46 +-- src/occa/internal/utils.hpp | 1 - src/occa/internal/utils/env.cpp | 7 +- src/occa/internal/utils/tls.hpp | 52 --- src/occa/internal/utils/tls.tpp | 90 ----- src/types/json.cpp | 1 + src/types/primitive.cpp | 24 +- tests/src/math/fpMath.cpp | 119 ++++++ tests/src/math/intMath.cpp | 77 ++++ tests/src/types/primitive.cpp | 348 ++++++++++++++++++ 43 files changed, 1361 insertions(+), 629 deletions(-) create mode 100644 INSTALL.md rename cmake/{occaConfig.cmake.in => OCCAConfig.cmake.in} (92%) create mode 100755 configure-cmake.sh create mode 100644 examples/cpp/30_device_function/CMakeLists.txt create mode 100644 examples/cpp/30_device_function/Makefile create mode 100644 examples/cpp/30_device_function/README.md create mode 100644 examples/cpp/30_device_function/addVectors.okl create mode 100644 examples/cpp/30_device_function/main.cpp create mode 100644 modulefiles/occa delete mode 100644 src/occa/internal/utils/tls.hpp delete mode 100644 src/occa/internal/utils/tls.tpp create mode 100644 tests/src/math/fpMath.cpp create mode 100644 tests/src/math/intMath.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b4331a000..a7efb9d5a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,9 +2,9 @@ name: Build on: push: - branches: [ main ] + branches: [ main, development ] pull_request: - branches: [ main ] + branches: [ main, development ] jobs: run: @@ -165,7 +165,7 @@ jobs: - name: Run CTests if: ${{ matrix.useCMake && !matrix.useoneAPI }} run: | - ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_shared_memory-dpcpp|examples_cpp_nonblocking_streams-dpcpp" + ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_nonblocking_streams-opencl|examples_cpp_shared_memory-dpcpp|examples_cpp_nonblocking_streams-dpcpp|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp" - name: Run CTests @@ -176,7 +176,7 @@ jobs: run: | source /opt/intel/oneapi/setvars.sh export SYCL_DEVICE_FILTER=opencl.cpu - ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_shared_memory-dpcpp|examples_cpp_nonblocking_streams-dpcpp" + ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_nonblocking_streams-opencl|examples_cpp_shared_memory-dpcpp|examples_cpp_nonblocking_streams-dpcpp|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp" - name: Upload code coverage if: ${{ matrix.OCCA_COVERAGE }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 925d874c6..d410c2d12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ cmake_policy(SET CMP0056 NEW) #======================================= #---[ Build Config ]-------------------- -project(occa +project(OCCA DESCRIPTION "JIT Compilation for Multiple Architectures: C++, OpenMP, CUDA, HIP, OpenCL, Metal" HOMEPAGE_URL "https://github.com/libocca/occa" LANGUAGES C CXX) @@ -32,7 +32,7 @@ option(ENABLE_OPENCL "Build with OpenCL if available" ON) option(ENABLE_HIP "Build with HIP if available" ON) option(ENABLE_METAL "Build with Metal if available" ON) option(ENABLE_DPCPP "Build with SYCL/DPCPP if available" ON) -option(ENABLE_MPI "Build with MPI if available" ON) +option(ENABLE_MPI "Build with MPI if available" OFF) option(ENABLE_TESTS "Build tests" OFF) option(ENABLE_EXAMPLES "Build simple examples" OFF) @@ -312,3 +312,11 @@ add_subdirectory(bin) # Create a package config and associated files. include(ExportAndPackageConfig) + +install(CODE + "configure_file( + ${CMAKE_SOURCE_DIR}/modulefiles/occa + ${CMAKE_INSTALL_PREFIX}/modulefiles/occa + @ONLY + )" +) diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 000000000..84b03d4d4 --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,124 @@ +# INSTALLATION GUIDE + +## Requirements + +### Minimum + +- [CMake] v3.17 or newer +- C++17 compiler +- C11 compiler + +### Optional + + - Fortan 90 compiler + - CUDA 9 or later + - HIP 3.5 or later + - SYCL 2020 or later + - OpenCL 2.0 or later + - OpenMP 4.0 or later + +## Linux + +### **Configure** + +OCCA uses the [CMake] build system. For convenience, the shell script `configure-cmake.sh` has been provided to drive the Cmake build. The following table gives a list of build parameters which are set in the file. To override the default value, it is only necessary to assign the variable an alternate value at the top of the script or at the commandline. + +Example +```shell +$ CC=clang CXX=clang++ ENABLE_OPENMP="OFF" ./configure-cmake.sh +``` + +| Build Parameter | Description | Default | +| --------- | ----------- | ------- | +| BUILD_DIR | Directory used by CMake to build OCCA | `./build` | +| INSTALL_DIR | Directory where OCCA should be installed | `./install` | +| BUILD_TYPE | Optimization and debug level | `RelWithDebInfo` | +| CXX | C++11 compiler | `g++` | +| CXXFLAGS | C++ compiler flags | *empty* | +| CC | C11 compiler| `gcc` | +| CFLAGS | C compiler flags | *empty* | +| ENABLE_CUDA | Enable use of the CUDA backend | `ON`| +| ENABLE_HIP | Enable use of the HIP backend | `ON`| +| ENABLE_DPCPP | Enable use of the DPC++ backend | `ON`| +| ENABLE_OPENCL | Enable use of the OpenCL backend | `ON`| +| ENABLE_OPENMP | Enable use of the OpenMP backend | `ON`| +| ENABLE_METAL | Enable use of the Metal backend | `ON`| +| ENABLE_TESTS | Build OCCA's test harness | `ON` | +| ENABLE_EXAMPLES | Build OCCA examples | `ON` | +| ENABLE_FORTRAN | Build the Fortran language bindings | `OFF`| +| FC | Fortran 90 compiler | `gfortran` | +| FFLAGS | Fortran compiler flags | *empty* | + +#### Dependency Paths + +The following environment variables can be used to specify the path to third-party dependencies needed by different OCCA backends. The value assigned should be an absolute path to the parent directory, which typically contains subdirectories `bin`, `include`, and `lib`. + +| Backend | Environment Variable | Description | +| --- | --- | --- | +| CUDA | CUDATookit_ROOT | Path to the CUDA the NVIDIA CUDA Toolkit | +| HIP | HIP_ROOT | Path to the AMD HIP toolkit | +| OpenCL | OpenCL_ROOT | Path to the OpenCL headers and library | +| DPC++ | SYCL_ROOT | Path to the SYCL headers and library | + +### Building + +After CMake configuration is complete, OCCA can be built with the command +```shell +$ cmake --build build --parallel +``` + +When cross compiling for a different platform, the targeted hardware doesn't need to be available; however all dependencies—e.g., headers, libraries—must be present. Commonly this is the case for large HPC systems, where code is compiled on login nodes and run on compute nodes. + +### Testing + +CTest is used for the OCCA test harness and can be run using the command +```shell +$ ctest --test-dir BUILD_DIR --output-on-failure +``` + +Before running CTest, it may be necessary to set the environment variables `OCCA_CXX` and `OCCA_CC` since OCCA defaults to using gcc and g++. Tests for some backends may return a false negative otherwise. + +During testing, `BUILD_DIR/occa` is used for kernel caching. This directory may need to be cleared when rerunning tests after recompiling with an existing build directory. + +### Installation + +Commandline installation of OCCA can be accomplished with the following: +```shell +$ cmake --install BUILD_DIR --prefix INSTALL_DIR +``` +During installation, the [Env Modules](Env_Modules) file `INSTALL_DIR/modulefiles/occa` is generated. When this module is loaded, paths to the installed `bin`, `lib`, and `include` directories are appended to environment variables such as `PATH` and `LD_LIBRARY_PATH`. +To make use of this module, add the following to your `.modulerc` file +``` +module use -a INSTALL_DIR/modulfiles +``` + then at the commandline call +```shell +$ module load occa +``` + +### Building an OCCA application + +For convenience, OCCA provides CMake package files which are configured during installation. These package files define an imported target, `OCCA::libocca`, and look for all required dependencies. + +For example, the CMakeLists.txt of downstream projects using OCCA would include +```cmake +find_package(OCCA REQUIRED) + +add_executable(downstream-app ...) +target_link_libraries(downstream-app PRIVATE OCCA::libocca) + +add_library(downstream-lib ...) +target_link_libraries(downstream-lib PUBLIC OCCA::libocca) +``` +In the case of a downstream library, linking OCCA using the `PUBLIC` specifier ensures that CMake will automatically forward OCCA's dependencies to applications which use the library. + +## Mac OS + +> Do you use OCCA on Mac OS? Help other Mac OS users by contributing to the documentation here! + +## Windows + +> Do you use OCCA on Windows? Help other Windows users by contributing to the documentation here! + +[CMake]: https://cmake.org/ +[Env_Modules]: https://modules.readthedocs.io/en/latest/index.html \ No newline at end of file diff --git a/LICENSE b/LICENSE index 3c44e13df..f6110eb45 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2014-2021 David Medina and Tim Warburton +Copyright (c) 2014-2022 David Medina and Tim Warburton Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 8128a9ead..2fbe6f871 100644 --- a/README.md +++ b/README.md @@ -4,217 +4,151 @@

  -

- Build - codecov.io - Slack -

-  +
-### Table of Contents - -- [What is OCCA?](#what-is-occa) -- [Documentation](#documentation) -- [How to build](#build) - - [Linux](#build-linux) - - [MacOS](#build-macos) -- [Examples](#examples) - - [Hello World](#examples-hello-world) - - [Inline for-loops](#examples-for-loops) - - [Arrays + Functional Programming](#examples-arrays) -- [CLI](#cli) - - [Bash Autocomplete](#cli-autocomplete) -- [Similar Libraries](#similar-libraries) +[![license](https://img.shields.io/github/license/libocca/occa)](LICENSE) +![discussions](https://img.shields.io/github/discussions/libocca/occa) +[![slack](https://img.shields.io/badge/Chat-on%20Slack-%23522653)][OCCA_SLACK] +![github-ci](https://github.com/libocca/occa/workflows/Build/badge.svg) +![codecov](https://codecov.io/github/libocca/occa/coverage.svg) +[![twitter](https://img.shields.io/twitter/url?label=Twitter&style=social&url=https%3A%2F%2Ftwitter.com%2Flibocca)](https://twitter.com/libocca) +
  -

What is OCCA?

+## Performance, Portability, Transparency -In a nutshell, OCCA (like *oca*-rina) is an open-source library which aims to +OCCA is an open source, portable, and vendor neutral framework for parallel programming on heterogeneous platforms. The OCCA API provides unified models for heterogeneous programming concepts—such as a device, memory, or kernel—while the OCCA Kernel Language (OKL) enables the creation of portable device kernels using a directive-based extension to the C-language. -- Make it easy to program different types of devices (e.g. _CPU_, _GPU_, _FPGA_) -- Provide a [unified API](https://libocca.org/#/guide/occa/introduction) for interacting with backend device APIs (e.g. _OpenMP_, _CUDA_, _HIP_, _OpenCL_, _Metal_) -- JIT compile backend kernels and provide a [kernel language](https://libocca.org/#/guide/okl/introduction) (a minor extension to C) to abstract programming for each backend +Mission critical computational science and engineering applications from the public and private sectors rely on OCCA. Notable users include the U.S. Department of Energy and Shell. -The "Hello World" example of adding two vectors looks like: +**Key Features** -```cpp -@kernel void addVectors(const int entries, - const float *a, - const float *b, - float *ab) { - for (int i = 0; i < entries; ++i; @tile(16, @outer, @inner)) { - ab[i] = a[i] + b[i]; - } -} -``` +- **Multiple backends**—including CUDA, HIP, Data Parallel C++, OpenCL, OpenMP (CPU), and Metal +- **JIT compilation** and caching of kernels +- C, C++, and ***Fortran*** language support +- **Interoperability** with backend API and kernels +- **Transparency**—easy to understand how your code is mapped to each platform -Or we can inline it using C++ lambdas - -```cpp -// Capture variables -occa::scope scope({ - {"a", a}, - {"b", b}, - {"ab", ab} -}); - -occa::forLoop() - .tile({entries, 16}) - .run(OCCA_FUNCTION(scope, [=](const int i) -> void { - ab[i] = a[i] + b[i]; - })); -``` - -Or we can use a more functional way by using `occa::array` - -```cpp -// Capture variables -occa::scope scope({ - {"b", b} -}); - -occa::array ab = ( - a.map(OCCA_FUNCTION( - scope, - [=](const float &value, const int index) -> float { - return value + b[index]; - } - )) -); -``` -  +## Requirements -

Documentation

+### Minimum -We maintain our documentation on the [libocca.org](https://libocca.org) site +- [CMake] v3.17 or newer +- C++17 compiler +- C11 compiler -- [:notebook: Guide](https://libocca.org/#/guide) -- [:gear: API](https://libocca.org/#/api/) -- [🌟 Who is using OCCA?](https://libocca.org/#/gallery) -- [:lab_coat: Publications](https://libocca.org/#/publications) +### Optional -  + - Fortan 90 compiler + - CUDA 9 or later + - HIP 3.5 or later + - SYCL 2020 or later + - OpenCL 2.0 or later + - OpenMP 4.0 or later -

How to build

+## Build, Test, Install -```bash -git clone --depth 1 https://github.com/libocca/occa.git -cd occa -make -j 4 -``` +OCCA uses the [CMake] build system. Checkout the [installation guide](INSTALL.md) for a comprehensive overview of all build settings and instructions for building on [Windows](INSTALL.md#windows) or [Mac OS](INSTALL.md#mac-os). -Setup environment variables inside the `occa` directory +### Linux -

Linux

+For convenience, the shell script `configure-cmake.sh` has been provided to drive the CMake build. Compilers, flags, and other build parameters can be adjusted there. By default, this script uses `./build` and `./install` for the build and install directories. -```bash -export PATH+=":${PWD}/bin" -export LD_LIBRARY_PATH+=":${PWD}/lib" +The following demonstrates a typical sequence of shell commands to build, test, and install occa: +```shell +$ ./configure.sh +$ cmake --build build --parallel +$ ctest --test-dir build --output-on-failure +$ cmake --install build --prefix install ``` -

MacOS

+If dependencies are installed in a non-standard location, set the corresponding [environment variable](INSTALL.md#dependency-paths) to this path. -```bash -export PATH+=":${PWD}/bin" -export DYLD_LIBRARY_PATH+=":${PWD}/lib" -``` -  +## Use -

Examples

+### Environment -

Hello World

+During installation, the [Env Modules](Env_Modules) file `/modulefiles/occa` is generated. When this module is loaded, paths to the installed `bin`, `lib`, and `include` directories are appended to environment variables such as `PATH` and `LD_LIBRARY_PATH`. -The occa library is based on 3 different objects, all covered in the [01_add_vectors](/examples/cpp/01_add_vectors) example: -- `occa::device` -- `occa::memory` -- `occa::kernel` +### Building an OCCA application -```bash -cd examples/cpp/01_add_vectors -make -./main -``` +For convenience, OCCA provides CMake package files which are configured during installation. These package files define an imported target, `OCCA::libocca`, and look for all required dependencies. -

Inline for-loops

+For example, the CMakeLists.txt of downstream projects using OCCA would include +```cmake +find_package(OCCA REQUIRED) -Find how to inline `for` loops using `occa::forLoop` in example [02_for_loops](/examples/cpp/02_for_loops): +add_executable(downstream-app ...) +target_link_libraries(downstream-app PRIVATE OCCA::libocca) -```bash -cd examples/cpp/02_for_loops -make -./main +add_library(downstream-lib ...) +target_link_libraries(downstream-lib PUBLIC OCCA::libocca) ``` -  - -

Arrays + Functional Programming

- -Learn how to use `occa::array` in a functional way in example [03_arrays](/examples/cpp/03_arrays): - -```bash -cd examples/cpp/03_arrays -make -./main +### Command-line Interface + +The OCCA command-line interface can be found in `/bin/occa`. This tool can be used to query information about hardware and the configuration of OCCA on a given platform. + +For example, calling `occa info` will available OCCA backends and related hardware specs, while `occa env` display the values of OCCA related environment variables. To see the list of all available options, call `occa --help`. + +```shell +$ occa info +========+======================+================================= + CPU(s) | Processor Name | AMD EPYC 7532 32-Core Processor + | Memory | 251.6 GB + | Clock Frequency | 2.4 MHz + | SIMD Instruction Set | SSE2 + | SIMD Width | 128 bits + | L1d Cache Size | 1 MB + | L1i Cache Size | 1 MB + | L2 Cache Size | 16 MB + | L3 Cache Size | 256 MB +========+======================+================================= + OpenCL | Platform 0 | NVIDIA CUDA + |----------------------+--------------------------------- + | Device 0 | NVIDIA A100-PCIE-40GB + | Device Type | gpu + | Compute Cores | 108 + | Global Memory | 39.40 GB +========+======================+================================= + CUDA | Device Name | NVIDIA A100-PCIE-40GB + | Device ID | 0 + | Memory | 39.40 GB +========+======================+================================= ``` -  - -

CLI

- -There is an executable `occa` provided inside `bin` - -```bash -> occa +## Community -Usage: occa [OPTIONS] COMMAND [COMMAND...] +### Support -Helpful utilities related to OCCA workflows +Need help? Checkout the [repository wiki](https://github.com/libocca/occa/wiki) or ask a question in the [Q&A discussions category](https://github.com/libocca/occa/discussions/categories/q-a). -Commands: - autocomplete Prints shell functions to autocomplete occa - commands and arguments - clear Clears cached files and cache locks - compile Compile kernels - env Print environment variables used in OCCA - info Prints information about available backend modes - modes Prints available backend modes - translate Translate kernels - version Prints OCCA version +### Feedback -Arguments: - COMMAND Command to run - -Options: - -h, --help Print usage -``` - -  - -

Bash Autocomplete

- -```bash -if which occa > /dev/null 2>&1; then - eval "$(occa autocomplete bash)" -fi -``` +To provide feedback, start a conversation in the [general](https://github.com/libocca/occa/discussions/categories/general) or [ideas](https://github.com/libocca/occa/discussions/categories/ideas) discussion categories. -

Similar Libraries

+## Acknowledgements -OCCA is definitely not the only solution that aims to simplify programming on different hardware/accelerators. -Here is a list of other libraries that have taken different approaches: +This work was supported in part by +- Argonne Leadership Computing Facility, which is a DOE Office of Science User Facility supported under Contract DE-AC02-06CH11357 +- The Exascale Computing Project (17-SC-20-SC), a joint project of the U.S. Department of Energy’s Office of Science and National Nuclear Security Administration, responsible for delivering a capable exascale ecosystem, including software, applications, and hardware technology, to support the nation’s exascale computing imperative +- The Center for Efficient Exascale Discretizations (CEED), a co-design center within the U.S. Department of Energy Exascale Computing Project. +- Intel +- AMD +- Shell -- [Alpaka](https://github.com/alpaka-group/alpaka) +## License - > The alpaka library is a header-only C++14 abstraction library for accelerator development. Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism. +OCCA is available under a [MIT license](LICENSE.MD) -- [RAJA](https://github.com/LLNL/RAJA) +[OCCA_WEBSITE]: https://libocca.org - > RAJA is a library of C++ software abstractions, primarily developed at Lawrence Livermore National Laboratory (LLNL), that enables architecture and programming model portability for HPC applications +[OCCA_SLACK]: https://join.slack.com/t/libocca/shared_invite/zt-4jcnu451-qPpPWUzhm7YQKY_HMhIsIw -- [Kokkos](https://github.com/kokkos/kokkos) +[CMake]: https://cmake.org/ - > Kokkos Core implements a programming model in C++ for writing performance portable applications targeting all major HPC platforms. For that purpose it provides abstractions for both parallel execution of code and data management. +[Env_Modules]: https://modules.readthedocs.io/en/latest/index.html \ No newline at end of file diff --git a/cmake/ExportAndPackageConfig.cmake b/cmake/ExportAndPackageConfig.cmake index 79f0361be..0517f6540 100644 --- a/cmake/ExportAndPackageConfig.cmake +++ b/cmake/ExportAndPackageConfig.cmake @@ -4,7 +4,7 @@ # occaConfigVersion.cmake, the version file associated with occaConfig.cmake # Install in subdirectory lib/cmake/PACKAGENAME, which is where cmake expects package config files -set(PackageConfigInstallLocation lib/cmake/occa) +set(PackageConfigInstallLocation lib/cmake/OCCA) set(ExportNamespace "OCCA::") # Set the exportPackageDependencies variable, for use in configuring occaConfig.cmake.in @@ -44,8 +44,8 @@ string(APPEND exportTargets "# ${ExportNamespace}occa The occa executable, e.g. include(CMakePackageConfigHelpers) # Create the PackageConfig file, based on the template configure_package_config_file( - "${CMAKE_CURRENT_LIST_DIR}/occaConfig.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/occaConfig.cmake" + "${CMAKE_CURRENT_LIST_DIR}/OCCAConfig.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/OCCAConfig.cmake" INSTALL_DESTINATION ${PackageConfigInstallLocation} # Only used as relative reference during in this function, does not determine actual install location NO_CHECK_REQUIRED_COMPONENTS_MACRO # As long as components are not used, don't need it ) @@ -57,7 +57,7 @@ file(READ "${OCCA_SOURCE_DIR}/include/occa/defines/occa.hpp" occadefs) string(REGEX MATCH "#define OCCA_VERSION_STR +\"([.0-9]*)\"" _ ${occadefs}) set(OCCA_VERSION_STR ${CMAKE_MATCH_1}) write_basic_package_version_file( - "${CMAKE_CURRENT_BINARY_DIR}/occaConfigVersion.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/OCCAConfigVersion.cmake" VERSION "${OCCA_VERSION_STR}" COMPATIBILITY AnyNewerVersion ) @@ -67,7 +67,7 @@ write_basic_package_version_file( install( EXPORT occaExport NAMESPACE ${ExportNamespace} - FILE occaTargets.cmake + FILE OCCATargets.cmake DESTINATION ${PackageConfigInstallLocation} ) @@ -76,8 +76,8 @@ install( # find_package(occa) will only match occaConfig.cmake, NOT OccaConfig.cmake install( FILES - "${CMAKE_CURRENT_BINARY_DIR}/occaConfig.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/occaConfigVersion.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/OCCAConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/OCCAConfigVersion.cmake" "${CMAKE_CURRENT_LIST_DIR}/FindDPCPP.cmake" "${CMAKE_CURRENT_LIST_DIR}/FindHIP.cmake" "${CMAKE_CURRENT_LIST_DIR}/FindMETAL.cmake" diff --git a/cmake/FindDPCPP.cmake b/cmake/FindDPCPP.cmake index 563a3d7bc..974607524 100644 --- a/cmake/FindDPCPP.cmake +++ b/cmake/FindDPCPP.cmake @@ -13,6 +13,7 @@ find_path( PATHS /opt/intel/oneapi/compiler/latest/linux ENV SYCL_ROOT + ${SYCL_ROOT} PATH_SUFFIXES include/sycl ) @@ -24,6 +25,9 @@ find_path( PATHS /opt/intel/oneapi/compiler/latest/linux ENV SYCL_ROOT + ${SYCL_ROOT} + PATH_SUFFIXES + include ) find_library( @@ -33,6 +37,9 @@ find_library( PATHS /opt/intel/oneapi/compiler/latest/linux ENV SYCL_ROOT + ${SYCL_ROOT} + PATH_SUFFIXES + lib ) include(FindPackageHandleStandardArgs) diff --git a/cmake/occaConfig.cmake.in b/cmake/OCCAConfig.cmake.in similarity index 92% rename from cmake/occaConfig.cmake.in rename to cmake/OCCAConfig.cmake.in index 4484d8eab..5d13e3f85 100644 --- a/cmake/occaConfig.cmake.in +++ b/cmake/OCCAConfig.cmake.in @@ -17,4 +17,4 @@ set(CMAKE_MODULE_PATH ${_CMAKE_MODULE_PATH}) unset(_CMAKE_MODULE_PATH) # The exported targets are defined in an auto-generated file: -include( "${CMAKE_CURRENT_LIST_DIR}/occaTargets.cmake" ) +include( "${CMAKE_CURRENT_LIST_DIR}/OCCATargets.cmake" ) diff --git a/configure-cmake.sh b/configure-cmake.sh new file mode 100755 index 000000000..41562fb06 --- /dev/null +++ b/configure-cmake.sh @@ -0,0 +1,43 @@ +# !/bin/bash +# Override default values here +#CC= +#CXX= +#FC= + +# Default build parameters +: ${BUILD_DIR:=`pwd`/build} +: ${INSTALL_DIR:=`pwd`/install} +: ${BUILD_TYPE:="RelWithDebInfo"} + +: ${CC:="gcc"} +: ${CXX:="g++"} +: ${FC:="gfortran"} + +: ${ENABLE_DPCPP:="ON"} +: ${ENABLE_OPENCL:="ON"} +: ${ENABLE_CUDA:="ON"} +: ${ENABLE_HIP="ON"} +: ${ENABLE_OPENMP="ON"} +: ${ENABLE_METAL="ON"} +: ${ENABLE_FORTRAN="OFF"} +: ${ENABLE_TESTS="ON"} +: ${ENABLE_EXAMPLES="ON"} + +cmake -S . -B ${BUILD_DIR} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_Fortran_COMPILER=${FC} \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \ + -DCMAKE_C_FLAGS="${CFLAGS}" \ + -DCMAKE_Fortran_FLAGS="${FFLAGS}" \ + -DENABLE_OPENMP=${ENABLE_OPENMP} \ + -DENABLE_OPENCL=${ENABLE_OPENCL} \ + -DENABLE_DPCPP=${ENABLE_DPCPP} \ + -DENABLE_CUDA=${ENABLE_CUDA} \ + -DENABLE_HIP=${ENABLE_HIP} \ + -DENABLE_METAL=${ENABLE_METAL} \ + -DENABLE_FORTRAN=${ENABLE_FORTRAN} \ + -DENABLE_TESTS=${ENABLE_TESTS} \ + -DENABLE_EXAMPLES=${ENABLE_EXAMPLES} diff --git a/docs/LICENSE b/docs/LICENSE index aa58e1f87..473400bc8 100644 --- a/docs/LICENSE +++ b/docs/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 OCCA +Copyright (c) 2022 OCCA Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/examples/cpp/30_device_function/CMakeLists.txt b/examples/cpp/30_device_function/CMakeLists.txt new file mode 100644 index 000000000..a27e79bba --- /dev/null +++ b/examples/cpp/30_device_function/CMakeLists.txt @@ -0,0 +1,5 @@ +set(EXAMPLE_NAME "device_function") +compile_cpp_example_with_modes(${EXAMPLE_NAME} main.cpp) + +add_custom_target(cpp_example_${EXAMPLE_NAME}_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/addVectors.okl addVectors.okl) +add_dependencies(examples_cpp_${EXAMPLE_NAME} cpp_example_${EXAMPLE_NAME}_okl) diff --git a/examples/cpp/30_device_function/Makefile b/examples/cpp/30_device_function/Makefile new file mode 100644 index 000000000..bf13a8b42 --- /dev/null +++ b/examples/cpp/30_device_function/Makefile @@ -0,0 +1,30 @@ + +PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef OCCA_DIR + include $(PROJ_DIR)/../../../scripts/build/Makefile +else + include ${OCCA_DIR}/scripts/build/Makefile +endif + +#---[ COMPILATION ]------------------------------- +headers = $(wildcard $(incPath)/*.hpp) $(wildcard $(incPath)/*.tpp) +sources = $(wildcard $(srcPath)/*.cpp) + +objects = $(subst $(srcPath)/,$(objPath)/,$(sources:.cpp=.o)) + +executables: ${PROJ_DIR}/main + +${PROJ_DIR}/main: $(objects) $(headers) ${PROJ_DIR}/main.cpp + $(compiler) $(compilerFlags) -o ${PROJ_DIR}/main $(flags) $(objects) ${PROJ_DIR}/main.cpp $(paths) $(linkerFlags) + @if which install_name_tool > /dev/null 2>&1; then \ + install_name_tool -add_rpath "${OCCA_DIR}/lib" ${PROJ_DIR}/main; \ + fi + +$(objPath)/%.o:$(srcPath)/%.cpp $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.hpp))) $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.tpp))) + $(compiler) $(compilerFlags) -o $@ $(flags) -c $(paths) $< + +clean: + rm -f $(objPath)/*; + rm -f ${PROJ_DIR}/main; +#================================================= diff --git a/examples/cpp/30_device_function/README.md b/examples/cpp/30_device_function/README.md new file mode 100644 index 000000000..96d330ef3 --- /dev/null +++ b/examples/cpp/30_device_function/README.md @@ -0,0 +1,28 @@ +# Example: Add Vectors + +A 'Hello World' example showing the basics + +- Creating an OCCA device +- Allocating and setting memory +- Building a kernel (function that runs on the device) + +# Compiling the Example + +```bash +make +``` + +## Usage + +``` +> ./main --help + +Usage: ./main [OPTIONS] + +Example adding two vectors + +Options: + -d, --device Device properties (default: "{mode: 'Serial'}") + -h, --help Print usage + -v, --verbose Compile kernels in verbose mode +``` diff --git a/examples/cpp/30_device_function/addVectors.okl b/examples/cpp/30_device_function/addVectors.okl new file mode 100644 index 000000000..ff07ab0c7 --- /dev/null +++ b/examples/cpp/30_device_function/addVectors.okl @@ -0,0 +1,28 @@ +float add(const float* a, + int i, + const float* b, + int j) { + return a[i] + b[j]; +} + +#define BLOCK_SIZE 4 + +@kernel +void addVectors(const int N, + const float* a, + const float* b, + float *ab) { + + @outer + for(int i=0; i < N; i+=BLOCK_SIZE) { + @shared float s_b[BLOCK_SIZE]; + const float* g_a = a; + @inner + for(int j=0; j < BLOCK_SIZE; ++j) { + s_b[j] = b[i+j]; + @barrier; + + ab[i+j] = add(g_a,i+j,s_b,j); + } + } +} diff --git a/examples/cpp/30_device_function/main.cpp b/examples/cpp/30_device_function/main.cpp new file mode 100644 index 000000000..f8c17cd39 --- /dev/null +++ b/examples/cpp/30_device_function/main.cpp @@ -0,0 +1,75 @@ +#include +#include +#include + +//---[ Internal Tools ]----------------- +// Note: These headers are not officially supported +// Please don't rely on it outside of the occa examples +#include +#include +//====================================== + +occa::json parseArgs(int argc, const char **argv); + +int main(int argc, const char **argv) { + occa::json args = parseArgs(argc, argv); + + int entries = 12; + + std::vector a(entries); + std::vector b(entries); + std::vector ab(entries); + + for (int i = 0; i < entries; ++i) { + a[i] = i; + b[i] = 1 - i; + ab[i] = 0; + } + + occa::device device(args["options/device"].toString()); + + auto o_a = device.malloc(entries); + auto o_b = device.malloc(entries); + auto o_ab = device.malloc(entries); + + o_a.copyFrom(a.data()); + o_b.copyFrom(b.data()); + + auto addVectors = device.buildKernel("addVectors.okl","addVectors"); + addVectors(entries, o_a, o_b, o_ab); + o_ab.copyTo(ab.data()); + + for (int i = 0; i < entries; ++i) { + std::cout << i << ": " << ab[i] << '\n'; + } + for (int i = 0; i < entries; ++i) { + if (!occa::areBitwiseEqual(ab[i], a[i] + b[i])) { + throw 1; + } + } + + return 0; +} + +occa::json parseArgs(int argc, const char **argv) { + occa::cli::parser parser; + parser + .withDescription( + "Example adding two vectors" + ) + .addOption( + occa::cli::option('d', "device", + "Device properties (default: \"{mode: 'Serial'}\")") + .withArg() + .withDefaultValue("{mode: 'Serial'}") + ) + .addOption( + occa::cli::option('v', "verbose", + "Compile kernels in verbose mode") + ); + + occa::json args = parser.parseArgs(argc, argv); + occa::settings()["kernel/verbose"] = args["options/verbose"]; + + return args; +} diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index b762ffed6..f282c63c2 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -15,7 +15,8 @@ add_subdirectory(14_cuda_interop) add_subdirectory(18_nonblocking_streams) add_subdirectory(20_native_dpcpp_kernel) +add_subdirectory(30_device_function) # Don't force-compile OpenGL examples # add_subdirectory(16_finite_difference) -# add_subdirectory(17_mandelbulb) \ No newline at end of file +# add_subdirectory(17_mandelbulb) diff --git a/include/occa/defines/windows.hpp b/include/occa/defines/windows.hpp index 047912516..9db158bd8 100644 --- a/include/occa/defines/windows.hpp +++ b/include/occa/defines/windows.hpp @@ -1,25 +1,3 @@ -/* The MIT License (MIT) - * - * Copyright (c) 2014-2018 David Medina and Tim Warburton - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - */ - #ifndef OCCA_DEFINES_WINDOWS_HEADER #define OCCA_DEFINES_WINDOWS_HEADER # if OCCA_OS == OCCA_WINDOWS_OS diff --git a/modulefiles/occa b/modulefiles/occa new file mode 100644 index 000000000..57dd07b3d --- /dev/null +++ b/modulefiles/occa @@ -0,0 +1,15 @@ +#%Module +module-whatis {} + +set OCCA_ROOT @CMAKE_INSTALL_PREFIX@ + +prepend-path PATH $OCCA_ROOT/bin +prepend-path LIBRARY_PATH $OCCA_ROOT/lib +prepend-path LD_LIBRARY_PATH $OCCA_ROOT/lib +prepend-path INCLUDE $OCCA_ROOT/include +prepend-path C_INCLUDE_PATH $OCCA_ROOT/include +prepend-path CPLUS_INCLUDE_PATH $OCCA_ROOT/include + +setenv OCCA_ROOT $OCCA_ROOT +setenv OCCA_DIR $OCCA_ROOT +setenv OCCA_INSTALL_DIR $OCCA_ROOT \ No newline at end of file diff --git a/src/core/base.cpp b/src/core/base.cpp index 7e5604080..29de088cd 100644 --- a/src/core/base.cpp +++ b/src/core/base.cpp @@ -1,16 +1,16 @@ +#include + #include #include #include #include #include #include -#include namespace occa { //---[ Device Functions ]------------- device host() { - static tls tdev; - device &dev = tdev.value(); + thread_local device dev; if (!dev.isInitialized()) { dev = occa::device({ {"mode", "Serial"} @@ -21,12 +21,11 @@ namespace occa { } device& getDevice() { - static tls tdev; - device &dev = tdev.value(); + thread_local device dev; if (!dev.isInitialized()) { dev = host(); } - return dev; + return std::ref(dev); } void setDevice(device d) { diff --git a/src/mpi.cpp b/src/mpi.cpp index e459a57e3..d3bcb3570 100644 --- a/src/mpi.cpp +++ b/src/mpi.cpp @@ -4,7 +4,6 @@ #include #include -#include namespace occa { namespace mpi { @@ -39,8 +38,8 @@ namespace occa { } char* getBuffer() { - static tls buffer_; - return buffer_.value().ptr; + thread_local buffer_t buffer_; + return buffer_.ptr; } void barrier() { diff --git a/src/occa/internal/lang/modes/dpcpp.cpp b/src/occa/internal/lang/modes/dpcpp.cpp index b4074d173..4c5268240 100644 --- a/src/occa/internal/lang/modes/dpcpp.cpp +++ b/src/occa/internal/lang/modes/dpcpp.cpp @@ -96,7 +96,7 @@ namespace occa root.addFirst( *(new directiveStatement( &root, - directiveToken(root.source->origin, "include ")))); + directiveToken(root.source->origin, "include \n using namespace sycl;\n")))); } void dpcppParser::addExtensions() diff --git a/src/occa/internal/lang/modes/opencl.cpp b/src/occa/internal/lang/modes/opencl.cpp index 6695c4916..17b2487d0 100644 --- a/src/occa/internal/lang/modes/opencl.cpp +++ b/src/occa/internal/lang/modes/opencl.cpp @@ -9,13 +9,13 @@ namespace occa { namespace lang { namespace okl { - qualifier_t openclParser::global("__global", qualifierType::custom); openclParser::openclParser(const occa::json &settings_) : withLauncher(settings_), constant("__constant", qualifierType::custom), kernel("__kernel", qualifierType::custom), - local("__local", qualifierType::custom) { + local("__local", qualifierType::custom), + global("__global",qualifierType::custom) { okl::addOklAttributes(*this); @@ -130,87 +130,19 @@ namespace occa { .nestedForEachDeclaration([&](variableDeclaration &decl) { variable_t &var = decl.variable(); if (var.hasAttribute("shared")) { - var.add(0, local); + var.add(0,local); } }); } - bool openclParser::sharedVariableMatcher(exprNode &expr) { - return expr.hasAttribute("shared"); - } - void openclParser::setGlobalQualifiers() { - root.children - .flatFilterByStatementType( - statementType::declaration - | statementType::functionDecl - | statementType::function - ) - .forEach(updateGlobalVariables); - } - - void openclParser::updateGlobalVariables(statement_t *smnt) { - if (smnt->type() & statementType::function) { - addGlobalToFunctionArgs( - smnt->to().function() - ); - } - else if (smnt->type() & statementType::functionDecl) { - addGlobalToFunctionArgs( - smnt->to().function() - ); - } - else { - declarationStatement &declSmnt = smnt->to(); - const int declCount = declSmnt.declarations.size(); - for (int i = 0; i < declCount; ++i) { - addGlobalToVariable( - declSmnt.declarations[i].variable() - ); - } - } - } - - void openclParser::addGlobalToFunctionArgs(function_t &func) { - const int argc = (int) func.args.size(); - for (int i = 0; i < argc; ++i) { - variable_t *arg = func.args[i]; - if (arg) { - addGlobalToVariable(*arg); - } - } - } - - void openclParser::addGlobalToVariable(variable_t &var) { - if (var.hasAttribute("globalPtr")) { - var.add(0, global); - } - } - - void openclParser::updateScopeStructVariables(statement_t *smnt) { - if (smnt->type() & statementType::function) { - addStructToFunctionArgs( - smnt->to().function() - ); - return; - } - - scope_t &scope = smnt->to().scope; - - keywordMap::iterator it = scope.keywords.begin(); - while (it != scope.keywords.end()) { - keyword_t &keyword = *(it->second); - - if (keyword.type() & keywordType::variable) { - addStructToVariable(keyword.to().variable); - } else if (keyword.type() & keywordType::function) { - addStructToFunctionArgs( - keyword.to().function - ); - } - - ++it; - } + statementArray::from(root) + .nestedForEachDeclaration([&](variableDeclaration &decl) { + variable_t &var = decl.variable(); + if (var.hasAttribute("globalPtr")) { + var.add(0,global); + } + }); } void openclParser::addStructToVariable(variable_t &var) { @@ -274,7 +206,30 @@ namespace occa { statementType::blockStatements | statementType::function ) - .forEach(updateScopeStructVariables); + .forEach([&](statement_t *smnt) { + if (smnt->type() & statementType::function) { + addStructToFunctionArgs( + smnt->to().function()); + return; + } + + scope_t &scope = smnt->to().scope; + + keywordMap::iterator it = scope.keywords.begin(); + while (it != scope.keywords.end()) { + keyword_t &keyword = *(it->second); + + if (keyword.type() & keywordType::variable) { + addStructToVariable(keyword.to().variable); + } else if (keyword.type() & keywordType::function) { + addStructToFunctionArgs( + keyword.to().function + ); + } + + ++it; + } + }); } void openclParser::setupKernels() { @@ -311,18 +266,16 @@ namespace occa { } void openclParser::setKernelQualifiers(function_t &function) { - function.returnType.add(0, kernel); - const int argCount = (int) function.args.size(); - for (int ai = 0; ai < argCount; ++ai) { - variable_t &arg = *(function.args[ai]); - arg.vartype = arg.vartype.flatten(); - if (arg.vartype.isPointerType()) { - arg.add(0, global); - } + for (auto arg : function.args) { + vartype_t &type = arg->vartype; + type = type.flatten(); + if (type.isPointerType()) + arg->add(0,global); } } + } } } diff --git a/src/occa/internal/lang/modes/opencl.hpp b/src/occa/internal/lang/modes/opencl.hpp index 5b50e7372..72d035ed9 100644 --- a/src/occa/internal/lang/modes/opencl.hpp +++ b/src/occa/internal/lang/modes/opencl.hpp @@ -11,8 +11,7 @@ namespace occa { qualifier_t constant; qualifier_t kernel; qualifier_t local; - // Hack until code-transformation API is done - static qualifier_t global; + qualifier_t global; openclParser(const occa::json &settings_ = occa::json()); @@ -34,16 +33,11 @@ namespace occa { void updateConstToConstant(); void setLocalQualifiers(); - static bool sharedVariableMatcher(exprNode &expr); void setGlobalQualifiers(); - static void updateGlobalVariables(statement_t *smnt); - static void addGlobalToFunctionArgs(function_t &func); - static void addGlobalToVariable(variable_t &var); - static void updateScopeStructVariables(statement_t *smnt); - static void addStructToVariable(variable_t &var); - static void addStructToFunctionArgs(function_t &func); + void addStructToVariable(variable_t &var); + void addStructToFunctionArgs(function_t &func); void addBarriers(); diff --git a/src/occa/internal/lang/modes/withLauncher.cpp b/src/occa/internal/lang/modes/withLauncher.cpp index 7aac09270..505d28580 100644 --- a/src/occa/internal/lang/modes/withLauncher.cpp +++ b/src/occa/internal/lang/modes/withLauncher.cpp @@ -151,8 +151,9 @@ namespace occa { bool withLauncher::isLastInnerLoop(forStatement &forSmnt) { blockStatement &parent = *(forSmnt.up); for(int smntIndex = forSmnt.childIndex()+1; smntIndextype() & statementType::for_) - && parent[smntIndex]->hasAttribute("inner")) { + if (statementArray::from(*parent[smntIndex]) + .flatFilterByAttribute("inner") + .length()) { return false; } } diff --git a/src/occa/internal/lang/qualifier.cpp b/src/occa/internal/lang/qualifier.cpp index e76ffd610..168cda962 100644 --- a/src/occa/internal/lang/qualifier.cpp +++ b/src/occa/internal/lang/qualifier.cpp @@ -273,8 +273,8 @@ namespace occa { } qualifiers.push_back(qualifiers[count - 1]); - for (int i = 0; i < (count - 1); ++i) { - qualifiers[i + 1] = qualifiers[i]; + for (int i = (count - 1); i > 0; --i) { + qualifiers[i] = qualifiers[i - 1]; } qualifiers[0] = qualifier; return *this; diff --git a/src/occa/internal/modes/cuda/device.cpp b/src/occa/internal/modes/cuda/device.cpp index 37856ff3b..248d77930 100644 --- a/src/occa/internal/modes/cuda/device.cpp +++ b/src/occa/internal/modes/cuda/device.cpp @@ -291,44 +291,11 @@ namespace occa { sys::addCompilerLibraryFlags(compilerFlags); } - //---[ PTX Check Command ]-------- - std::stringstream command; - if (allProps.has("compiler_env_script")) { - command << allProps["compiler_env_script"] << " && "; - } - - command << compiler - << ' ' << compilerFlags - << " -Xptxas -v,-dlcm=cg" -#if (OCCA_OS == OCCA_WINDOWS_OS) - << " -D OCCA_OS=OCCA_WINDOWS_OS -D _MSC_VER=1800" -#endif - << " -I" << env::OCCA_DIR << "include" - << " -I" << env::OCCA_INSTALL_DIR << "include" - << " -L" << env::OCCA_INSTALL_DIR << "lib -locca" - << " -x cu -c " << sourceFilename - << " -o " << ptxBinaryFilename; - - if (!verbose) { - command << " > /dev/null 2>&1"; - } - const std::string &ptxCommand = command.str(); - if (verbose) { - io::stdout << "Compiling [" << kernelName << "]\n" << ptxCommand << "\n"; - } - -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - ignoreResult( system(ptxCommand.c_str()) ); -#else - ignoreResult( system(("\"" + ptxCommand + "\"").c_str()) ); -#endif - //================================ - //---[ Compiling Command ]-------- - command.str(""); + std::stringstream command; command << allProps["compiler"] << ' ' << compilerFlags - << " -ptx" + << " -cubin" #if (OCCA_OS == OCCA_WINDOWS_OS) << " -D OCCA_OS=OCCA_WINDOWS_OS -D _MSC_VER=1800" #endif diff --git a/src/occa/internal/modes/dpcpp/device.cpp b/src/occa/internal/modes/dpcpp/device.cpp index 93ed90cad..7f39f6316 100644 --- a/src/occa/internal/modes/dpcpp/device.cpp +++ b/src/occa/internal/modes/dpcpp/device.cpp @@ -44,8 +44,6 @@ namespace occa dpcppDevice = devices[deviceID]; dpcppContext = ::sycl::context(devices[deviceID]); - - std::cout << "Target Device is: " << dpcppDevice.get_info<::sycl::info::device::name>() << "\n"; } occa::json &kernelProps = properties["kernel"]; @@ -57,14 +55,18 @@ namespace occa getDpcppStream(currentStream).finish(); } - //@todo: update kernel hashing hash_t device::hash() const { if (!hash_.initialized) { std::stringstream ss; - ss << "platform: " << platformID << ' ' - << "device: " << deviceID; + auto p = dpcppDevice.get_platform(); + ss << "platform name: " << p.get_info<::sycl::info::platform::name>() + << " platform vendor: " << p.get_info<::sycl::info::platform::vendor>() + << " platform version: " << p.get_info<::sycl::info::platform::version>() + << " device name: " << dpcppDevice.get_info<::sycl::info::device::name>() + << " device vendor: " << dpcppDevice.get_info<::sycl::info::device::vendor>() + << " device version: " << dpcppDevice.get_info<::sycl::info::device::version>(); hash_ = occa::hash(ss.str()); } return hash_; @@ -73,7 +75,7 @@ namespace occa hash_t device::kernelHash(const occa::json &props) const { return ( - occa::hash(props["compiler_flags"]) ^ props["compiler_flags"]); + occa::hash(props["compiler"]) ^ props["compiler_flags"]); } lang::okl::withLauncher *device::createParser(const occa::json &props) const diff --git a/src/occa/internal/modes/dpcpp/polyfill.hpp b/src/occa/internal/modes/dpcpp/polyfill.hpp index 51eb2052d..0e0807d02 100644 --- a/src/occa/internal/modes/dpcpp/polyfill.hpp +++ b/src/occa/internal/modes/dpcpp/polyfill.hpp @@ -10,6 +10,9 @@ #include namespace sycl { + class device; + class platform; + template struct id { @@ -54,6 +57,7 @@ namespace sycl platform, name, vendor, + version, max_work_item_sizes, max_work_group_size }; @@ -121,6 +125,13 @@ namespace sycl using return_type = std::string; }; + template <> + class param_traits + { + public: + using return_type = std::string; + }; + template <> class param_traits { @@ -149,6 +160,20 @@ namespace sycl using return_type = std::string; }; + template <> + class param_traits + { + public: + using return_type = std::string; + }; + + template <> + class param_traits + { + public: + using return_type = std::string; + }; + template class param_traits { @@ -216,6 +241,8 @@ namespace sycl throw sycl::exception(); return false; } + + sycl::platform get_platform() const; }; template <> @@ -258,6 +285,14 @@ namespace sycl return "Error--DPC++ not enabled!"; } + template <> + inline info::param_traits::return_type + device::get_info() const + { + throw sycl::exception(); + return "Error--DPC++ not enabled!"; + } + template <> inline info::param_traits::return_type device::get_info() const @@ -315,6 +350,28 @@ namespace sycl return "Error--DPC++ not enabled!"; } + template <> + inline info::param_traits::return_type + platform::get_info() const + { + throw sycl::exception(); + return "Error--DPC++ not enabled!"; + } + + template <> + inline info::param_traits::return_type + platform::get_info() const + { + throw sycl::exception(); + return "Error--DPC++ not enabled!"; + } + + inline platform device::get_platform() const + { + throw sycl::exception(); + return platform(); + } + class context { public: diff --git a/src/occa/internal/modes/dpcpp/registration.cpp b/src/occa/internal/modes/dpcpp/registration.cpp index fef3a9785..a71d2fca0 100644 --- a/src/occa/internal/modes/dpcpp/registration.cpp +++ b/src/occa/internal/modes/dpcpp/registration.cpp @@ -24,8 +24,9 @@ namespace occa { auto platform_list = ::sycl::platform::get_platforms(); for (auto p : platform_list) { + std::string platform_name_str = p.get_info<::sycl::info::platform::name>(); section - .add("Platform " + toString(platform_id), p.get_info<::sycl::info::platform::name>()) + .add("Platform " + toString(platform_id), platform_name_str) .addDivider(); int device_id{0}; diff --git a/src/occa/internal/modes/opencl/device.cpp b/src/occa/internal/modes/opencl/device.cpp index ddad12f6e..f1314a632 100644 --- a/src/occa/internal/modes/opencl/device.cpp +++ b/src/occa/internal/modes/opencl/device.cpp @@ -48,6 +48,12 @@ namespace occa { compilerFlags = (std::string) kernelProps["compiler_flags"]; } + std::string ocl_c_ver = "2.0"; + if (env::var("OCCA_OPENCL_C_VERSION").size()) { + ocl_c_ver = env::var("OCCA_OPENCL_C_VERSION"); + } + compilerFlags += " -cl-std=CL" + ocl_c_ver; + kernelProps["compiler_flags"] = compilerFlags; } @@ -71,8 +77,12 @@ namespace occa { hash_t device::hash() const { if (!hash_.initialized) { std::stringstream ss; - ss << "platform: " << platformID << ' ' - << "device: " << deviceID; + ss << "platform name: " << opencl::platformName(platformID) + << " platform vendor: " << opencl::platformVendor(platformID) + << " platform version: " << opencl::platformVersion(platformID) + << " device name: " << opencl::deviceName(platformID,deviceID) + << " device vendor: " << opencl::deviceVendor(platformID,deviceID) + << " device version: " << opencl::deviceVersion(platformID,deviceID); hash_ = occa::hash(ss.str()); } return hash_; @@ -352,7 +362,7 @@ namespace occa { } udim_t device::memorySize() const { - return opencl::getDeviceMemorySize(clDevice); + return opencl::deviceGlobalMemSize(clDevice); } //================================== } diff --git a/src/occa/internal/modes/opencl/polyfill.hpp b/src/occa/internal/modes/opencl/polyfill.hpp index 85429e490..faaa18342 100644 --- a/src/occa/internal/modes/opencl/polyfill.hpp +++ b/src/occa/internal/modes/opencl/polyfill.hpp @@ -34,6 +34,7 @@ namespace occa { typedef int cl_buffer_create_type; typedef int cl_command_queue_properties; + typedef int cl_platform_info; typedef int cl_device_info; typedef int cl_device_type; typedef int cl_kernel_work_group_info; @@ -69,17 +70,23 @@ namespace occa { static cl_command_queue_properties CL_QUEUE_PROFILING_ENABLE = 0; + static cl_platform_info CL_PLATFORM_NAME = 0; + static cl_platform_info CL_PLATFORM_VENDOR = 1; + static cl_platform_info CL_PLATFORM_VERSION = 2; + static cl_device_info CL_DEVICE_GLOBAL_MEM_SIZE = 0; static cl_device_info CL_DEVICE_MAX_COMPUTE_UNITS = 1; static cl_device_info CL_DEVICE_NAME = 2; static cl_device_info CL_DEVICE_TYPE = 3; static cl_device_info CL_DEVICE_VENDOR = 4; - static cl_device_info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = 5; - static cl_device_info CL_DEVICE_MAX_WORK_ITEM_SIZES = 6; + static cl_device_info CL_DEVICE_VERSION = 5; + static cl_device_info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = 6; + static cl_device_info CL_DEVICE_MAX_WORK_ITEM_SIZES = 7; static cl_device_type CL_DEVICE_TYPE_ACCELERATOR = 0; static cl_device_type CL_DEVICE_TYPE_CPU = 1; static cl_device_type CL_DEVICE_TYPE_GPU = 2; + static cl_device_type CL_DEVICE_TYPE_ALL = 3; static cl_kernel_work_group_info CL_KERNEL_WORK_GROUP_SIZE = 0; @@ -191,6 +198,8 @@ namespace occa { size_t *param_value_size_ret) { return OCCA_OPENCL_IS_NOT_ENABLED; } + + // ---[ Platform ]-------------------- inline cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms, @@ -198,6 +207,16 @@ namespace occa { return OCCA_OPENCL_IS_NOT_ENABLED; } + inline cl_int clGetPlatformInfo( + cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) { + + return OCCA_OPENCL_IS_NOT_ENABLED; + } + // ---[ Event ]--------------------- inline cl_int clEnqueueMarker(cl_command_queue command_queue, cl_event *event) { diff --git a/src/occa/internal/modes/opencl/registration.cpp b/src/occa/internal/modes/opencl/registration.cpp index 1bec8724b..8c7bff234 100644 --- a/src/occa/internal/modes/opencl/registration.cpp +++ b/src/occa/internal/modes/opencl/registration.cpp @@ -21,17 +21,40 @@ namespace occa { if (section.size() == 0) { int platformCount = getPlatformCount(); for (int platformId = 0; platformId < platformCount; ++platformId) { + std::string platform_name_str = platformName(platformId); + section + .add("Platform " + toString(platformId), platform_name_str) + .addDivider(); + int deviceCount = getDeviceCountInPlatform(platformId); for (int deviceId = 0; deviceId < deviceCount; ++deviceId) { - udim_t bytes = getDeviceMemorySize(platformId, deviceId); - std::string bytesStr = stringifyBytes(bytes); + std::string device_name_str = deviceName(platformId, deviceId); + info::device_type type = deviceType(platformId, deviceId); + std::string device_type_str; + switch (type) { + case info::device_type::cpu: + device_type_str = "cpu"; + break; + case info::device_type::gpu: + device_type_str = "gpu"; + break; + case info::device_type::accelerator: + device_type_str = "accelerator"; + break; + case info::device_type::all: + device_type_str = "all!?"; + break; + } + + int compute_cores = deviceCoreCount(platformId, deviceId); + udim_t global_memory_B = deviceGlobalMemSize(platformId, deviceId); + std::string global_memory_str = stringifyBytes(global_memory_B); section - .add("Device Name" , deviceName(platformId, deviceId)) - .add("Driver Vendor", info::vendor(deviceVendor(platformId, deviceId))) - .add("Platform ID" , toString(platformId)) - .add("Device ID" , toString(deviceId)) - .add("Memory" , bytesStr) + .add("Device " + toString(deviceId), device_name_str) + .add("Device Type", device_type_str) + .add("Compute Cores", toString(compute_cores)) + .add("Global Memory", global_memory_str) .addDivider(); } } diff --git a/src/occa/internal/modes/opencl/utils.cpp b/src/occa/internal/modes/opencl/utils.cpp index 3a5c22981..9dd9d1420 100644 --- a/src/occa/internal/modes/opencl/utils.cpp +++ b/src/occa/internal/modes/opencl/utils.cpp @@ -17,26 +17,6 @@ namespace occa { clProgram(NULL), clKernel(NULL) {} - namespace info { - std::string deviceType(int type) { - if (type & CPU) return "CPU"; - if (type & GPU) return "GPU"; - if (type & FPGA) return "FPGA"; - if (type & XeonPhi) return "Xeon Phi"; - - return "N/A"; - } - - std::string vendor(int type) { - if (type & Intel) return "Intel"; - if (type & AMD) return "AMD"; - if (type & NVIDIA) return "NVIDIA"; - if (type & Altera) return "Altera"; - - return "N/A"; - } - } - bool isEnabled() { cl_uint platformCount = 0; cl_int error = clGetPlatformIDs(0, NULL, &platformCount); @@ -51,17 +31,6 @@ namespace occa { return false; } - cl_device_type deviceType(int type) { - cl_device_type ret = 0; - - if (type & info::CPU) ret |= CL_DEVICE_TYPE_CPU; - if (type & info::GPU) ret |= CL_DEVICE_TYPE_GPU; - if (type & info::FPGA) ret |= CL_DEVICE_TYPE_ACCELERATOR; - if (type & info::XeonPhi) ret |= CL_DEVICE_TYPE_ACCELERATOR; - - return ret; - } - int getPlatformCount() { cl_uint platformCount; @@ -84,7 +53,95 @@ namespace occa { return ret; } - int getDeviceCount(int type) { + std::string platformStrInfo(cl_platform_id clPID, + cl_platform_info clInfo) { + size_t bytes; + + OCCA_OPENCL_ERROR("OpenCL: Getting Platform String Info", + clGetPlatformInfo(clPID, + clInfo, + 0, NULL, &bytes)); + + char *buffer = new char[bytes + 1]; + buffer[bytes] = '\0'; + + OCCA_OPENCL_ERROR("OpenCL: Getting Platform String Info", + clGetPlatformInfo(clPID, + clInfo, + bytes, buffer, NULL)); + + std::string ret = buffer; + + delete [] buffer; + + size_t firstNS = ret.size(); + size_t lastNS = ret.size(); + + size_t i; + + for (i = 0; i < ret.size(); ++i) { + if ((ret[i] != ' ') && + (ret[i] != '\t') && + (ret[i] != '\n')) { + firstNS = i; + break; + } + } + + if (i == ret.size()) { + return ""; + } + + for (i = (ret.size() - 1); i > firstNS; --i) { + if ((ret[i] != ' ') && + (ret[i] != '\t') && + (ret[i] != '\n')) { + lastNS = i; + break; + } + } + + if (i == firstNS) { + return ""; + } + return ret.substr(firstNS, (lastNS - firstNS + 1)); + } + + std::string platformName(int pID) { + cl_platform_id clPID = platformID(pID); + return platformStrInfo(clPID, CL_PLATFORM_NAME); + } + + std::string platformVendor(int pID) { + cl_platform_id clPID = platformID(pID); + return platformStrInfo(clPID, CL_PLATFORM_VENDOR); + } + + std::string platformVersion(int pID) { + cl_platform_id clPID = platformID(pID); + return platformStrInfo(clPID, CL_PLATFORM_VERSION); + } + + cl_device_type deviceType(info::device_type type) { + cl_device_type dtype; + switch (type) { + case info::device_type::cpu: + dtype = CL_DEVICE_TYPE_CPU; + break; + case info::device_type::gpu: + dtype = CL_DEVICE_TYPE_GPU; + break; + case info::device_type::accelerator: + dtype = CL_DEVICE_TYPE_ACCELERATOR; + break; + case info::device_type::all: + dtype = CL_DEVICE_TYPE_ALL; + break; + } + return dtype; + } + + int getDeviceCount(info::device_type type) { int pCount = opencl::getPlatformCount(); int ret = 0; @@ -94,17 +151,16 @@ namespace occa { return ret; } - int getDeviceCountInPlatform(int pID, int type) { + int getDeviceCountInPlatform(int pID, info::device_type type) { cl_platform_id clPID = platformID(pID); cl_uint deviceCount = 0; - clGetDeviceIDs(clPID, deviceType(type), 0, NULL, &deviceCount); return deviceCount; } - cl_device_id deviceID(int pID, int dID, int type) { + cl_device_id deviceID(int pID, int dID, info::device_type type) { cl_device_id *devices = new cl_device_id[dID + 1]; cl_platform_id clPID = platformID(pID); @@ -180,47 +236,33 @@ namespace occa { return deviceStrInfo(clDID, CL_DEVICE_NAME); } - int deviceType(int pID, int dID) { + info::device_type deviceType(int pID, int dID) { cl_device_id clDID = deviceID(pID, dID); - int ret = 0; - cl_device_type clDeviceType; - OCCA_OPENCL_ERROR("OpenCL: Get Device Type", - clGetDeviceInfo(clDID, - CL_DEVICE_TYPE, - sizeof(clDeviceType), &clDeviceType, NULL)); + OCCA_OPENCL_ERROR( + "OpenCL: Get Device Type", + clGetDeviceInfo(clDID,CL_DEVICE_TYPE,sizeof(clDeviceType), &clDeviceType, NULL) + ); - if (clDeviceType & CL_DEVICE_TYPE_CPU) { - ret |= info::CPU; - } else if (clDeviceType & CL_DEVICE_TYPE_GPU) { - ret |= info::GPU; - } - return ret; + if (clDeviceType & CL_DEVICE_TYPE_CPU) + return info::device_type::cpu; + if (clDeviceType & CL_DEVICE_TYPE_GPU) + return info::device_type::gpu; + if (clDeviceType & CL_DEVICE_TYPE_ACCELERATOR) + return info::device_type::accelerator; + + return info::device_type::all; } - int deviceVendor(int pID, int dID) { + std::string deviceVendor(int pID, int dID) { cl_device_id clDID = deviceID(pID, dID); - int ret = 0; - - std::string vendor = deviceStrInfo(clDID, CL_DEVICE_VENDOR); - - if (vendor.find("AMD") != std::string::npos || - vendor.find("Advanced Micro Devices") != std::string::npos || - vendor.find("ATI") != std::string::npos) { - - ret |= info::AMD; - } else if (vendor.find("Intel") != std::string::npos) { - ret |= info::Intel; - } else if (vendor.find("Altera") != std::string::npos) { - ret |= info::Altera; - } else if (vendor.find("Nvidia") != std::string::npos || - vendor.find("NVIDIA") != std::string::npos) { - - ret |= info::NVIDIA; - } + return deviceStrInfo(clDID, CL_DEVICE_VENDOR); + } - return ret; + std::string deviceVersion(int pID, int dID) { + cl_device_id clDID = deviceID(pID, dID); + return deviceStrInfo(clDID, CL_DEVICE_VERSION); } int deviceCoreCount(int pID, int dID) { @@ -235,7 +277,7 @@ namespace occa { return ret; } - udim_t getDeviceMemorySize(cl_device_id dID) { + udim_t deviceGlobalMemSize(cl_device_id dID) { cl_ulong ret; OCCA_OPENCL_ERROR("OpenCL: Get Device Available Memory", @@ -246,10 +288,10 @@ namespace occa { return ret; } - udim_t getDeviceMemorySize(int pID, int dID) { + udim_t deviceGlobalMemSize(int pID, int dID) { cl_device_id clDID = deviceID(pID, dID); - return getDeviceMemorySize(clDID); + return deviceGlobalMemSize(clDID); } void buildProgramFromSource(info_t &info, diff --git a/src/occa/internal/modes/opencl/utils.hpp b/src/occa/internal/modes/opencl/utils.hpp index 40748f085..1f8217383 100644 --- a/src/occa/internal/modes/opencl/utils.hpp +++ b/src/occa/internal/modes/opencl/utils.hpp @@ -21,50 +21,38 @@ namespace occa { }; namespace info { - static const int CPU = (1 << 0); - static const int GPU = (1 << 1); - static const int FPGA = (1 << 3); - static const int XeonPhi = (1 << 2); - static const int anyType = (CPU | GPU | FPGA | XeonPhi); - - static const int Intel = (1 << 4); - static const int AMD = (1 << 5); - static const int Altera = (1 << 6); - static const int NVIDIA = (1 << 7); - static const int anyVendor = (Intel | AMD | Altera | NVIDIA); - - static const int any = (anyType | anyVendor); - - std::string deviceType(int type); - std::string vendor(int type); + enum class device_type { + cpu, gpu, accelerator, all = cpu | gpu | accelerator + }; } bool isEnabled(); - cl_device_type deviceType(int type); - int getPlatformCount(); - cl_platform_id platformID(int pID); - int getDeviceCount(int type = info::any); - int getDeviceCountInPlatform(int pID, int type = info::any); + std::string platformStrInfo(cl_platform_id clPID, cl_platform_info clInfo); + std::string platformName(int pID); + std::string platformVendor(int pID); + std::string platformVersion(int pID); - cl_device_id deviceID(int pID, int dID, int type = info::any); + int getDeviceCount(info::device_type deviceType = info::device_type::all); + int getDeviceCountInPlatform(int pID, info::device_type type = info::device_type::all); - std::string deviceStrInfo(cl_device_id clDID, - cl_device_info clInfo); + cl_device_id deviceID(int pID, int dID, info::device_type deviceType = info::device_type::all); + std::string deviceStrInfo(cl_device_id clDID, cl_device_info clInfo); std::string deviceName(int pID, int dID); + std::string deviceVendor(int pID, int dID); + std::string deviceVersion(int pID, int dID); - int deviceType(int pID, int dID); - - int deviceVendor(int pID, int dID); + cl_device_type deviceType(info::device_type type); + info::device_type deviceType(int pID, int dID); int deviceCoreCount(int pID, int dID); - udim_t getDeviceMemorySize(cl_device_id dID); - udim_t getDeviceMemorySize(int pID, int dID); + udim_t deviceGlobalMemSize(cl_device_id dID); + udim_t deviceGlobalMemSize(int pID, int dID); void buildProgramFromSource(info_t &info, const std::string &source, diff --git a/src/occa/internal/utils.hpp b/src/occa/internal/utils.hpp index 1b35ab6cb..f8c45236b 100644 --- a/src/occa/internal/utils.hpp +++ b/src/occa/internal/utils.hpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #endif diff --git a/src/occa/internal/utils/env.cpp b/src/occa/internal/utils/env.cpp index 5cc8c34d7..75263915f 100644 --- a/src/occa/internal/utils/env.cpp +++ b/src/occa/internal/utils/env.cpp @@ -1,19 +1,18 @@ #include +#include #include #include #include #include -#include namespace occa { json& settings() { - static tls settings_; - json& props = settings_.value(); + thread_local json props; if (!props.size()) { props = env::baseSettings(); } - return props; + return std::ref(props); } namespace env { diff --git a/src/occa/internal/utils/tls.hpp b/src/occa/internal/utils/tls.hpp deleted file mode 100644 index 8951bc5e5..000000000 --- a/src/occa/internal/utils/tls.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef OCCA_INTERNAL_UTILS_TLS_HEADER -#define OCCA_INTERNAL_UTILS_TLS_HEADER - -#include - -#include - -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) -# include -#else -# include -# include -#endif - -namespace occa { - template - class tls { - private: -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - pthread_key_t pkey; -#else - thread_local TM value_; -#endif - - public: - tls(const TM &val = TM()); - - template - tls(const tls &t); - - ~tls(); - - template - const TM2& operator = (const TM2 &val); - template - const TM2& operator = (const tls &t); - - TM& value(); - const TM& value() const; - - operator TM (); - operator TM () const; - }; - - template - std::ostream& operator << (std::ostream &out, - const tls &t); -} - -#include "tls.tpp" - -#endif diff --git a/src/occa/internal/utils/tls.tpp b/src/occa/internal/utils/tls.tpp deleted file mode 100644 index 800617db2..000000000 --- a/src/occa/internal/utils/tls.tpp +++ /dev/null @@ -1,90 +0,0 @@ -namespace occa { - template - tls::tls(const TM &val) { -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - pthread_key_create(&pkey, NULL); - pthread_setspecific(pkey, new TM(val)); -#else - value_ = val; -#endif - } - - template - template - tls::tls(const tls &t) { -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - pthread_key_create(&pkey, NULL); - pthread_setspecific(pkey, new TM(t.value())); -#else - value_ = t.value_; -#endif - } - - template - tls::~tls() { -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - delete (TM*) pthread_getspecific(pkey); - pthread_key_delete(pkey); -#endif - } - - template - template - const TM2& tls::operator = (const TM2 &val) { -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - delete &(value()); - pthread_setspecific(pkey, new TM(val)); -#else - value_ = val; -#endif - return val; - } - - template - template - const TM2& tls::operator = (const tls &t) { - const TM2 &val = t.value(); -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - delete &(value()); - pthread_setspecific(pkey, new TM(val)); -#else - value_ = val; -#endif - return val; - } - - template - TM& tls::value() { -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - return *((TM*) pthread_getspecific(pkey)); -#else - return value_; -#endif - } - - template - const TM& tls::value() const { -#if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) - return *((TM*) pthread_getspecific(pkey)); -#else - return value_; -#endif - } - - template - tls::operator TM () { - return value(); - } - - template - tls::operator TM () const { - return value(); - } - - template - std::ostream& operator << (std::ostream &out, - const tls &t) { - out << t.value(); - return out; - } -} diff --git a/src/types/json.cpp b/src/types/json.cpp index 040ef7c7d..1e6dce7f6 100644 --- a/src/types/json.cpp +++ b/src/types/json.cpp @@ -10,6 +10,7 @@ namespace occa { json::json(const std::string &name, const primitive &value) { + type = object_; (*this)[name] = value; } diff --git a/src/types/primitive.cpp b/src/types/primitive.cpp index 1d1b83e2a..fc9a06964 100644 --- a/src/types/primitive.cpp +++ b/src/types/primitive.cpp @@ -690,7 +690,7 @@ namespace occa { primitive primitive::bitAnd(const primitive &a, const primitive &b) { const int retType = (a.type > b.type) ? a.type : b.type; switch(retType) { - case primitiveType::bool_ : return primitive(a.to() & b.to()); + case primitiveType::bool_ : OCCA_FORCE_ERROR("Cannot apply operator & to bool type"); break; case primitiveType::int8_ : return primitive(a.to() & b.to()); case primitiveType::uint8_ : return primitive(a.to() & b.to()); case primitiveType::int16_ : return primitive(a.to() & b.to()); @@ -709,7 +709,7 @@ namespace occa { primitive primitive::bitOr(const primitive &a, const primitive &b) { const int retType = (a.type > b.type) ? a.type : b.type; switch(retType) { - case primitiveType::bool_ : return primitive(a.to() | b.to()); + case primitiveType::bool_ : OCCA_FORCE_ERROR("Cannot apply operator | to bool type"); break; case primitiveType::int8_ : return primitive(a.to() | b.to()); case primitiveType::uint8_ : return primitive(a.to() | b.to()); case primitiveType::int16_ : return primitive(a.to() | b.to()); @@ -728,7 +728,7 @@ namespace occa { primitive primitive::xor_(const primitive &a, const primitive &b) { const int retType = (a.type > b.type) ? a.type : b.type; switch(retType) { - case primitiveType::bool_ : return primitive(a.to() ^ b.to()); + case primitiveType::bool_ : OCCA_FORCE_ERROR("Cannot apply operator ^ to bool type"); break; case primitiveType::int8_ : return primitive(a.to() ^ b.to()); case primitiveType::uint8_ : return primitive(a.to() ^ b.to()); case primitiveType::int16_ : return primitive(a.to() ^ b.to()); @@ -888,7 +888,7 @@ namespace occa { primitive& primitive::bitAndEq(primitive &a, const primitive &b) { const int retType = (a.type > b.type) ? a.type : b.type; switch(retType) { - case primitiveType::bool_ : a = (a.to() & b.to()); break; + case primitiveType::bool_ : OCCA_FORCE_ERROR("Cannot apply operator &= to bool type"); break; case primitiveType::int8_ : a = (a.to() & b.to()); break; case primitiveType::uint8_ : a = (a.to() & b.to()); break; case primitiveType::int16_ : a = (a.to() & b.to()); break; @@ -897,8 +897,8 @@ namespace occa { case primitiveType::uint32_ : a = (a.to() & b.to()); break; case primitiveType::int64_ : a = (a.to() & b.to()); break; case primitiveType::uint64_ : a = (a.to() & b.to()); break; - case primitiveType::float_ : OCCA_FORCE_ERROR("Cannot apply operator & to float type"); break; - case primitiveType::double_ : OCCA_FORCE_ERROR("Cannot apply operator & to double type"); break; + case primitiveType::float_ : OCCA_FORCE_ERROR("Cannot apply operator &= to float type"); break; + case primitiveType::double_ : OCCA_FORCE_ERROR("Cannot apply operator &= to double type"); break; default: ; } return a; @@ -907,7 +907,7 @@ namespace occa { primitive& primitive::bitOrEq(primitive &a, const primitive &b) { const int retType = (a.type > b.type) ? a.type : b.type; switch(retType) { - case primitiveType::bool_ : a = (a.to() | b.to()); break; + case primitiveType::bool_ : OCCA_FORCE_ERROR("Cannot apply operator |= to bool type"); break; case primitiveType::int8_ : a = (a.to() | b.to()); break; case primitiveType::uint8_ : a = (a.to() | b.to()); break; case primitiveType::int16_ : a = (a.to() | b.to()); break; @@ -916,8 +916,8 @@ namespace occa { case primitiveType::uint32_ : a = (a.to() | b.to()); break; case primitiveType::int64_ : a = (a.to() | b.to()); break; case primitiveType::uint64_ : a = (a.to() | b.to()); break; - case primitiveType::float_ : OCCA_FORCE_ERROR("Cannot apply operator | to float type"); break; - case primitiveType::double_ : OCCA_FORCE_ERROR("Cannot apply operator | to double type"); break; + case primitiveType::float_ : OCCA_FORCE_ERROR("Cannot apply operator |= to float type"); break; + case primitiveType::double_ : OCCA_FORCE_ERROR("Cannot apply operator |= to double type"); break; default: ; } return a; @@ -926,7 +926,7 @@ namespace occa { primitive& primitive::xorEq(primitive &a, const primitive &b) { const int retType = (a.type > b.type) ? a.type : b.type; switch(retType) { - case primitiveType::bool_ : a = (a.to() ^ b.to()); break; + case primitiveType::bool_ : OCCA_FORCE_ERROR("Cannot apply operator ^= to bool type"); break; case primitiveType::int8_ : a = (a.to() ^ b.to()); break; case primitiveType::uint8_ : a = (a.to() ^ b.to()); break; case primitiveType::int16_ : a = (a.to() ^ b.to()); break; @@ -935,8 +935,8 @@ namespace occa { case primitiveType::uint32_ : a = (a.to() ^ b.to()); break; case primitiveType::int64_ : a = (a.to() ^ b.to()); break; case primitiveType::uint64_ : a = (a.to() ^ b.to()); break; - case primitiveType::float_ : OCCA_FORCE_ERROR("Cannot apply operator ^ to float type"); break; - case primitiveType::double_ : OCCA_FORCE_ERROR("Cannot apply operator ^ to double type"); break; + case primitiveType::float_ : OCCA_FORCE_ERROR("Cannot apply operator ^= to float type"); break; + case primitiveType::double_ : OCCA_FORCE_ERROR("Cannot apply operator ^= to double type"); break; default: ; } return a; diff --git a/tests/src/math/fpMath.cpp b/tests/src/math/fpMath.cpp new file mode 100644 index 000000000..e45f31233 --- /dev/null +++ b/tests/src/math/fpMath.cpp @@ -0,0 +1,119 @@ +#include +#include + +#include "occa.hpp" + +std::vector arg_types = {"float","double"}; + +std::string unary_args = "x"; +std::string binary_args = "x,y"; +std::string ternary_args = "x,y,z"; + +std::vector unary_functions = { + "fabs", + "sqrt", + "cbrt", + "cos", + "sin", + "tan", + "acos", + "asin", + "atan", + "cosh", + "sinh", + "tanh", + "acosh", + "asinh", + "atanh", + "exp", + "log" +}; + +std::vector binary_functions = { + "fmax", + "fmin", + "hypot", + "pow" +}; + +std::vector ternary_functions = {"fma"}; + +std::string kernel_front_half = +"@kernel \n" +"void f(const int dummy_arg) { \n" +" @outer \n" +" for (int b=0; b<1; ++b) { \n" +" @inner \n" +" for (int t=0; t<1; ++t) { \n" +; + +std::string kernel_back_half = +" } \n" +" } \n" +"} \n" +; + +void testUnaryFunctions(const occa::device& d) { + for (auto fp_type : arg_types) { + std::string arg_decl = + " " + fp_type + " " + unary_args + ";\n"; + for(auto func : unary_functions) { + std::string function_call = + " " + fp_type + " w = " + func + "(" + unary_args + ");\n"; + std::string kernel_src = + kernel_front_half + arg_decl + function_call +kernel_back_half; + + occa::kernel k = d.buildKernelFromString(kernel_src,"f"); + } + } +} + +void testBinaryFunctions(const occa::device& d) { + for (auto fp_type : arg_types) { + std::string arg_decl = + " " + fp_type + " " + binary_args + ";\n"; + for(auto func : binary_functions) { + std::string function_call = + " " + fp_type + " w = " + func + "(" + binary_args + ");\n"; + std::string kernel_src = + kernel_front_half + arg_decl + function_call +kernel_back_half; + + occa::kernel k = d.buildKernelFromString(kernel_src,"f"); + } + } +} + +void testTernaryFunctions(const occa::device& d) { + for (auto fp_type : arg_types) { + std::string arg_decl = + " " + fp_type + " " + ternary_args + ";\n"; + for(auto func : ternary_functions) { + std::string function_call = + " " + fp_type + " w = " + func + "(" + ternary_args + ");\n"; + std::string kernel_src = + kernel_front_half + arg_decl + function_call +kernel_back_half; + + occa::kernel k = d.buildKernelFromString(kernel_src,"f"); + } + } +} + +int main() { + std::vector devices = { + occa::device({{"mode", "Serial"}}), + occa::device({{"mode", "OpenMP"}}), + occa::device({{"mode", "CUDA"},{"device_id", 0}}), + occa::device({{"mode", "HIP"},{"device_id", 0}}), + occa::device({{"mode", "OpenCL"},{"platform_id",0},{"device_id", 0}}), + occa::device({{"mode", "dpcpp"},{"platform_id",0},{"device_id", 0}}) + }; + + for(auto &d : devices) { + std::cout << "Testing mode: " << d.mode() << "\n"; + testUnaryFunctions(d); + testBinaryFunctions(d); + testTernaryFunctions(d); + } + + return 0; +} \ No newline at end of file diff --git a/tests/src/math/intMath.cpp b/tests/src/math/intMath.cpp new file mode 100644 index 000000000..204239606 --- /dev/null +++ b/tests/src/math/intMath.cpp @@ -0,0 +1,77 @@ +#include +#include + +#include "occa.hpp" + +std::vector arg_types = {"int"}; + +std::string unary_args = "x"; +std::string binary_args = "x,y"; + +std::vector unary_functions = {"abs"}; + +std::vector binary_functions = {"max","min"}; + +std::string kernel_front_half = +"@kernel \n" +"void f(const int dummy_arg) { \n" +" @outer \n" +" for (int b=0; b<1; ++b) { \n" +" @inner \n" +" for (int t=0; t<1; ++t) { \n" +; + +std::string kernel_back_half = +" } \n" +" } \n" +"} \n" +; + +void testUnaryFunctions(const occa::device& d) { + for (auto&& int_type : arg_types) { + const std::string arg_decl = + " " + int_type + " " + unary_args + "; \n"; + for (auto&& func : unary_functions) { + const std::string function_call = + " " + int_type + " w = " + func + "(" + unary_args + "); \n"; + const std::string kernel_src = + kernel_front_half + arg_decl + function_call +kernel_back_half; + + occa::kernel k = d.buildKernelFromString(kernel_src,"f"); + } + } +} + +void testBinaryFunctions(const occa::device& d) { + for (auto&& int_type : arg_types) { + const std::string arg_decl = + " " + int_type + " " + binary_args + "; \n"; + for (auto&& func : binary_functions) { + const std::string function_call = + " " + int_type + " w = " + func + "(" + binary_args + "); \n"; + const std::string kernel_src = + kernel_front_half + arg_decl + function_call +kernel_back_half; + + occa::kernel k = d.buildKernelFromString(kernel_src,"f"); + } + } +} + +int main() { + std::vector devices = { + occa::device({{"mode", "Serial"}}), + occa::device({{"mode", "OpenMP"}}), + occa::device({{"mode", "CUDA"},{"device_id", 0}}), + occa::device({{"mode", "HIP"},{"device_id", 0}}), + occa::device({{"mode", "OpenCL"},{"platform_id",0},{"device_id", 0}}), + occa::device({{"mode", "dpcpp"},{"platform_id",0},{"device_id", 0}}) + }; + + for(auto &d : devices) { + std::cout << "Testing mode: " << d.mode() << "\n"; + testUnaryFunctions(d); + testBinaryFunctions(d); + } + + return 0; +} \ No newline at end of file diff --git a/tests/src/types/primitive.cpp b/tests/src/types/primitive.cpp index af99661d8..cd7e6ed35 100644 --- a/tests/src/types/primitive.cpp +++ b/tests/src/types/primitive.cpp @@ -17,6 +17,12 @@ void testLeftIncrement(); void testRightIncrement(); void testLeftDecrement(); void testRightDecrement(); +void testBitAnd(); +void testBitOr(); +void testBitXor(); +void testBitAndEq(); +void testBitOrEq(); +void testBitXorEq(); int main(const int argc, const char **argv) { testInit(); @@ -32,6 +38,12 @@ int main(const int argc, const char **argv) { testRightIncrement(); testLeftDecrement(); testRightDecrement(); + void testBitAnd(); + void testBitOr(); + void testBitXor(); + void testBitAndEq(); + void testBitOrEq(); + void testBitXorEq(); return 0; } @@ -495,3 +507,339 @@ void testRightDecrement() { occa::primitive::rightDecrement(p); ASSERT_EQ((double) 6 ,(double) p); } + +void testBitAnd() { + occa::primitive p,q,r; + + p = occa::primitive(true); + q = occa::primitive(false); + ASSERT_THROW(occa::primitive::bitAnd(p,q)); + + p = occa::primitive((uint8_t) 7); + q = occa::primitive((uint8_t) 0b00000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(uint8_t(7),(uint8_t) r); + + p = occa::primitive((uint16_t) 7); + q = occa::primitive((uint16_t) 0b0000000000000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(uint16_t(7),(uint16_t) r); + + p = occa::primitive((uint32_t) 7); + q = occa::primitive((uint32_t) 0b00000000000000000000000000000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(uint32_t(7),(uint32_t) r); + + p = occa::primitive((uint64_t) 7); + q = occa::primitive((uint64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(uint64_t(7),(uint64_t) r); + + p = occa::primitive((int8_t) 7); + q = occa::primitive((int8_t) 0b00000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(int8_t(7),(int8_t) r); + + p = occa::primitive((int16_t) 7); + q = occa::primitive((int16_t) 0b0000000000000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(int16_t(7),(int16_t) r); + + p = occa::primitive((int32_t) 7); + q = occa::primitive((int32_t) 0b00000000000000000000000000000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(int32_t(7),(int32_t) r); + + p = occa::primitive((int64_t) 7); + q = occa::primitive((int64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + r = occa::primitive::bitAnd(p,q); + ASSERT_EQ(int64_t(7),(int64_t) r); + + p = occa::primitive((float) 7); + q = occa::primitive((float) 1); + ASSERT_THROW(occa::primitive::bitAnd(p,q)); + + p = occa::primitive((double) 7); + q = occa::primitive((double) 1); + ASSERT_THROW(occa::primitive::bitAnd(p,q)); +} + +void testBitOr() { + occa::primitive p,q,r; + + p = occa::primitive(true); + q = occa::primitive(false); + ASSERT_THROW(occa::primitive::bitOr(p,q)); + + p = occa::primitive((uint8_t) 7); + q = occa::primitive((uint8_t) 0b00001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(uint8_t(15),(uint8_t) r); + + p = occa::primitive((uint16_t) 7); + q = occa::primitive((uint16_t) 0b0000000000001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(uint16_t(15),(uint16_t) r); + + p = occa::primitive((uint32_t) 7); + q = occa::primitive((uint32_t) 0b00000000000000000000000000001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(uint32_t(15),(uint32_t) r); + + p = occa::primitive((uint64_t) 7); + q = occa::primitive((uint64_t) 0b0000000000000000000000000000000000000000000000000000000000001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(uint64_t(15),(uint64_t) r); + + p = occa::primitive((int8_t) 7); + q = occa::primitive((int8_t) 0b00001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(int8_t(15),(int8_t) r); + + p = occa::primitive((int16_t) 7); + q = occa::primitive((int16_t) 0b0000000000001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(int16_t(15),(int16_t) r); + + p = occa::primitive((int32_t) 7); + q = occa::primitive((int32_t) 0b00000000000000000000000000001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(int32_t(15),(int32_t) r); + + p = occa::primitive((int64_t) 7); + q = occa::primitive((int64_t) 0b0000000000000000000000000000000000000000000000000000000000001111); + r = occa::primitive::bitOr(p,q); + ASSERT_EQ(int64_t(15),(int64_t) r); + + p = occa::primitive((float) 7); + q = occa::primitive((float) 1); + ASSERT_THROW(occa::primitive::bitOr(p,q)); + + p = occa::primitive((double) 7); + q = occa::primitive((double) 1); + ASSERT_THROW(occa::primitive::bitOr(p,q)); +} + +void testBitXor() { + occa::primitive p,q,r; + + p = occa::primitive(true); + q = occa::primitive(false); + ASSERT_THROW(occa::primitive::xor_(p,q)); + + p = occa::primitive((uint8_t) 7); + q = occa::primitive((uint8_t) 0b00000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(uint8_t(0),(uint8_t) r); + + p = occa::primitive((uint16_t) 7); + q = occa::primitive((uint16_t) 0b0000000000000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(uint16_t(0),(uint16_t) r); + + p = occa::primitive((uint32_t) 7); + q = occa::primitive((uint32_t) 0b00000000000000000000000000000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(uint32_t(0),(uint32_t) r); + + p = occa::primitive((uint64_t) 7); + q = occa::primitive((uint64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(uint64_t(0),(uint64_t) r); + + p = occa::primitive((int8_t) 7); + q = occa::primitive((int8_t) 0b00000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(int8_t(0),(int8_t) r); + + p = occa::primitive((int16_t) 7); + q = occa::primitive((int16_t) 0b0000000000000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(int16_t(0),(int16_t) r); + + p = occa::primitive((int32_t) 7); + q = occa::primitive((int32_t) 0b00000000000000000000000000000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(int32_t(0),(int32_t) r); + + p = occa::primitive((int64_t) 7); + q = occa::primitive((int64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + r = occa::primitive::xor_(p,q); + ASSERT_EQ(int64_t(0),(int64_t) r); + + p = occa::primitive((float) 7); + q = occa::primitive((float) 1); + ASSERT_THROW(occa::primitive::xor_(p,q)); + + p = occa::primitive((double) 7); + q = occa::primitive((double) 1); + ASSERT_THROW(occa::primitive::xor_(p,q)); +} + +void testBitAndEq() { + occa::primitive p,q; + + p = occa::primitive(true); + q = occa::primitive(false); + ASSERT_THROW(occa::primitive::bitAndEq(p,q)); + + p = occa::primitive((uint8_t) 7); + q = occa::primitive((uint8_t) 0b00000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(uint8_t(7),(uint8_t) p); + + p = occa::primitive((uint16_t) 7); + q = occa::primitive((uint16_t) 0b0000000000000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(uint16_t(7),(uint16_t) p); + + p = occa::primitive((uint32_t) 7); + q = occa::primitive((uint32_t) 0b00000000000000000000000000000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(uint32_t(7),(uint32_t) p); + + p = occa::primitive((uint64_t) 7); + q = occa::primitive((uint64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(uint64_t(7),(uint64_t) p); + + p = occa::primitive((int8_t) 7); + q = occa::primitive((int8_t) 0b00000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(int8_t(7),(int8_t) p); + + p = occa::primitive((int16_t) 7); + q = occa::primitive((int16_t) 0b0000000000000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(int16_t(7),(int16_t) p); + + p = occa::primitive((int32_t) 7); + q = occa::primitive((int32_t) 0b00000000000000000000000000000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(int32_t(7),(int32_t) p); + + p = occa::primitive((int64_t) 7); + q = occa::primitive((int64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + p = occa::primitive::bitAndEq(p,q); + ASSERT_EQ(int64_t(7),(int64_t) p); + + p = occa::primitive((float) 7); + q = occa::primitive((float) 1); + ASSERT_THROW(occa::primitive::bitAndEq(p,q)); + + p = occa::primitive((double) 7); + q = occa::primitive((double) 1); + ASSERT_THROW(occa::primitive::bitAndEq(p,q)); +} + +void testBitOrEq() { + occa::primitive p,q; + + p = occa::primitive(true); + q = occa::primitive(false); + ASSERT_THROW(occa::primitive::bitOrEq(p,q)); + + p = occa::primitive((uint8_t) 7); + q = occa::primitive((uint8_t) 0b00001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(uint8_t(15),(uint8_t) p); + + p = occa::primitive((uint16_t) 7); + q = occa::primitive((uint16_t) 0b0000000000001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(uint16_t(15),(uint16_t) p); + + p = occa::primitive((uint32_t) 7); + q = occa::primitive((uint32_t) 0b00000000000000000000000000001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(uint32_t(15),(uint32_t) p); + + p = occa::primitive((uint64_t) 7); + q = occa::primitive((uint64_t) 0b0000000000000000000000000000000000000000000000000000000000001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(uint64_t(15),(uint64_t) p); + + p = occa::primitive((int8_t) 7); + q = occa::primitive((int8_t) 0b00001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(int8_t(15),(int8_t) p); + + p = occa::primitive((int16_t) 7); + q = occa::primitive((int16_t) 0b0000000000001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(int16_t(15),(int16_t) p); + + p = occa::primitive((int32_t) 7); + q = occa::primitive((int32_t) 0b00000000000000000000000000001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(int32_t(15),(int32_t) p); + + p = occa::primitive((int64_t) 7); + q = occa::primitive((int64_t) 0b0000000000000000000000000000000000000000000000000000000000001111); + p = occa::primitive::bitOrEq(p,q); + ASSERT_EQ(int64_t(15),(int64_t) p); + + p = occa::primitive((float) 7); + q = occa::primitive((float) 1); + ASSERT_THROW(occa::primitive::bitOrEq(p,q)); + + p = occa::primitive((double) 7); + q = occa::primitive((double) 1); + ASSERT_THROW(occa::primitive::bitOrEq(p,q)); +} + +void testBitXorEq() { + occa::primitive p,q; + + p = occa::primitive(true); + q = occa::primitive(false); + ASSERT_THROW(occa::primitive::xorEq(p,q)); + + p = occa::primitive((uint8_t) 7); + q = occa::primitive((uint8_t) 0b00000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(uint8_t(0),(uint8_t) p); + + p = occa::primitive((uint16_t) 7); + q = occa::primitive((uint16_t) 0b0000000000000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(uint16_t(0),(uint16_t) p); + + p = occa::primitive((uint32_t) 7); + q = occa::primitive((uint32_t) 0b00000000000000000000000000000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(uint32_t(0),(uint32_t) p); + + p = occa::primitive((uint64_t) 7); + q = occa::primitive((uint64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(uint64_t(0),(uint64_t) p); + + p = occa::primitive((int8_t) 7); + q = occa::primitive((int8_t) 0b00000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(int8_t(0),(int8_t) p); + + p = occa::primitive((int16_t) 7); + q = occa::primitive((int16_t) 0b0000000000000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(int16_t(0),(int16_t) p); + + p = occa::primitive((int32_t) 7); + q = occa::primitive((int32_t) 0b00000000000000000000000000000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(int32_t(0),(int32_t) p); + + p = occa::primitive((int64_t) 7); + q = occa::primitive((int64_t) 0b0000000000000000000000000000000000000000000000000000000000000111); + p = occa::primitive::xorEq(p,q); + ASSERT_EQ(int64_t(0),(int64_t) p); + + p = occa::primitive((float) 7); + q = occa::primitive((float) 1); + ASSERT_THROW(occa::primitive::xorEq(p,q)); + + p = occa::primitive((double) 7); + q = occa::primitive((double) 1); + ASSERT_THROW(occa::primitive::xorEq(p,q)); +} \ No newline at end of file