Skip to content

Commit

Permalink
Compute Library v24.09
Browse files Browse the repository at this point in the history
  • Loading branch information
Jenkins committed Sep 25, 2024
1 parent de7288c commit c61bd33
Show file tree
Hide file tree
Showing 124 changed files with 3,554 additions and 1,397 deletions.
5 changes: 2 additions & 3 deletions Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,9 @@ cc_library_static {
"src/core/AccessWindowAutoPadding.cpp",
"src/core/AccessWindowStatic.cpp",
"src/core/AccessWindowTranspose.cpp",
"src/core/CL/CLCommandBuffer.cpp",
"src/core/CL/CLCompatCommandBuffer.cpp",
"src/core/CL/CLCompileContext.cpp",
"src/core/CL/CLHelpers.cpp",
"src/core/CL/CLKernelLibrary.cpp",
"src/core/CL/CLMutableCommandBuffer.cpp",
"src/core/CL/CLUtils.cpp",
"src/core/CL/DefaultLWSHeuristics.cpp",
"src/core/CL/ICLKernel.cpp",
Expand Down Expand Up @@ -466,6 +463,7 @@ cc_library_static {
"src/cpu/kernels/activation/generic/neon/qasymm8.cpp",
"src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/activation/generic/neon/qsymm16.cpp",
"src/cpu/kernels/activation/heuristics/CpuActivationKernelHeuristics.cpp",
"src/cpu/kernels/add/generic/neon/fp16.cpp",
"src/cpu/kernels/add/generic/neon/fp32.cpp",
"src/cpu/kernels/add/generic/neon/impl.cpp",
Expand Down Expand Up @@ -1032,6 +1030,7 @@ cc_library_static {
"src/runtime/experimental/operators/CpuGemmConv2d.cpp",
"src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp",
"src/runtime/experimental/operators/CpuMul.cpp",
"src/runtime/experimental/operators/CpuSoftmax.cpp",
"src/runtime/experimental/operators/CpuSub.cpp",
"src/runtime/experimental/operators/CpuTranspose.cpp",
"src/runtime/experimental/operators/CpuWinogradConv2d.cpp",
Expand Down
21 changes: 13 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
project(
ArmCompute
VERSION 41.0.0
VERSION 42.0.0
DESCRIPTION
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
LANGUAGES C CXX ASM)
Expand Down Expand Up @@ -138,11 +138,12 @@ if(ARM_COMPUTE_OPENMP)
endif()

# ---------------------------------------------------------------------
# SVE Library
# SVE Object Library

add_library(arm_compute_sve "")
add_library(arm_compute_sve OBJECT "")
target_compile_options(arm_compute_sve
PRIVATE "-march=armv8.2-a+sve+fp16+dotprod")
PRIVATE "-march=armv8.2-a+sve+fp16+dotprod"
PRIVATE "-fPIC")
target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_BF16)
target_compile_definitions(arm_compute_sve PRIVATE ENABLE_SVE)
target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_SVE)
Expand All @@ -160,11 +161,12 @@ target_include_directories(
src/core/NEON/kernels/arm_gemm/merges)

# ---------------------------------------------------------------------
# SVE2 Library
# SVE2 Object Library

add_library(arm_compute_sve2 "")
add_library(arm_compute_sve2 OBJECT "")
target_compile_options(arm_compute_sve2
PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod")
PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod"
PRIVATE "-fPIC")
target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE2)
target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_BF16)
target_compile_definitions(arm_compute_sve2 PRIVATE ENABLE_SVE)
Expand Down Expand Up @@ -205,8 +207,11 @@ target_include_directories(
target_compile_options(arm_compute PUBLIC ${COMMON_CXX_FLAGS})

add_library(ArmCompute::Core ALIAS arm_compute)

# The arm_compute_sve and arm_compute_sve2 object files are linked PRIVATE, so they are not part of arm_compute's public link interface
target_link_libraries(
arm_compute PUBLIC arm_compute_sve arm_compute_sve2)
arm_compute PRIVATE $<TARGET_OBJECTS:arm_compute_sve>
PRIVATE $<TARGET_OBJECTS:arm_compute_sve2>)

# ---------------------------------------------------------------------
# Graph Library
Expand Down
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
</div>

# Compute Library ![](https://img.shields.io/badge/latest_release-24.08.1-green)
# Compute Library ![](https://img.shields.io/badge/latest_release-24.09-green)


The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPU architectures.<br>
Expand Down Expand Up @@ -37,7 +37,7 @@ Key Features:
<br>

## Documentation
[![Documentation](https://img.shields.io/badge/documentation-24.08.1-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/index.xhtml)
[![Documentation](https://img.shields.io/badge/documentation-24.09-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/index.xhtml)

> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
Expand All @@ -50,22 +50,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C

| Platform | Operating System | Release archive (Download) |
| -------------- | ---------------- | -------------------------- |
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-bin.tar.gz) |
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) |
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) |
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) |
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |

<br>

| Architecture | Operating System | Release archive (Download) |
| ------------ | ---------------- | -------------------------- |
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-android-aarch64-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |

<br>

Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.08.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.08.1)
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.09-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.09)

Pre-built binaries are generated with the following flags related to security and good coding practices:
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
Expand Down Expand Up @@ -107,13 +107,13 @@ Pre-build binaries are generated with the following security / good coding pract

## Experimental builds

**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/how_to_build.xhtml) for more details.
**⚠ Important** Bazel and CMake builds are experimental, CPU-only builds; please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/how_to_build.xhtml) for more details.

<br>

## How to contribute

Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/contribution_guidelines.xhtml).
Contributions to the Compute Library are more than welcome. If you are interested in contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/contribution_guidelines.xhtml).

### Developer Certificate of Origin (DCO)
Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)
Expand Down
4 changes: 2 additions & 2 deletions SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ import codecs
import platform
import SCons

VERSION = "v24.08.1"
LIBRARY_VERSION_MAJOR = 41
VERSION = "v24.09"
LIBRARY_VERSION_MAJOR = 42
LIBRARY_VERSION_MINOR = 0
LIBRARY_VERSION_PATCH = 0
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
Expand Down
8 changes: 6 additions & 2 deletions SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,12 @@ if env['cppthreads']:

if env['openmp']:
env.Append(CPPDEFINES = [('ARM_COMPUTE_OPENMP_SCHEDULER', 1)])
env.Append(CXXFLAGS = ['-fopenmp'])
env.Append(LINKFLAGS = ['-fopenmp'])
if not 'windows' in env['os']:
env.Append(CXXFLAGS = ['-fopenmp'])
env.Append(LINKFLAGS = ['-fopenmp'])
else:
env.Append(CXXFLAGS = ['-openmp'])
env.Append(LINKFLAGS = ['libomp.lib'])

# Validate and define state
if env['estate'] == 'auto':
Expand Down
3 changes: 3 additions & 0 deletions arm_compute/core/utils/DataTypeUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,12 @@ inline size_t element_size_from_data_type(DataType dt)
case DataType::S32:
case DataType::F32:
return 4;
case DataType::F64:
case DataType::U64:
case DataType::S64:
return 8;
case DataType::SIZET:
return sizeof(size_t); // portable
default:
ARM_COMPUTE_ERROR("Undefined element size for given data type");
return 0;
Expand Down
10 changes: 6 additions & 4 deletions arm_compute/runtime/CL/CLScheduler.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2022 Arm Limited.
* Copyright (c) 2016-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_CLSCHEDULER_H
#define ARM_COMPUTE_CLSCHEDULER_H
#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H
#define ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLTypes.h"
Expand Down Expand Up @@ -211,6 +211,8 @@ class CLScheduler final
bool _job_chaining_enabled;
int _job_chaining_size;
int _job_chaining_count;
unsigned int _enqueue_count;
unsigned int _flush_count;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSCHEDULER_H */
#endif // ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H
11 changes: 7 additions & 4 deletions arm_compute/runtime/CL/CLTensorAllocator.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021 Arm Limited.
* Copyright (c) 2016-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_CLTENSORALLOCATOR_H
#define ARM_COMPUTE_CLTENSORALLOCATOR_H
#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H
#define ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H

#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"
Expand Down Expand Up @@ -106,6 +106,9 @@ class CLTensorAllocator : public ITensorAllocator
*
*/
void free() override;

bool is_allocated() const override;

/** Import an existing memory as a tensor's backing memory
*
* @warning memory should have been created under the same context that Compute Library uses.
Expand Down Expand Up @@ -156,4 +159,4 @@ class CLTensorAllocator : public ITensorAllocator
CLInt32Array _offset; /**< Offsets array in case of quantized per channel data type */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLTENSORALLOCATOR_H */
#endif // ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H
14 changes: 10 additions & 4 deletions arm_compute/runtime/ITensorAllocator.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021 Arm Limited.
* Copyright (c) 2016-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_ITENSORALLOCATOR_H
#define ARM_COMPUTE_ITENSORALLOCATOR_H
#ifndef ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H
#define ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
Expand Down Expand Up @@ -91,6 +91,12 @@ class ITensorAllocator
*/
virtual void free() = 0;

/** Return whether the tensor is currently allocated.
*
* @return true if the tensor is allocated, false otherwise.
*/
virtual bool is_allocated() const = 0;

protected:
/** Interface to be implemented by the child class to lock the memory allocation for the CPU to access.
*
Expand All @@ -106,4 +112,4 @@ class ITensorAllocator
size_t _alignment{}; /**< Tensor's alignment in bytes */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_ITENSORALLOCATOR_H */
#endif // ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class NEArithmeticSubtraction : public IFunction
* |QASYMM8 |QASYMM8 |QASYMM8 |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
* |QSYMM16 |QSYMM16 |QASYMM16 |
* |QSYMM16 |QSYMM16 |S32 |
* |U8 |U8 |U8 |
* |S16 |S16 |S16 |
* |S32 |S32 |S32 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ class NEPixelWiseMultiplication : public IFunction
* |U8 |S16 |S16 |
* |S16 |U8 |S16 |
* |S16 |S16 |S16 |
* |S32 |S32 |S32 |
* |F16 |F16 |F16 |
* |F32 |S32 |F32 |
* |F32 |F32 |F32 |
*
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
Expand Down
15 changes: 5 additions & 10 deletions arm_compute/runtime/NEON/functions/NEReverse.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ class NEReverse : public INESimpleFunctionNoBorder
* - All
*
* Valid data type configurations:
* |src0 |src1 |dst |
* |:--------------|:--------------|:--------------|
* |All |U32, S32 |All |
* |src0 |src1 |dst |
* |:---------------------------|:--------------|:---------------------------|
* |All except SIZET <= 32-bits |U32, S32 |All except SIZET <= 32-bits |
*
* @param[in] input Input tensor. Data types supported: All
* @param[in] input Input tensor. Data types supported: All except SIZET <= 32-bit data types
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
* @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
Expand All @@ -60,12 +60,7 @@ class NEReverse : public INESimpleFunctionNoBorder
void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false);
/** Static function to check if given info will lead to a valid configuration of NEReverseKernel
*
* @param[in] input Input tensor info. Data types supported: All
* @param[in] output Output tensor info. Data type supported: Same as @p input
* @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
* @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
*
* @return a status
* Similar to @ref NEReverse::configure()
*/
static Status validate(const ITensorInfo *input,
const ITensorInfo *output,
Expand Down
Loading

0 comments on commit c61bd33

Please sign in to comment.