THC_MIN_MATH: reduced-math build of cutorch / THC for Android.

What the macro does, as established by the hunks below:
  * THCGenerate{Byte,Char,Short,Int,Double,Half}Type.h define
    THC_GENERIC_NO_MATH around each generic include; the generic files
    guarded by it (TensorOperator.c, THCTensorMath{Blas,CompareT,Magma,
    Pairwise,Pointwise,Reduce,Scan}.cu, THCTensorRandom.cu,
    THCTensorScatterGather.cu, THCTensorSort.cu) are compiled out for
    those types.
  * THCHalf.h no longer defines CUDA_HALF_TENSOR.
  * init.c registers only the CudaTensor Operator/Math tables and sets the
    "minMath" field to true.
  * sumall/minall move above the guard in THCTensorMathReduce.cu so they
    stay available for every type.

Review fix: lib/THC/CMakeLists.txt used to define THC_MIN_MATH
unconditionally (its IF/ENDIF guard was commented out), which silently
turned the reduced build on for every platform while the top-level
CMakeLists.txt only enables it on Android. Both files now use the same
IF (ANDROID) guard.

NOTE(review): in this copy of the patch several C++ template argument lists
and the kernel launch configuration look stripped (e.g. "thrust::identity()",
"ScalarConvert::to(0)", "<<>>", bare "template") -- confirm against the
upstream sources before applying. Context-line indentation was
reconstructed, so apply with --ignore-whitespace.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9d1d0a0f..29802cd1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,10 +11,15 @@ IF (NOT WIN32)
 SET(CMAKE_C_FLAGS "-std=c99 -Werror=implicit-function-declaration ${CMAKE_C_FLAGS}")
 ENDIF (NOT WIN32)
 IF(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
-  ADD_DEFINITIONS(-DTH_GENERIC_USE_HALF=1)
   ADD_DEFINITIONS(-DCUDA_HAS_FP16=1)
 ENDIF()
 
+# Android builds use the reduced-math variant of the bindings (see
+# THC_GENERIC_NO_MATH); lib/THC/CMakeLists.txt applies the same guard.
+IF (ANDROID)
+  ADD_DEFINITIONS(-DTHC_MIN_MATH)
+ENDIF()
+
 INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
 
 ADD_SUBDIRECTORY(lib)
diff --git a/TensorMath.lua b/TensorMath.lua
index 936d897e..d1f63745 100644
--- a/TensorMath.lua
+++ b/TensorMath.lua
@@ -551,6 +551,7 @@ local handledTypeaccreals = {
 
 for k, Tensor_ in pairs(handledTypenames) do
     Tensor = Tensor_
+
     if Tensor == 'CudaHalfTensor' then
        interface:print("#ifdef CUDA_HALF_TENSOR")
     end
@@ -592,6 +593,8 @@ for k, Tensor_ in pairs(handledTypenames) do
          cname("zero"),
          {{name=Tensor, returned=true}})
 
+    interface:print("#ifndef THC_MIN_MATH")
+
     wrap("zeros",
          cname("zeros"),
          {{name=Tensor, default=true, returned=true, method={default='nil'}},
@@ -1335,6 +1338,8 @@ void cutorch_%sMath_init(lua_State *L)
 }
 ]], Tensor, Tensor, Tensor, Tensor))
 
+    interface:print("#endif")
+
     if Tensor == 'CudaHalfTensor' then
        interface:print("#endif")
     end
diff --git a/generic/TensorOperator.c b/generic/TensorOperator.c
index ddd50a40..4990178c 100644
--- a/generic/TensorOperator.c
+++ b/generic/TensorOperator.c
@@ -2,6 +2,7 @@
 #define THC_GENERIC_FILE "generic/TensorOperator.c"
 #else
 
+#ifndef THC_GENERIC_NO_MATH
 static int cutorch_TensorOperator_(__add__)(lua_State *L)
 {
   THCTensor *tensor1 = luaT_toudata(L, 1, torch_Tensor);
@@ -242,13 +243,16 @@ static int cutorch_TensorOperator_(__div__)(lua_State *L)
   return 1;
 }
 
+#endif
 
 static const struct luaL_Reg cutorch_TensorOperator_(_) [] = {
+#ifndef THC_GENERIC_NO_MATH
   {"__add__", cutorch_TensorOperator_(__add__)},
   {"__sub__", cutorch_TensorOperator_(__sub__)},
   {"__unm__", cutorch_TensorOperator_(__unm__)},
   {"__mul__", cutorch_TensorOperator_(__mul__)},
   {"__div__", cutorch_TensorOperator_(__div__)},
+#endif
   {NULL, NULL}
 };
 
diff --git a/init.c b/init.c
index 894be2ea..617146c1 100644
--- a/init.c
+++ b/init.c
@@ -1056,26 +1056,30 @@ int luaopen_libcutorch(lua_State *L)
   cutorch_HalfTensorCopy_init(L);
 #endif
 
+  cutorch_CudaTensorOperator_init(L);
+#ifndef THC_MIN_MATH
   cutorch_CudaByteTensorOperator_init(L);
   cutorch_CudaCharTensorOperator_init(L);
   cutorch_CudaShortTensorOperator_init(L);
   cutorch_CudaIntTensorOperator_init(L);
   cutorch_CudaLongTensorOperator_init(L);
-  cutorch_CudaTensorOperator_init(L);
   cutorch_CudaDoubleTensorOperator_init(L);
 #ifdef CUDA_HALF_TENSOR
   cutorch_CudaHalfTensorOperator_init(L);
+#endif
 #endif
 
+  cutorch_CudaTensorMath_init(L);
+#ifndef THC_MIN_MATH
   cutorch_CudaByteTensorMath_init(L);
   cutorch_CudaCharTensorMath_init(L);
   cutorch_CudaShortTensorMath_init(L);
   cutorch_CudaIntTensorMath_init(L);
   cutorch_CudaLongTensorMath_init(L);
-  cutorch_CudaTensorMath_init(L);
   cutorch_CudaDoubleTensorMath_init(L);
 #ifdef CUDA_HALF_TENSOR
   cutorch_CudaHalfTensorMath_init(L);
+#endif
 #endif
 
   cutorch_Event_init(L);
@@ -1091,6 +1095,13 @@ int luaopen_libcutorch(lua_State *L)
 #endif
   lua_setfield(L, -2, "hasHalf");
 
+#ifdef THC_MIN_MATH
+  lua_pushboolean(L, 1);
+#else
+  lua_pushboolean(L, 0);
+#endif
+  lua_setfield(L, -2, "minMath");
+
   /* store gpu driver version in field */
   int driverVersion;
   THCudaCheck(cudaDriverGetVersion(&driverVersion));
diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt
index a18451af..eeeaafc2 100644
--- a/lib/THC/CMakeLists.txt
+++ b/lib/THC/CMakeLists.txt
@@ -75,6 +75,13 @@ IF ($ENV{TH_BINARY_BUILD})
   SET(CMAKE_CXX_FLAGS "-static-libstdc++ ${CMAKE_CXX_FLAGS}")
 ENDIF()
 
+# Keep this guard identical to the one in the top-level CMakeLists.txt: if THC
+# is built with THC_MIN_MATH but the Lua bindings are not, the bindings would
+# reference math symbols that were compiled out of this library.
+IF (ANDROID)
+  ADD_DEFINITIONS(-DTHC_MIN_MATH)
+ENDIF()
+
 IF(APPLE)
   IF(${CUDA_VERSION} LESS 6.0)
     # work around for mac os x bug:
@@ -206,6 +213,7 @@ IF(USE_MAGMA)
   TARGET_LINK_LIBRARIES(THC ${MAGMA_LIBRARIES} ${CUDA_cusparse_LIBRARY})
 ENDIF(USE_MAGMA)
 
+IF(NOT ANDROID)
 IF(NOT THC_SO_VERSION)
   SET(THC_SO_VERSION 0)
 ENDIF(NOT THC_SO_VERSION)
@@ -213,7 +221,7 @@ MESSAGE(STATUS "THC_SO_VERSION: ${THC_SO_VERSION}")
 SET_TARGET_PROPERTIES(THC PROPERTIES
   VERSION   ${THC_SO_VERSION}
   SOVERSION ${THC_SO_VERSION})
-
+ENDIF()
 
 INSTALL(TARGETS THC
           RUNTIME DESTINATION "${THC_INSTALL_BIN_SUBDIR}"
diff --git a/lib/THC/THCGenerateByteType.h b/lib/THC/THCGenerateByteType.h
index c1cb415f..a7278a26 100644
--- a/lib/THC/THCGenerateByteType.h
+++ b/lib/THC/THCGenerateByteType.h
@@ -7,6 +7,9 @@
 #define Real Byte
 #define CReal CudaByte
 #define THC_REAL_IS_BYTE
+# ifdef THC_MIN_MATH
+#  define THC_GENERIC_NO_MATH 1
+# endif
 #line 1 THC_GENERIC_FILE
 #include THC_GENERIC_FILE
 #undef real
@@ -14,6 +17,7 @@
 #undef Real
 #undef CReal
 #undef THC_REAL_IS_BYTE
+#undef THC_GENERIC_NO_MATH
 
 #ifndef THCGenerateAllTypes
 #undef THC_GENERIC_FILE
diff --git a/lib/THC/THCGenerateCharType.h b/lib/THC/THCGenerateCharType.h
index f16a3cac..31e5d83d 100644
--- a/lib/THC/THCGenerateCharType.h
+++ b/lib/THC/THCGenerateCharType.h
@@ -7,6 +7,9 @@
 #define Real Char
 #define CReal CudaChar
 #define THC_REAL_IS_CHAR
+# ifdef THC_MIN_MATH
+#  define THC_GENERIC_NO_MATH 1
+# endif
 #line 1 THC_GENERIC_FILE
 #include THC_GENERIC_FILE
 #undef real
@@ -14,7 +17,7 @@
 #undef Real
 #undef CReal
 #undef THC_REAL_IS_CHAR
-
+#undef THC_GENERIC_NO_MATH
 #ifndef THCGenerateAllTypes
 #undef THC_GENERIC_FILE
 #endif
diff --git a/lib/THC/THCGenerateDoubleType.h b/lib/THC/THCGenerateDoubleType.h
index fdf6a8eb..9d1af647 100644
--- a/lib/THC/THCGenerateDoubleType.h
+++ b/lib/THC/THCGenerateDoubleType.h
@@ -7,6 +7,9 @@
 #define Real Double
 #define CReal CudaDouble
 #define THC_REAL_IS_DOUBLE
+# ifdef THC_MIN_MATH
+#  define THC_GENERIC_NO_MATH 1
+# endif
 #line 1 THC_GENERIC_FILE
 #include THC_GENERIC_FILE
 #undef real
@@ -14,7 +17,7 @@
 #undef Real
 #undef CReal
 #undef THC_REAL_IS_DOUBLE
-
+#undef THC_GENERIC_NO_MATH
 #ifndef THCGenerateAllTypes
 #ifndef THCGenerateFloatTypes
 #undef THC_GENERIC_FILE
diff --git a/lib/THC/THCGenerateHalfType.h b/lib/THC/THCGenerateHalfType.h
index 77d4c0ad..a982b98f 100644
--- a/lib/THC/THCGenerateHalfType.h
+++ b/lib/THC/THCGenerateHalfType.h
@@ -15,14 +15,16 @@
 #ifdef CUDA_HALF_TENSOR
 #define CReal CudaHalf
 #endif
-
+# ifdef THC_MIN_MATH
+#  define THC_GENERIC_NO_MATH 1
+# endif
 #define THC_REAL_IS_HALF
 #line 1 THC_GENERIC_FILE
 #include THC_GENERIC_FILE
 #undef real
 #undef accreal
 #undef Real
-
+#undef THC_GENERIC_NO_MATH
 #ifdef CUDA_HALF_TENSOR
 #undef CReal
 #endif
diff --git a/lib/THC/THCGenerateIntType.h b/lib/THC/THCGenerateIntType.h
index 41ca248a..77adcf6c 100644
--- a/lib/THC/THCGenerateIntType.h
+++ b/lib/THC/THCGenerateIntType.h
@@ -7,6 +7,9 @@
 #define Real Int
 #define CReal CudaInt
 #define THC_REAL_IS_INT
+# ifdef THC_MIN_MATH
+#  define THC_GENERIC_NO_MATH 1
+# endif
 #line 1 THC_GENERIC_FILE
 #include THC_GENERIC_FILE
 #undef real
@@ -14,7 +17,7 @@
 #undef Real
 #undef CReal
 #undef THC_REAL_IS_INT
-
+#undef THC_GENERIC_NO_MATH
 #ifndef THCGenerateAllTypes
 #undef THC_GENERIC_FILE
 #endif
diff --git a/lib/THC/THCGenerateShortType.h b/lib/THC/THCGenerateShortType.h
index ae85f8c8..982d2ce5 100644
--- a/lib/THC/THCGenerateShortType.h
+++ b/lib/THC/THCGenerateShortType.h
@@ -7,6 +7,9 @@
 #define Real Short
 #define CReal CudaShort
 #define THC_REAL_IS_SHORT
+# ifdef THC_MIN_MATH
+#  define THC_GENERIC_NO_MATH 1
+# endif
 #line 1 THC_GENERIC_FILE
 #include THC_GENERIC_FILE
 #undef real
@@ -14,7 +17,7 @@
 #undef Real
 #undef CReal
 #undef THC_REAL_IS_SHORT
-
+#undef THC_GENERIC_NO_MATH
 #ifndef THCGenerateAllTypes
 #undef THC_GENERIC_FILE
 #endif
diff --git a/lib/THC/THCHalf.h b/lib/THC/THCHalf.h
index 7c055e7a..af7f63fd 100644
--- a/lib/THC/THCHalf.h
+++ b/lib/THC/THCHalf.h
@@ -3,10 +3,12 @@
 
 #include "THCGeneral.h"
 
+#ifndef THC_MIN_MATH
 /* We compile with CudaHalfTensor support if we have this: */
 #if CUDA_VERSION >= 7050 || CUDA_HAS_FP16
 #define CUDA_HALF_TENSOR 1
 #endif
+#endif
 
 #ifdef CUDA_HALF_TENSOR
 
diff --git a/lib/THC/generic/THCTensorMath.cu b/lib/THC/generic/THCTensorMath.cu
index 46746f72..b73488fe 100644
--- a/lib/THC/generic/THCTensorMath.cu
+++ b/lib/THC/generic/THCTensorMath.cu
@@ -390,5 +390,4 @@ accreal THCTensor_(trace)(THCState *state, THCTensor *src_) {
   THCTensor_(free)(state, diag);
   return trace;
 }
-
 #endif
diff --git a/lib/THC/generic/THCTensorMathBlas.cu b/lib/THC/generic/THCTensorMathBlas.cu
index 63c9989f..837b4276 100644
--- a/lib/THC/generic/THCTensorMathBlas.cu
+++ b/lib/THC/generic/THCTensorMathBlas.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorMathBlas.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 THC_API accreal
 THCTensor_(dot)(THCState *state, THCTensor *self, THCTensor *src)
 {
@@ -596,5 +596,6 @@ THCTensor_(baddbmm)(THCState *state, THCTensor *result, real beta, THCTensor *t,
   THError("unimplemented data type");
 #endif
 }
+# endif /* THC_GENERIC_NO_MATH */
 
 #endif
diff --git a/lib/THC/generic/THCTensorMathCompareT.cu b/lib/THC/generic/THCTensorMathCompareT.cu
index 4b59abf1..7085681f 100644
--- a/lib/THC/generic/THCTensorMathCompareT.cu
+++ b/lib/THC/generic/THCTensorMathCompareT.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorMathCompareT.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 THC_API void
 THCTensor_(ltTensor)(THCState *state, THCudaByteTensor *self_, THCTensor *src1, THCTensor *src2)
 {
@@ -109,5 +109,5 @@ THCTensor_(neTensorT)(THCState *state, THCTensor *self_, THCTensor *src1, THCTen
                     TensorNEOp::DataType,
                     typename TensorUtils::DataType>());
 }
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorMathMagma.cu b/lib/THC/generic/THCTensorMathMagma.cu
index 635834dd..84ba8d1b 100644
--- a/lib/THC/generic/THCTensorMathMagma.cu
+++ b/lib/THC/generic/THCTensorMathMagma.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorMathMagma.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE)
 
 #ifdef USE_MAGMA
@@ -646,5 +646,5 @@ THC_API void THCTensor_(qr)(THCState *state, THCTensor *rq_, THCTensor *rr_, THC
 }
 
 #endif
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorMathPairwise.cu b/lib/THC/generic/THCTensorMathPairwise.cu
index 0b4094b8..8986f36c 100644
--- a/lib/THC/generic/THCTensorMathPairwise.cu
+++ b/lib/THC/generic/THCTensorMathPairwise.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorMathPairwise.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 THC_API void
 THCTensor_(add)(THCState *state, THCTensor *self_, THCTensor *src_, real value)
 {
@@ -209,5 +209,5 @@ THC_API int THCTensor_(equal)(THCState *state, THCTensor *self_, THCTensor *src_
 
   return min != 0;
 }
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorMathPointwise.cu b/lib/THC/generic/THCTensorMathPointwise.cu
index b97908a2..5dfa0275 100644
--- a/lib/THC/generic/THCTensorMathPointwise.cu
+++ b/lib/THC/generic/THCTensorMathPointwise.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorMathPointwise.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 #define IMPLEMENT_CUDA_TENSOR_BASIC_FUNC_(NAME, CFUNC, REAL)             \
   struct Tensor_##NAME##_##REAL##_Op {                                  \
     __device__ __forceinline__ void operator()(real* out, real* in) const { \
@@ -518,5 +518,5 @@ THCTensor_(addcdiv)(THCState *state, THCTensor *self_, THCTensor *t, real value,
 
   THCudaCheck(cudaGetLastError());
 }
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorMathReduce.cu b/lib/THC/generic/THCTensorMathReduce.cu
index ed0e2049..36e7940e 100644
--- a/lib/THC/generic/THCTensorMathReduce.cu
+++ b/lib/THC/generic/THCTensorMathReduce.cu
@@ -16,6 +16,41 @@ THCTensor_(sum)(THCState* state, THCTensor *self, THCTensor *src, long dimension
   THCudaCheck(cudaGetLastError());
 }
 
+THC_API accreal
+THCTensor_(sumall)(THCState *state, THCTensor *self) {
+  THAssert(THCTensor_(checkGPU)(state, 1, self));
+  accreal val;
+  if (!THC_reduceAll(state, self,
+                     thrust::identity(),
+                     ReduceAdd(),
+                     ReduceAdd(),
+                     ScalarConvert::to(0),
+                     &val, 0)) {
+    THArgCheck(false, 1, CUTORCH_DIM_WARNING);
+  }
+
+  THCudaCheck(cudaGetLastError());
+  return val;
+}
+
+THC_API real
+THCTensor_(minall)(THCState *state, THCTensor *self) {
+  THAssert(THCTensor_(checkGPU)(state, 1, self));
+  real val;
+  if (!THC_reduceAll(state, self,
+                     thrust::identity(),
+                     ReduceMin(),
+                     ReduceMin(),
+                     THCNumerics::max(), &val, 0)) {
+    THArgCheck(false, 1, CUTORCH_DIM_WARNING);
+  }
+
+  THCudaCheck(cudaGetLastError());
+  return val;
+}
+
+# ifndef THC_GENERIC_NO_MATH
+
 THC_API void
 THCTensor_(prod)(THCState* state, THCTensor *self, THCTensor *src, long dimension) {
   THAssert(THCTensor_(checkGPU)(state, 2, self, src));
@@ -246,22 +281,6 @@ accreal THCTensor_(dist)(THCState *state, THCTensor *self,
 
 #endif
 
-THC_API accreal
-THCTensor_(sumall)(THCState *state, THCTensor *self) {
-  THAssert(THCTensor_(checkGPU)(state, 1, self));
-  accreal val;
-  if (!THC_reduceAll(state, self,
-                     thrust::identity(),
-                     ReduceAdd(),
-                     ReduceAdd(),
-                     ScalarConvert::to(0),
-                     &val, 0)) {
-    THArgCheck(false, 1, CUTORCH_DIM_WARNING);
-  }
-
-  THCudaCheck(cudaGetLastError());
-  return val;
-}
 
 THC_API accreal
 THCTensor_(prodall)(THCState *state, THCTensor *self) {
@@ -293,22 +312,6 @@ THCTensor_(meanall)(THCState *state, THCTensor *self)
   return THCTensor_(sumall)(state, self)/THCTensor_(nElement)(state, self);
 }
 
-THC_API real
-THCTensor_(minall)(THCState *state, THCTensor *self) {
-  THAssert(THCTensor_(checkGPU)(state, 1, self));
-  real val;
-  if (!THC_reduceAll(state, self,
-                     thrust::identity(),
-                     ReduceMin(),
-                     ReduceMin(),
-                     THCNumerics::max(), &val, 0)) {
-    THArgCheck(false, 1, CUTORCH_DIM_WARNING);
-  }
-
-  THCudaCheck(cudaGetLastError());
-  return val;
-}
-
 THC_API real
 THCTensor_(maxall)(THCState *state, THCTensor *self) {
   THAssert(THCTensor_(checkGPU)(state, 1, self));
@@ -360,5 +363,5 @@ THCTensor_(min)(THCState *state,
     state, values, indices, src, dimension, init,
     MinValuePair::DataType, long>());
 }
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorMathScan.cu b/lib/THC/generic/THCTensorMathScan.cu
index 8a8e4345..29cdc36d 100644
--- a/lib/THC/generic/THCTensorMathScan.cu
+++ b/lib/THC/generic/THCTensorMathScan.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorMathScan.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 template
 __host__ void THCTensor_(scanOuterDim)(THCState *state, THCTensor *tgt,
                                        THCTensor *src, long dimension,
@@ -85,5 +85,5 @@ void THCTensor_(cumprod)(THCState *state, THCTensor *self, THCTensor *src, long
   return THCTensor_(scanDim)(state, self, src, dimension,
                              ScalarConvert::to(1.0), MulOp());
 }
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorRandom.cu b/lib/THC/generic/THCTensorRandom.cu
index f6d69799..202ae57f 100644
--- a/lib/THC/generic/THCTensorRandom.cu
+++ b/lib/THC/generic/THCTensorRandom.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorRandom.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 #define NUM_BLOCKS min((int)THCCeilDiv(size, (ptrdiff_t) BLOCK_SIZE), MAX_NUM_BLOCKS)
 
 #if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
@@ -347,5 +347,5 @@ THC_API void THCTensor_(geometric)(THCState* state, THCTensor *self_, double p)
   THCTensor_(freeCopyTo)(state, self, self_);
 };
 #undef NUM_BLOCKS
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorScatterGather.cu b/lib/THC/generic/THCTensorScatterGather.cu
index c120f88a..d1830c72 100644
--- a/lib/THC/generic/THCTensorScatterGather.cu
+++ b/lib/THC/generic/THCTensorScatterGather.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorScatterGather.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 #define RUN(TYPE, DIMS, REAL)                                           \
   THCudaTensor_gatherKernel                                             \
   <<>>(                                                                 \
@@ -262,5 +262,5 @@ THCTensor_(scatterFill)(THCState* state, THCTensor *tensor,
 }
 
 #undef RUN
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif
diff --git a/lib/THC/generic/THCTensorSort.cu b/lib/THC/generic/THCTensorSort.cu
index afef796e..052a021e 100644
--- a/lib/THC/generic/THCTensorSort.cu
+++ b/lib/THC/generic/THCTensorSort.cu
@@ -1,7 +1,7 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/THCTensorSort.cu"
 #else
-
+# ifndef THC_GENERIC_NO_MATH
 // In alignment with default sort on a c++ map, this function
 // will permute key and value tensors identically, and
 // in such a way that the 'key' tensor is ordered numerically
@@ -332,5 +332,5 @@ THC_API void THCTensor_(sort)(THCState* state,
 
   THCudaCheck(cudaGetLastError());
 }
-
+# endif /* THC_GENERIC_NO_MATH */
 #endif