Skip to content

Commit

Permalink
Partially change the texture str_tex. The code IS NOT compiling yet w…
Browse files Browse the repository at this point in the history
…ith CUDA.

- We understand that str_tex = h_str; previously this was mediated by the variable gammaArray.
- h_str is not accessible from str_tex. We don't know whether to pass it as an argument or declare it in a header.
  • Loading branch information
charlyqchm committed May 16, 2024
1 parent e776251 commit 8da1460
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 37 deletions.
15 changes: 11 additions & 4 deletions g2g/analytic_integral/cuda/coulomb.cu
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,11 @@ void CoulombIntegral<scalar_type>::calc_gradient( double* qm_forces, bool cpu_fi
//
// The STR table for F(m,U) calculation is being accessed via texture fetches
//
cudaBindTextureToArray(str_tex,gammaArray);
// cudaBindTextureToArray(str_tex,gammaArray);


G2G::CudaMatrix<scalar_type> str_tex;
str_tex = h_str;

#define coulomb_forces_parameters \
os_int.term_type_counts[i], os_int.factor_ac_dev.data, os_int.nuc_dev.data, os_int.dens_values_dev.data+dens_offset, os_int.func_code_dev.data+offset,os_int.local_dens_dev.data+offset, \
Expand Down Expand Up @@ -303,7 +307,8 @@ void CoulombIntegral<scalar_type>::calc_gradient( double* qm_forces, bool cpu_fi
cudaStreamDestroy(stream[i]);
}

cudaUnbindTexture(str_tex);
// cudaUnbindTexture(str_tex);
str_tex.deallocate();

os_int.get_gradient_output(qm_forces, partial_out_size);

Expand Down Expand Up @@ -337,7 +342,9 @@ void CoulombIntegral<scalar_type>::fit_aux_density( void )
//
// The STR table for F(m,U) calculation is being accessed via texture fetches
//
cudaBindTextureToArray(str_tex,gammaArray);
// cudaBindTextureToArray(str_tex,gammaArray);
G2G::CudaMatrix<scalar_type> str_tex;
str_tex=h_str;

#define fit1_parameters \
os_int.term_type_counts[i], os_int.factor_ac_dev.data, os_int.nuc_dev.data, os_int.dens_values_dev.data+dens_offset, os_int.func_code_dev.data+offset,os_int.local_dens_dev.data+offset, \
Expand Down Expand Up @@ -473,7 +480,7 @@ void CoulombIntegral<scalar_type>::calc_fock( double& Es )
cudaStreamDestroy(stream[i]);
}

cudaUnbindTexture(str_tex);
// cudaUnbindTexture(str_tex);

/* The procedure os_int.get_fock_output will calculate the coulomb term for the fock matrix and the coulomb energy
contribution. As for the energy, closed shell goes through N/2 MO and then multiplies times two the results, the
Expand Down
7 changes: 3 additions & 4 deletions g2g/analytic_integral/cuda/gpu_vars/os_gpu_variables.h
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
#ifndef _OS_GPU_VARIABLES_H
#define _OS_GPU_VARIABLES_H

extern cudaArray* gammaArray;
// extern cudaArray* gammaArray;

extern __device__ __constant__ uint gpu_m;

#if !AINT_MP || FULL_DOUBLE
extern __device__ __constant__ double gpu_fac[17];
extern texture<int2, cudaTextureType2D, cudaReadModeElementType>
str_tex; // Texture for STR array (used in F(m,U))
// extern texture<int2, cudaTextureType2D, cudaReadModeElementType> str_tex; // Texture for STR array (used in F(m,U))
#else
extern __device__ __constant__ float gpu_fac[17];
extern texture<float, cudaTextureType2D, cudaReadModeElementType> str_tex;
// extern texture<float, cudaTextureType2D, cudaReadModeElementType> str_tex;
#endif

extern __device__ __constant__ uint TERM_TYPE_GAUSSIANS[6]; // How many
Expand Down
36 changes: 18 additions & 18 deletions g2g/analytic_integral/cuda/kernels/os_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@
//#define PI 3.141592653589793238462643383
//#define PI52 17.49341832762486284626282167987

#if FULL_DOUBLE || !AINT_MP
static __inline__ __device__ double os_fetch_double(texture<int2, 2> t, float x,
float y) {
int2 v = tex2D(t, x, y);
return __hiloint2double(v.y, v.x);
}
#define os_fetch(t, x, y) os_fetch_double(t, x, y)
#else
#define os_fetch(t, x, y) tex2D(t, x, y)
#endif
// #if FULL_DOUBLE || !AINT_MP
// static __inline__ __device__ double os_fetch_double(texture<int2, 2> t, float x,
// float y) {
// int2 v = tex2D(t, x, y);
// return __hiloint2double(v.y, v.x);
// }
// #define os_fetch(t, x, y) os_fetch_double(t, x, y)
// #else
// #define os_fetch(t, x, y) tex2D(t, x, y)
// #endif

//
// Calculates F(m,U) values for m = 0 to max_m (F(m,U) is used in the
Expand All @@ -37,6 +37,7 @@ template <class scalar_type, int m_max>
__device__ void lio_gamma(scalar_type* __restrict__ F_mU, scalar_type U) {
int it;
scalar_type ti, delt, delt2, delt3, delt4, delt5;
// uint mc=COALESCED_DIMENSION(m);

// Calculate small-U branch value of F(m,U)
// TODO: need to rethink how this branch (Taylor series expansion) is
Expand All @@ -56,12 +57,12 @@ __device__ void lio_gamma(scalar_type* __restrict__ F_mU, scalar_type U) {
delt5 = 0.20 * delt;

scalar_type tf0, tf1, tf2, tf3, tf4, tf5;
tf0 = os_fetch(str_tex, (float)it, 0.0); // qmmm_str[it];
tf1 = os_fetch(str_tex, (float)it, 1.0); // qmmm_str[it+880];
tf2 = os_fetch(str_tex, (float)it, 2.0); // qmmm_str[it+1760];
tf3 = os_fetch(str_tex, (float)it, 3.0); // qmmm_str[it+2640];
tf4 = os_fetch(str_tex, (float)it, 4.0); // qmmm_str[it+3520];
tf5 = os_fetch(str_tex, (float)it, 5.0); // qmmm_str[it+4400];
tf0 = str_tex[it]; // qmmm_str[it];
tf1 = str_tex[it+880]; // qmmm_str[it+880];
tf2 = str_tex[it+1760]; // qmmm_str[it+1760];
tf3 = str_tex[it+2640]; // qmmm_str[it+2640];
tf4 = str_tex[it+3520]; // qmmm_str[it+3520];
tf5 = str_tex[it+4400]; // qmmm_str[it+4400];

F_mU[0] =
tf0 -
Expand All @@ -73,8 +74,7 @@ __device__ void lio_gamma(scalar_type* __restrict__ F_mU, scalar_type U) {
tf2 = tf3;
tf3 = tf4;
tf4 = tf5;
tf5 = os_fetch(str_tex, (float)it,
(float)(m + 5.0)); // qmmm_str[it+(m+5)*880];
tf5 = str_tex[it+(m + 5.0)*880]; // qmmm_str[it+(m+5)*880];

F_mU[m] =
tf0 -
Expand Down
17 changes: 11 additions & 6 deletions g2g/analytic_integral/cuda/os_common.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ using std::endl;
namespace AINT
{

cudaArray* gammaArray;
// cudaArray* gammaArray;
__device__ __constant__ uint gpu_m;
#if !AINT_MP || FULL_DOUBLE
texture<int2, cudaTextureType2D, cudaReadModeElementType> str_tex; // Texture for STR array (used in F(m,U))
// texture<int2, cudaTextureType2D, cudaReadModeElementType> str_tex; // Texture for STR array (used in F(m,U))
__device__ __constant__ double gpu_fac[17];
#else
texture<float, cudaTextureType2D, cudaReadModeElementType> str_tex;
// texture<float, cudaTextureType2D, cudaReadModeElementType> str_tex;
__device__ __constant__ float gpu_fac[17];
#endif

Expand Down Expand Up @@ -85,11 +85,16 @@ void OSIntegral<scalar_type>::load_params(void)
h_fac(i) = integral_vars.fac(i);
}

G2G::CudaMatrix<scalar_type> str_tex;

str_tex.normalized = false;
str_tex.filterMode = cudaFilterModePoint;

cudaMallocArray(&gammaArray,&str_tex.channelDesc,880,22);
cudaMemcpyToArray(gammaArray,0,0,h_str.data,sizeof(scalar_type)*880*22,cudaMemcpyHostToDevice);
// cudaMallocArray(&gammaArray,&str_tex.channelDesc,880,22);
// cudaMemcpyToArray(gammaArray,0,0,h_str.data,sizeof(scalar_type)*880*22,cudaMemcpyHostToDevice);

str_tex = h_str;

cudaMemcpyToSymbol(gpu_fac,h_fac.data,h_fac.bytes(),0,cudaMemcpyHostToDevice);

cudaSetDevice(previous_device);
Expand Down Expand Up @@ -131,7 +136,7 @@ void OSIntegral<scalar_type>::deinit( void )
{
clear();

cudaFreeArray(gammaArray);
// cudaFreeArray(gammaArray);

cudaAssertNoError("OSIntegral::deinit");
}
Expand Down
13 changes: 8 additions & 5 deletions g2g/analytic_integral/cuda/qmmm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ void QMMMIntegral<scalar_type>::calc_fock( double& Es, bool do_cl, bool do_qm )
//
// The STR table for F(m,U) calculation is being accessed via texture fetches
//
cudaBindTextureToArray(str_tex,gammaArray);
str_tex=h_str;

#define qmmm_fock_parameters \
os_int.term_type_counts[i], os_int.factor_ac_dev.data, os_int.nuc_dev.data, os_int.func_code_dev.data+offset,os_int.local_dens_dev.data+offset, \
Expand Down Expand Up @@ -187,7 +187,8 @@ void QMMMIntegral<scalar_type>::calc_fock( double& Es, bool do_cl, bool do_qm )
cudaStreamDestroy(stream[i]);
}

cudaUnbindTexture(str_tex);
// cudaUnbindTexture(str_tex);
str_tex.deallocate();

os_int.get_fock_output(Es,integral_vars.rmm_1e_output);

Expand Down Expand Up @@ -215,11 +216,12 @@ void QMMMIntegral<scalar_type>::calc_gradient(double* qm_forces, double* mm_forc
//
// The STR table for F(m,U) calculation is being accessed via texture fetches
//
cudaBindTextureToArray(str_tex,gammaArray);
// cudaBindTextureToArray(str_tex,gammaArray);
str_tex = h_str;

#define qmmm_forces_parameters \
os_int.term_type_counts[i], os_int.factor_ac_dev.data, os_int.nuc_dev.data, os_int.dens_values_dev.data+dens_offset, os_int.func_code_dev.data+offset,os_int.local_dens_dev.data+offset, \
partial_mm_forces_dev.data+force_offset, os_int.partial_qm_forces_dev.data+force_offset, COALESCED_DIMENSION(partial_out_size),clatom_pos_dev.data,clatom_chg_dev.data
partial_mm_forces_dev.data+force_offset, os_int.partial_qm_forces_dev.data+force_offset, COALESCED_DIMENSION(partial_out_size),clatom_pos_dev.data,clatom_chg_dev.data,str_tex.data
// Each term type is calculated asynchronously
cudaStream_t stream[NUM_TERM_TYPES];
for (uint i = 0; i < NUM_TERM_TYPES; i++) {
Expand Down Expand Up @@ -275,7 +277,8 @@ void QMMMIntegral<scalar_type>::calc_gradient(double* qm_forces, double* mm_forc
cudaStreamDestroy(stream[i]);
}

cudaUnbindTexture(str_tex);
// cudaUnbindTexture(str_tex);
str_tex.deallocate();

os_int.get_gradient_output(qm_forces,partial_out_size);
if (integral_vars.clatoms > 0) get_gradient_output(mm_forces,partial_out_size);
Expand Down

0 comments on commit 8da1460

Please sign in to comment.