Skip to content

Commit

Permalink
add safeguarding against potential integer overflows in a few places,…
Browse files Browse the repository at this point in the history
… other minor corrections
  • Loading branch information
Edgar Solomonik committed Nov 24, 2018
1 parent 4bd532c commit 58e4811
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 48 deletions.
2 changes: 1 addition & 1 deletion examples/recursive_matmul.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ void recursive_matmul(int n,
MPI_Comm_rank(pcomm, &rank);
MPI_Comm_size(pcomm, &num_pes);

if (num_pes == 1){
if (num_pes == 1 || m == 1 || n == 1 || k==1){
C["ij"] += 1.0*A["ik"]*B["kj"];
} else {
for (div=2; num_pes%div!=0; div++){}
Expand Down
20 changes: 13 additions & 7 deletions scalapack_tests/svd.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,33 +30,39 @@ bool svd(Matrix<dtype> A,

bool pass_orthogonality = true;

double nrm;
E.norm2(nrm);
if (nrm > m*n*1.E-6){
double nrm1, nrm2, nrm3;
E.norm2(nrm1);
if (nrm1 > m*n*1.E-6){
pass_orthogonality = false;
}

E["ii"] = 1.;

E["ij"] -= VT["ik"]*conj<dtype>(VT)["jk"];

E.norm2(nrm);
if (nrm > m*n*1.E-6){
E.norm2(nrm2);
if (nrm2 > m*n*1.E-6){
pass_orthogonality = false;
}

A["ij"] -= U["ik"]*S["k"]*VT["kj"];

bool pass_residual = true;
A.norm2(nrm);
if (nrm > m*n*n*1.E-6){
A.norm2(nrm3);
if (nrm3 > m*n*n*1.E-6){
pass_residual = false;
}

#ifndef TEST_SUITE
if (dw.rank == 0){
printf("SVD orthogonality check returned %d, residual check %d\n", pass_orthogonality, pass_residual);
}
#else
if (!pass_residual || ! pass_orthogonality){
if (dw.rank == 0){
printf("SVD orthogonality check returned %d (%lf, %lf), residual check %d (%lf)\n", pass_orthogonality, nrm1, nrm2, pass_residual, nrm3);
}
}
#endif
return pass_residual & pass_orthogonality;
}
Expand Down
48 changes: 28 additions & 20 deletions src/contraction/contraction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2475,14 +2475,12 @@ namespace CTF_int {
#else
for (int t=global_comm.rank+1; t<(int)wrld->topovec.size()+8; t+=global_comm.np){
#endif
TAU_FSTART(evaluate_mappings_clear_and_init);
A->clear_mapping();
B->clear_mapping();
C->clear_mapping();
A->set_padding();
B->set_padding();
C->set_padding();
TAU_FSTOP(evaluate_mappings_clear_and_init);

topology * topo_i = NULL;
if (t < 8){
Expand Down Expand Up @@ -2511,9 +2509,7 @@ namespace CTF_int {
} else topo_i = wrld->topovec[t-8];
ASSERT(topo_i != NULL);

TAU_FSTART(map_ctr_to_topo);
ret = map_to_topology(topo_i, j);
TAU_FSTOP(map_ctr_to_topo);

if (ret == NEGATIVE){
//printf("map_to_topology returned negative\n");
Expand All @@ -2527,18 +2523,13 @@ namespace CTF_int {
B->topo = topo_i;
C->topo = topo_i;

TAU_FSTART(check_ctr_mapping);
if (check_mapping() == 0){
TAU_FSTOP(check_ctr_mapping);
continue;
}
TAU_FSTOP(check_ctr_mapping);
est_time = 0.0;
TAU_FSTART(evaluate_mappings_set_padding2);
A->set_padding();
B->set_padding();
C->set_padding();
TAU_FSTOP(evaluate_mappings_set_padding2);
#if DEBUG >= 3
if (global_comm.rank == 0){
printf("\nTest mappings:\n");
Expand All @@ -2565,8 +2556,14 @@ namespace CTF_int {
}
nnz_frac_C = std::min(1.,std::max(nnz_frac_C,nnz_frac_A*nnz_frac_B*len_ctr));
}
// check this early on to avoid 64-bit integer overflow
double size_memuse = A->size*nnz_frac_A*A->sr->el_size + B->size*nnz_frac_B*B->sr->el_size + C->size*nnz_frac_C*C->sr->el_size;
if (size_memuse >= (double)max_memuse){
if (global_comm.rank == 0)
DPRINTF(1,"Not enough memory available for topo %d with order %d to store tensors %ld/%ld\n", t,j,(int64_t)size_memuse,max_memuse);
continue;
}

TAU_FSTART(evaluate_mappings_folding);
#if FOLD_TSR
if (can_fold()){
est_time = est_time_fold();
Expand All @@ -2589,7 +2586,6 @@ namespace CTF_int {
est_time = sctr->est_time_rec(sctr->num_lyr);
}
}
TAU_FSTOP(evaluate_mappings_folding);
#if DEBUG >= 3
if (global_comm.rank == 0){
printf("mapping passed contr est_time = %E sec\n", est_time);
Expand All @@ -2600,7 +2596,6 @@ namespace CTF_int {
need_remap_A = 0;
need_remap_B = 0;
need_remap_C = 0;
TAU_FSTART(evaluate_mappings_comp_maps);
if (topo_i == old_topo_A){
for (d=0; d<A->order; d++){
if (!comp_dim_map(&A->edge_map[d],&old_map_A[d]))
Expand Down Expand Up @@ -2631,8 +2626,6 @@ namespace CTF_int {
}
} else
need_remap_C = 1;
TAU_FSTOP(evaluate_mappings_comp_maps);
TAU_FSTART(est_ctr_map_time);
if (need_remap_C) {
est_time += 2.*C->est_redist_time(*dC, nnz_frac_C);
memuse = std::max(1.0*memuse,2.*C->get_redist_mem(*dC, nnz_frac_C));
Expand All @@ -2646,20 +2639,16 @@ namespace CTF_int {
printf("total (with redistribution and transp) est_time = %E\n", est_time);
}
#endif
TAU_FSTOP(est_ctr_map_time);
ASSERT(est_time >= 0.0);

TAU_FSTART(get_avail_res);
if ((int64_t)memuse >= max_memuse){
if (global_comm.rank == 0)
DPRINTF(1,"Not enough memory available for topo %d with order %d memory %ld/%ld\n", t,j,memuse,max_memuse);
TAU_FSTOP(get_avail_res);
delete sctr;
continue;
}
if ((!A->is_sparse && A->size > INT_MAX) ||(!B->is_sparse && B->size > INT_MAX) || (!C->is_sparse && C->size > INT_MAX)){
DPRINTF(1,"MPI does not handle enough bits for topo %d with order\n", j);
TAU_FSTOP(get_avail_res);
delete sctr;
continue;
}
Expand All @@ -2670,7 +2659,6 @@ namespace CTF_int {
btopo = 6*t+j;
}
delete sctr;
TAU_FSTOP(get_avail_res);
}
}
TAU_FSTOP(evaluate_mappings)
Expand Down Expand Up @@ -3509,7 +3497,27 @@ namespace CTF_int {
}
#endif


if (blk_sz_A < vrt_sz_A){
printf("blk_sz_A = %ld, vrt_sz_A = %ld\n", blk_sz_A, vrt_sz_A);
printf("sizes are %ld %ld %ld\n",A->size,B->size,C->size);
A->print_map(stdout, 0);
B->print_map(stdout, 0);
C->print_map(stdout, 0);
}
if (blk_sz_B < vrt_sz_B){
printf("blk_sz_B = %ld, vrt_sz_B = %ld\n", blk_sz_B, vrt_sz_B);
printf("sizes are %ld %ld %ld\n",A->size,B->size,C->size);
A->print_map(stdout, 0);
B->print_map(stdout, 0);
C->print_map(stdout, 0);
}
if (blk_sz_C < vrt_sz_C){
printf("blk_sz_C = %ld, vrt_sz_C = %ld\n", blk_sz_C, vrt_sz_C);
printf("sizes are %ld %ld %ld\n",A->size,B->size,C->size);
A->print_map(stdout, 0);
B->print_map(stdout, 0);
C->print_map(stdout, 0);
}
ASSERT(blk_sz_A >= vrt_sz_A);
ASSERT(blk_sz_B >= vrt_sz_B);
ASSERT(blk_sz_C >= vrt_sz_C);
Expand Down
10 changes: 9 additions & 1 deletion src/contraction/ctr_2d_general.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespace CTF_int {
int & load_phase_C){
mapping * map;
int j;
int nstep = 1;
int64_t nstep = 1;
if (comp_dim_map(&C->edge_map[i_C], &B->edge_map[i_B])){
map = &B->edge_map[i_B];
while (map->has_child) map = map->child;
Expand Down Expand Up @@ -111,16 +111,24 @@ namespace CTF_int {
/virt_blk_len_C[j];
}
if (B->edge_map[i_B].type != PHYSICAL_MAP){
if (blk_sz_B / nstep == 0)
printf("blk_len_B[%d] = %d, nstep = %ld blk_sz_B = %ld\n",i_B,blk_len_B[i_B],nstep,blk_sz_B);
blk_sz_B = blk_sz_B / nstep;
blk_len_B[i_B] = blk_len_B[i_B] / nstep;
} else {
if (blk_sz_B * B->edge_map[i_B].np/ nstep == 0)
printf("blk_len_B[%d] = %d B->edge_map[%d].np = %d, nstep = %ld blk_sz_B = %ld\n",i_B,blk_len_B[i_B],i_B,B->edge_map[i_B].np,nstep,blk_sz_B);
blk_sz_B = blk_sz_B * B->edge_map[i_B].np / nstep;
blk_len_B[i_B] = blk_len_B[i_B] * B->edge_map[i_B].np / nstep;
}
if (C->edge_map[i_C].type != PHYSICAL_MAP){
if (blk_sz_C / nstep == 0)
printf("blk_len_C[%d] = %d, nstep = %ld blk_sz_C = %ld\n",i_C,blk_len_C[i_C],nstep,blk_sz_C);
blk_sz_C = blk_sz_C / nstep;
blk_len_C[i_C] = blk_len_C[i_C] / nstep;
} else {
if (blk_sz_C * C->edge_map[i_C].np/ nstep == 0)
printf("blk_len_C[%d] = %d C->edge_map[%d].np = %d, nstep = %ld blk_sz_C = %ld\n",i_C,blk_len_C[i_C],i_C,C->edge_map[i_C].np,nstep,blk_sz_C);
blk_sz_C = blk_sz_C * C->edge_map[i_C].np / nstep;
blk_len_C[i_C] = blk_len_C[i_C] * C->edge_map[i_C].np / nstep;
}
Expand Down
40 changes: 21 additions & 19 deletions src/interface/matrix.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ namespace CTF {
if (lda == nrow/pc){
memcpy(this->data, (char*)data_, sizeof(dtype)*this->size);
} else {
for (int i=0; i<ncol/pc; i++){
memcpy(this->data+i*lda*sizeof(dtype),(char*)(data_+i*lda), nrow*sizeof(dtype)/pr);
for (int64_t i=0; i<ncol/pc; i++){
memcpy(this->data+i*lda*sizeof(dtype),(char*)(data_+i*lda), ((int64_t)nrow)*sizeof(dtype)/pr);
}
}
} else {
Expand Down Expand Up @@ -258,7 +258,7 @@ namespace CTF {
if (lda == nrow/pc){
memcpy((char*)data_, this->data, sizeof(dtype)*this->size);
} else {
for (int i=0; i<ncol/pc; i++){
for (int64_t i=0; i<ncol/pc; i++){
memcpy((char*)(data_+i*lda), this->data+i*lda*sizeof(dtype), nrow*sizeof(dtype)/pr);
}
}
Expand Down Expand Up @@ -437,11 +437,11 @@ namespace CTF {

this->read_mat(desca, A);

dtype * tau = (dtype*)malloc(n*sizeof(dtype));
dtype * tau = (dtype*)malloc(((int64_t)n)*sizeof(dtype));
dtype dlwork;
CTF_SCALAPACK::pgeqrf<dtype>(m,n,A,1,1,desca,tau,(dtype*)&dlwork,-1,&info);
int lwork = get_int_fromreal<dtype>(dlwork);
dtype * work = (dtype*)malloc(lwork*sizeof(dtype));
dtype * work = (dtype*)malloc(((int64_t)lwork)*sizeof(dtype));
CTF_SCALAPACK::pgeqrf<dtype>(m,n,A,1,1,desca,tau,work,lwork,&info);


Expand All @@ -453,14 +453,14 @@ namespace CTF {
R = Matrix<dtype>(Q);
else {
R = Matrix<dtype>(desca,dQ,*this->wrld,*this->sr);
R = R.slice(0,m*(n-1)+n-1);
R = R.slice(0,((int64_t)m)*(n-1)+n-1);
}


free(work);
CTF_SCALAPACK::porgqr<dtype>(m,n,n,dQ,1,1,desca,tau,(dtype*)&dlwork,-1,&info);
lwork = get_int_fromreal<dtype>(dlwork);
work = (dtype*)malloc(lwork*sizeof(dtype));
work = (dtype*)malloc(((int64_t)lwork)*sizeof(dtype));
CTF_SCALAPACK::porgqr<dtype>(m,n,n,dQ,1,1,desca,tau,work,lwork,&info);
Q = Matrix<dtype>(desca, dQ, (*(this->wrld)));
free(work);
Expand Down Expand Up @@ -502,22 +502,24 @@ namespace CTF {
int64_t kpr = k/pr + (k % pr != 0);
int64_t kpc = k/pc + (k % pc != 0);
int64_t npc = n/pc + (n % pc != 0);

CTF_SCALAPACK::cdescinit(descu, m, k, 1, 1, 0, 0, ictxt, mpr, &info);
CTF_SCALAPACK::cdescinit(descvt, k, n, 1, 1, 0, 0, ictxt, kpr, &info);
dtype * A = (dtype*)malloc(this->size*sizeof(dtype));

dtype * A = (dtype*)CTF_int::alloc(this->size*sizeof(dtype));


dtype * u = (dtype*)new dtype[mpr*kpc];
dtype * s = (dtype*)new dtype[k];
dtype * vt = (dtype*)new dtype[kpr*npc];
dtype * u = (dtype*)CTF_int::alloc(sizeof(dtype)*mpr*kpc);
dtype * s = (dtype*)CTF_int::alloc(sizeof(dtype)*k);
dtype * vt = (dtype*)CTF_int::alloc(sizeof(dtype)*kpr*npc);
this->read_mat(desca, A);

int lwork;
dtype dlwork;
CTF_SCALAPACK::pgesvd<dtype>('V', 'V', m, n, NULL, 1, 1, desca, NULL, NULL, 1, 1, descu, vt, 1, 1, descvt, &dlwork, -1, &info);

lwork = get_int_fromreal<dtype>(dlwork);
dtype * work = (dtype*)malloc(sizeof(dtype)*lwork);
dtype * work = (dtype*)CTF_int::alloc(sizeof(dtype)*((int64_t)lwork));

CTF_SCALAPACK::pgesvd<dtype>('V', 'V', m, n, A, 1, 1, desca, s, u, 1, 1, descu, vt, 1, 1, descvt, work, lwork, &info);

Expand All @@ -537,18 +539,18 @@ namespace CTF {
}
if (rank > 0 && rank < k) {
S = S.slice(0, rank-1);
U = U.slice(0, rank*(m)-1);
VT = VT.slice(0, k*n-(k-rank+1));
U = U.slice(0, rank*((int64_t)m)-1);
VT = VT.slice(0, k*((int64_t)n)-(k-rank+1));
}

free(A);
delete [] u;
delete [] s;
delete [] vt;
CTF_int::cdealloc(A);
CTF_int::cdealloc(u);
CTF_int::cdealloc(s);
CTF_int::cdealloc(vt);
free(desca);
free(descu);
free(descvt);
free(work);
CTF_int::cdealloc(work);

}

Expand Down

0 comments on commit 58e4811

Please sign in to comment.