From 29aef2239ada38a9e1ffa08f3c6d86f12fb74731 Mon Sep 17 00:00:00 2001 From: CYChenFudan Date: Tue, 2 Jun 2020 17:47:05 +0800 Subject: [PATCH 1/9] Some first modifications: 00. src/tool: Makefile, add "chmod a+x $(GRASP)/bin/rsave" 01. jj2lsj90: In jj2lsj_code.f90, lines 220 and 222 are changed to deal with both parities present. 03. rcsfzerofirst90: In RCSFzerofirst.f90, the number of CSFs in zero-order space are output. The informations could be used conveniently in RCSF and RCI calculations. 04. rmcdhf90: In newco.f90, weighted average energy is output to monitor the energy convergence. 05. rangular90_mpi: getinf.90 modified for iccut=1 06. rci90_mpi: setham_gg.f90, only myid=0 reports how far the calculation has proceeded. 07. libdvd90, dvdson.f90: make lines 1119 and 1120 work for convergence monitor. The output messages are very useful for extremely large-scaled calculations. 08. libdvd90, dvdson.f90: function TSTSEL is modified, so that dvdson performs at least TWO iteration calculations. Or, error results could be obtained in some cases (in O-like isoelectronic sequence?). 09. coming dvdson--mpi --- Changes_cychen.txt | 16 ++++++++++++++++ make_environment_gfortran | 9 ++++++++- src/appl/rangular90_mpi/getinf.f90 | 5 +++-- src/appl/rci90_mpi/setham_gg.f90 | 9 ++++++--- src/appl/rcsfzerofirst90/RCSFzerofirst.f90 | 4 ++++ src/appl/rmcdhf90/newco.f90 | 2 ++ src/lib/libdvd90/dvdson.f90 | 17 ++++++++++------- src/lib/libdvd90/tstsel_I.f90 | 3 ++- src/tool/Makefile | 1 + 9 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 Changes_cychen.txt diff --git a/Changes_cychen.txt b/Changes_cychen.txt new file mode 100644 index 000000000..d4519a553 --- /dev/null +++ b/Changes_cychen.txt @@ -0,0 +1,16 @@ +00. src/tool: Makefile, add "chmod a+x $(GRASP)/bin/rsave" +01. jj2lsj90: In jj2lsj_code.f90, lines 220 and 222 are changed to deal with both parities present. +#02. jj2lsj90: In jj2lsj_code.f90, lines around 850, and lines around 2435, +#modifications for transformation of configurations with empty K-shell, such as +#2s2p, 2lnl, as done in A&A 592, A141(2016). +03. rcsfzerofirst90: In RCSFzerofirst.f90, the number of CSFs in zero-order space +are output. The informations could be used conveniently in RCSF and RCI calculations. +04. rmcdhf90: In newco.f90, weighted average energy is output to monitor the energy convergence. +05. rangular90_mpi: getinf.90 modified for iccut=1 +06. rci90_mpi: setham_gg.f90, only myid=0 reports how far the calculation has proceeded. +07. libdvd90, dvdson.f90: make lines 1119 and 1120 work for convergence +monitor. The output messages are very useful for extremely large-scaled calculations. +08. libdvd90, dvdson.f90: function TSTSEL is modified, so that dvdson performs at least TWO +iteration calculations. Or, error results could be obtained in some cases (in +O-like isoelectronic sequence?). +09. dvdson--mpi diff --git a/make_environment_gfortran b/make_environment_gfortran index 16caba378..dd34e64a3 100755 --- a/make_environment_gfortran +++ b/make_environment_gfortran @@ -9,13 +9,20 @@ # # Installation requirements: # - Lapack, Blas and MPI libraries have to be installed and properly linked - e.g. add them to LD_LIBRARY_PATH. +mkdir lib bin > /dev/null 2>&1 +echo "cp ../BLAS-3.8.0/blas_LINUX.a lib/libblas.a" +cp ../BLAS-3.8.0/blas_LINUX.a lib/libblas.a +echo "cp ../lapack-3.8.0/liblapack.a lib/" +cp ../lapack-3.8.0/liblapack.a lib/ + # - The Fortran compiler of choice and the MPI wrapper (as specified by FC and FC_MPI below) have to be on your PATH. # # ------------------------------------------------------------------------------------------------------------------- # Set up main flags # ------------------------------------------------------------------------------------------------------------------- export FC=gfortran # Fortran compiler -export FC_FLAGS="-O2 -fno-automatic " # Serial code compiler flags +#export FC_FLAGS="-O2 -fno-automatic" # Serial code compiler flags +export FC_FLAGS="-O2 -fno-automatic -fconvert=big-endian" # If little-endian used, comment out this line, use the above one export FC_LD=" " # Serial linker flags export GRASP="${PWD}" # Location of the 2018 root directory export LAPACK_LIBS="-llapack -lblas" # Lapack libraries diff --git a/src/appl/rangular90_mpi/getinf.f90 b/src/appl/rangular90_mpi/getinf.f90 index 18ec260eb..3a3b0285f 100644 --- a/src/appl/rangular90_mpi/getinf.f90 +++ b/src/appl/rangular90_mpi/getinf.f90 @@ -60,8 +60,9 @@ SUBROUTINE GETINF do i = 1,nblock write(istde,*) 'Give ICCUT for block',i 1 READ *, ICCUT(i) - IF ((ICCUT(i) <= 1).OR.(ICCUT(i) >= ncfblk(i))) THEN - WRITE (istde,*) 'GETINF: ICCUT must be greater than 1',& +!cychen IF ((ICCUT(i) <= 1).OR.(ICCUT(i) >= ncfblk(i))) THEN + IF ((ICCUT(i) < 1).OR.(ICCUT(i) >= ncfblk(i))) THEN + WRITE (istde,*) 'GETINF: ICCUT must be greater than 0',& ' and less than ',ncfblk(i) WRITE (istde,*) ' please reenter ICCUT:' GOTO 1 diff --git a/src/appl/rci90_mpi/setham_gg.f90 b/src/appl/rci90_mpi/setham_gg.f90 index 55b4621cc..18e789f5b 100644 --- a/src/appl/rci90_mpi/setham_gg.f90 +++ b/src/appl/rci90_mpi/setham_gg.f90 @@ -398,6 +398,8 @@ SUBROUTINE SETHAM (myid, nprocs, jblock, ELSTO,ICSTRT, nelmntt, & !cjb MPI progress begin -------------------------------------------------- ! !cjb just started +!cychen: report only from myid.eq.0 + if (myid .eq. 0 ) then IF (IC .LE. nprocs) then PRINT '(A6,I10,A9,I10,A9,I5,A8,I5)', 'Start ', ic, & ' nnonz = ', nelc, & @@ -410,19 +412,19 @@ SUBROUTINE SETHAM (myid, nprocs, jblock, ELSTO,ICSTRT, nelmntt, & IF (IC .GT. nprocs .and. IC .LT. NCF-nprocs .and. & MOD (IC-1,100*nprocs) .EQ. myid) then if (myid .eq. 0) then - PRINT '(A5,I10,A9,I10,A9,I5,A8,I5)', 'Done ', ic, & + PRINT '(A6,I10,A9,I10,A9,I5,A8,I5)', 'Done ', ic, & ' nnonz = ', nelc, & ' block = ', jblock,' myid = ', myid flush output_unit flush error_unit else - PRINT '(A5,I10,A9,I10,A9,I5,A8,I5)', 'Row ', ic, & + PRINT '(A6,I10,A9,I10,A9,I5,A8,I5)', 'Row ', ic, & ' nnonz = ', nelc, & ' block = ', jblock,' myid = ', myid flush output_unit flush error_unit endif - endif + endif ! !cjb almost finished IF (IC .GT. NCF-nprocs) then @@ -433,6 +435,7 @@ SUBROUTINE SETHAM (myid, nprocs, jblock, ELSTO,ICSTRT, nelmntt, & flush output_unit flush error_unit endif + endif !cjb MPI progress end ---------------------------------------------------- ! ! diff --git a/src/appl/rcsfzerofirst90/RCSFzerofirst.f90 b/src/appl/rcsfzerofirst90/RCSFzerofirst.f90 index 288edb916..de301c928 100644 --- a/src/appl/rcsfzerofirst90/RCSFzerofirst.f90 +++ b/src/appl/rcsfzerofirst90/RCSFzerofirst.f90 @@ -44,12 +44,15 @@ PROGRAM RCSFzerofirst print *, "" NBLOCK = 0 CALL SET_CSF_ZFlist +!cychen: output the zero-order space for re-use in mcp, rci + open(301,file='icut',form='formatted',status='unknown') WRITE (6, *) " Block Zero-order Space Complete Space" DO CALL LODCSL_Zero (NEXT_BLOCK) CALL LODCSL_Part (CSF_Number) WRITE (6,'(3X,I2,6X,I14,3X,I17)') & NBLOCK,NCFBLK(NBLOCK),CSF_Number-1 + write(301,*)NCFBLK(NBLOCK) deallocate (Found) deallocate (C_shell) deallocate (C_quant) @@ -57,6 +60,7 @@ PROGRAM RCSFzerofirst IF(.NOT. NEXT_BLOCK) EXIT WRITE(22,'(A2)') ' *' END DO + close(301) call stoptime (ncount1, 'RCSFzerofirst') STOP END PROGRAM RCSFzerofirst diff --git a/src/appl/rmcdhf90/newco.f90 b/src/appl/rmcdhf90/newco.f90 index bb5f6e727..e3ce00277 100644 --- a/src/appl/rmcdhf90/newco.f90 +++ b/src/appl/rmcdhf90/newco.f90 @@ -114,6 +114,8 @@ SUBROUTINE NEWCO(SUM) ! Write out average energy ! IF (NCMIN > 1) WRITE (*, 304) SUM +!cychen, output sum to the terminal for convergence monitor. + WRITE (0,'(A25,1PD18.10)') 'Weighted average energy: ', SUM ! ! Write out generalized occupation numbers ! diff --git a/src/lib/libdvd90/dvdson.f90 b/src/lib/libdvd90/dvdson.f90 index 02cb22411..39b992a70 100644 --- a/src/lib/libdvd90/dvdson.f90 +++ b/src/lib/libdvd90/dvdson.f90 @@ -732,7 +732,7 @@ SUBROUTINE DVDRVR(IRC, IREV, N, HIEND, LIM, MBLOCK, NOC, NUME, NIV, NEIG& ! by the largest magnitude in the last added NNCV rows of Svec. ! DONE = TSTSEL(KPASS,NUME,NEIG,ISELEC,SVEC,EIGVAL,ICV,CRITE,CRITC,SCRA1,& - ISCRA2,OLDVAL,NNCV,INCV) + ISCRA2,OLDVAL,NNCV,INCV,NLOOPS) IF (DONE .OR. KPASS>=N) GO TO 30 ! ! Maximum size for expanding basis. Truncate basis, D, and S, Svec @@ -1029,7 +1029,7 @@ SUBROUTINE NEWVEC(N, NUME, LIM, MBLOCK, KPASS, CRITR, NNCV, INCV, SVEC, & ! M o d u l e s !----------------------------------------------- USE vast_kind_param, ONLY: DOUBLE -! USE MPI_C, ONLY: MYID + USE MPI_C, ONLY: MYID !cychen !----------------------------------------------- ! I n t e r f a c e B l o c k s !----------------------------------------------- @@ -1115,9 +1115,10 @@ SUBROUTINE NEWVEC(N, NUME, LIM, MBLOCK, KPASS, CRITR, NNCV, INCV, SVEC, & ! SQRES = DDOT(N,AB(ICUR),1,AB(ICUR),1) SCRA1(INDX) = SQRT(SQRES) - -! IF (MYID == 0) WRITE (6, '(A11,F22.16,I2,A10,F19.16)') ' EIGVAL(i) ', & -! EIGVAL(INDX), INDX, ' Res.Norm ', SCRA1(INDX) +!cychen, the following informations are very useful for convergence +!monitoring in an extremely large-scaled calculations. + IF (MYID == 0) WRITE (6, '(A11,F22.16,I4,A10,F19.16)') & + ' EIGVAL(i) ', EIGVAL(INDX), INDX, ' Res.Norm ', SCRA1(INDX) IF (SCRA1(INDX) < CRITR) THEN ! ..Converged,do not add. Go for next non converged one @@ -1258,7 +1259,7 @@ END SUBROUTINE OVFLOW !======================================================================= LOGICAL FUNCTION TSTSEL (KPASS, NUME, NEIG, ISELEC, SVEC, EIGVAL, ICV, & - CRITE, CRITC, ROWLAST, IND, OLDVAL, NNCV, INCV) + CRITE, CRITC, ROWLAST, IND, OLDVAL, NNCV, INCV, NLOOPS) !======================================================================= ! ! Called by: DVDRVR @@ -1308,6 +1309,7 @@ LOGICAL FUNCTION TSTSEL (KPASS, NUME, NEIG, ISELEC, SVEC, EIGVAL, ICV, & REAL(DOUBLE) , INTENT(IN) :: EIGVAL(NUME) REAL(DOUBLE) :: ROWLAST(NEIG) REAL(DOUBLE) , INTENT(IN) :: OLDVAL(NUME) + INTEGER , INTENT(IN) :: NLOOPS !----------------------------------------------- ! L o c a l V a r i a b l e s !----------------------------------------------- @@ -1372,7 +1374,8 @@ LOGICAL FUNCTION TSTSEL (KPASS, NUME, NEIG, ISELEC, SVEC, EIGVAL, ICV, & DO L = 1, NNCV - 1 TMAX = MAX(TMAX,ABS(SVEC(ICUR-L))) END DO - IF (TMAX < CRITC) THEN +!cychen: perform at least TWO iterations, or, obtain error results in some cases. + IF (NLOOPS > 2 .and. TMAX < CRITC) THEN ! ..this coefficient converged ICV(ISELEC(I)) = 1 ELSE diff --git a/src/lib/libdvd90/tstsel_I.f90 b/src/lib/libdvd90/tstsel_I.f90 index c93260c75..a750ba8ef 100644 --- a/src/lib/libdvd90/tstsel_I.f90 +++ b/src/lib/libdvd90/tstsel_I.f90 @@ -4,7 +4,7 @@ MODULE tstsel_I !...Modified by Charlotte Froese Fischer ! Gediminas Gaigalas 10/05/17 LOGICAL FUNCTION tstsel (KPASS, NUME, NEIG, ISELEC, SVEC, EIGVAL, ICV& - , CRITE, CRITC, ROWLAST, IND, OLDVAL, NNCV, INCV) + , CRITE, CRITC, ROWLAST, IND, OLDVAL, NNCV, INCV, NLOOPS) USE vast_kind_param,ONLY: DOUBLE INTEGER, INTENT(IN) :: KPASS INTEGER, INTENT(IN) :: NUME @@ -20,6 +20,7 @@ LOGICAL FUNCTION tstsel (KPASS, NUME, NEIG, ISELEC, SVEC, EIGVAL, ICV& REAL(DOUBLE), DIMENSION(NUME), INTENT(IN) :: OLDVAL INTEGER, INTENT(INOUT) :: NNCV INTEGER, DIMENSION(NEIG), INTENT(OUT) :: INCV + INTEGER, INTENT(IN) :: NLOOPS !VAST.../MPI/ MYID(IN) !VAST...Calls: IDAMAX !...This routine performs I/O. diff --git a/src/tool/Makefile b/src/tool/Makefile index 04936da00..70b595d18 100644 --- a/src/tool/Makefile +++ b/src/tool/Makefile @@ -14,6 +14,7 @@ UTIL = rcsfsplit rmixaccumulate rseqenergy \ install: EXE cp rsave $(GRASP)/bin + chmod a+x $(GRASP)/bin/rsave cp lscomp.pl $(GRASP)/bin EXE : $(BIN)/rcsfsplit \ From 4b0aea0f3e093e65026cd670f95cedbac706a655 Mon Sep 17 00:00:00 2001 From: Chongyang Chen Date: Tue, 2 Jun 2020 19:43:28 +0800 Subject: [PATCH 2/9] Fix the bug in the generation of csf's using rcsfgenerate --- src/appl/rcsfgenerate90/matain.f90 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/appl/rcsfgenerate90/matain.f90 b/src/appl/rcsfgenerate90/matain.f90 index 53f7ca7b9..a21a9b5bf 100644 --- a/src/appl/rcsfgenerate90/matain.f90 +++ b/src/appl/rcsfgenerate90/matain.f90 @@ -262,7 +262,8 @@ subroutine matain(org, lock, closed, varmax, skal, nmax, anel, par, low, & if (Y(1:1).GE.'0' .AND. Y(1:1).LE.'9') then if (org(i,j).GT.0) then tmp = ICHAR(Y(1:1))-ICHAR('0') - if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & +!cychen if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & + if (Y(2:2).GE.'0' .AND. Y(2:2).LE.'9') & tmp = tmp*10 + ICHAR(Y(2:2))-ICHAR('0') low(i,j) = min(org(i,j),tmp) endif @@ -312,7 +313,8 @@ subroutine matain(org, lock, closed, varmax, skal, nmax, anel, par, low, & if (Y(1:1).GE.'0' .AND. Y(1:1).LE.'9') then if (org(i,j).GT.0) then tmp = ICHAR(Y(1:1))-ICHAR('0') - if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & +!cychen if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & + if (Y(2:2).GE.'0' .AND. Y(2:2).LE.'9') & tmp = tmp*10 + ICHAR(Y(2:2))-ICHAR('0') low(i,j) = min(org(i,j),tmp) endif From f7968ac741926df443d9bd851e809856784aad45 Mon Sep 17 00:00:00 2001 From: Chongyang Chen Date: Tue, 2 Jun 2020 23:42:31 +0800 Subject: [PATCH 3/9] DDOTMPI and DGEMVMPI added in mpiu.f90; Then, DVDSON calls them --- make_environment_gfortran | 2 +- src/lib/libdvd90/dvdson.f90 | 102 ++++++++-- src/lib/mpi90/mpiu.f90 | 394 ++++++++++++++++++++++++++++++++++++ 3 files changed, 481 insertions(+), 17 deletions(-) diff --git a/make_environment_gfortran b/make_environment_gfortran index dd34e64a3..53ca53222 100755 --- a/make_environment_gfortran +++ b/make_environment_gfortran @@ -33,4 +33,4 @@ export FC_MPI="mpifort" # MPI export FC_MPIFLAGS="${FC_FLAGS}" # Parallel code compiler flags export FC_MPILD=${FC_LD} # Serial linker flags # ------------------------------------------------------------------------------------------------------------------- -export MPI_TMP="${HOME}/grasp_mpi_tmp" # Location for temporary files +export MPI_TMP="${HOME}/tmp" # Location for temporary files diff --git a/src/lib/libdvd90/dvdson.f90 b/src/lib/libdvd90/dvdson.f90 index 39b992a70..839d1d88c 100644 --- a/src/lib/libdvd90/dvdson.f90 +++ b/src/lib/libdvd90/dvdson.f90 @@ -543,6 +543,8 @@ SUBROUTINE ADDS(N, LIM, KPASS, NNCV, BASIS, AB, S) DO IV = 1, NNCV IBSTART = 1 DO IBV = 1, KPASS + IV +!cychen SS = DDOT(N,BASIS(IBSTART),1,AB(IDSTART),1) + call DDOTMPI(N,BASIS(IBSTART),AB(IDSTART),SS) SS = DDOT(N,BASIS(IBSTART),1,AB(IDSTART),1) S(ISSTART+IBV) = SS IBSTART = IBSTART + N @@ -640,6 +642,7 @@ SUBROUTINE DVDRVR(IRC, IREV, N, HIEND, LIM, MBLOCK, NOC, NUME, NIV, NEIG& INTEGER :: REST, I, KPASS, NNCV, IFIND, NFOUND, INFO, NEWSTART REAL(DOUBLE) :: TOL LOGICAL :: FIRST, DONE + REAL(DOUBLE) :: dsum SAVE FIRST, DONE, REST, I, KPASS, NNCV, IFIND, TOL, NFOUND, INFO, & NEWSTART @@ -822,8 +825,10 @@ SUBROUTINE DVDRVR(IRC, IREV, N, HIEND, LIM, MBLOCK, NOC, NUME, NIV, NEIG& OLDVAL(I) = ABS(OLDVAL(I)-EIGVAL(I)) END DO - CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, BASIS) - CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, AB) +!cychen CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, BASIS) +!cychen CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, AB) + CALL MULTBC_COL (N, KPASS, NUME, SVEC, SCRA1, BASIS) + CALL MULTBC_COL (N, KPASS, NUME, SVEC, SCRA1, AB) ! ! i=1,NUME residual(i)= DCi-liBCi= newDi-linewBi ! temporarily stored in AB(NUME*N+1) @@ -831,8 +836,10 @@ SUBROUTINE DVDRVR(IRC, IREV, N, HIEND, LIM, MBLOCK, NOC, NUME, NIV, NEIG& DO I = 1, NUME CALL DCOPY (N, AB((I-1)*N+1), 1, AB(NUME*N+1), 1) CALL DAXPY (N, (-EIGVAL(I)),BASIS((I-1)*N+1), 1, AB(NUME*N+1), 1) - SCRA1(I) = DDOT(N,AB(NUME*N+1),1,AB(NUME*N+1),1) - SCRA1(I) = SQRT(SCRA1(I)) +!cychen SCRA1(I) = DDOT(N,AB(NUME*N+1),1,AB(NUME*N+1),1) +! SCRA1(I) = SQRT(SCRA1(I)) + call DDOTMPI(N,AB(NUME*N+1),AB(NUME*N+1),dsum) + SCRA1(I) = SQRT(dsum) END DO ! ! Set IRC=0 for normal exit with no reverse communication @@ -997,6 +1004,55 @@ SUBROUTINE MULTBC(N, K, M, C, TEMP, B) RETURN END SUBROUTINE MULTBC +!======================================================================= + SUBROUTINE MULTBC_COL(N,K,M,C,SCARTMP,B) +!======================================================================= +!cychen: obtain the column of B(N,M), by using DGEMVMPI +! called by: DVDRVR +! +! Multiplies B(N,K)*C(K,M) and stores it in B(N,M) +! Used for collapsing the expanding basis to current estimates, +! when basis becomes too large, or for returning the results back + +! Subroutines called +! DINIT, DGEMV, DCOPY +!----------------------------------------------------------------------- +!----------------------------------------------- +! M o d u l e s +!----------------------------------------------- + USE vast_kind_param, ONLY: DOUBLE +!----------------------------------------------- +! I n t e r f a c e B l o c k s +!----------------------------------------------- + !USE dgemv_I + !USE dcopy_I + IMPLICIT NONE +!----------------------------------------------- +! D u m m y A r g u m e n t s +!----------------------------------------------- + INTEGER :: N + INTEGER :: K + INTEGER :: M + REAL(DOUBLE) :: C(K*M) + REAL(DOUBLE) :: SCARTMP(M) +!cychen REAL(DOUBLE) :: B(N*K) + REAL(DOUBLE) :: B(1) +!----------------------------------------------- +! L o c a l V a r i a b l e s +!----------------------------------------------- + INTEGER :: JCOL + REAL(DOUBLE) :: TEMP(N*M) + +!----------------------------------------------------------------------- + DO JCOL=1,M + CALL DGEMVMPI('N',N,K,1.D0,B,N,C((JCOL-1)*K+1),1, & + 0.d0,TEMP((JCOL-1)*N+1),1) + ENDDO + CALL DCOPY(N*M,TEMP,1,B,1) + CALL DCOPY(M,B(N),N,SCARTMP,1) + + RETURN + END SUBROUTINE MULTBC_COL !======================================================================= SUBROUTINE NEWVEC(N, NUME, LIM, MBLOCK, KPASS, CRITR, NNCV, INCV, SVEC, & @@ -1104,16 +1160,23 @@ SUBROUTINE NEWVEC(N, NUME, LIM, MBLOCK, KPASS, CRITR, NNCV, INCV, SVEC, & ! ..Compute d = AB*Svec_indx ! ..Daxpy d'= d - eigval b gives the residual - CALL DGEMV ('N', N, KPASS, 1.D0, BASIS, N, SVEC((INDX-1)*KPASS+1) & - , 1, 0.D0, BASIS(ICUR), 1) - CALL DGEMV ('N', N, KPASS, 1.D0, AB, N, SVEC((INDX-1)*KPASS+1), 1, & - 0.D0, AB(ICUR), 1) +!cychen CALL DGEMV ('N', N, KPASS, 1.D0, BASIS, N, SVEC((INDX-1)*KPASS+1) & +!cychen , 1, 0.D0, BASIS(ICUR), 1) +!cychen CALL DGEMV ('N', N, KPASS, 1.D0, AB, N, SVEC((INDX-1)*KPASS+1), 1, & +!cychen 0.D0, AB(ICUR), 1) + CALL DGEMVMPI('N',N,KPASS,1.D0,BASIS,N,SVEC((INDX-1)*KPASS+1),1,& + 0.d0,BASIS(ICUR),1) + CALL DGEMVMPI('N',N,KPASS,1.D0,AB,N,SVEC((INDX-1)*KPASS+1),1, & + 0.d0,AB(ICUR),1) +!cychen: daxpympi is worse CALL DAXPY (N, (-EIGVAL(INDX)),BASIS(ICUR), 1, AB(ICUR), 1) + !CALL DAXPYMPI(N,-EIGVAL(INDX),BASIS(ICUR),1,AB(ICUR),1) ! ! ..Compute the norm of the residual ! ..and check for convergence ! - SQRES = DDOT(N,AB(ICUR),1,AB(ICUR),1) +!cychen SQRES = DDOT(N,AB(ICUR),1,AB(ICUR),1) + call DDOTMPI(N,AB(ICUR),AB(ICUR),SQRES) SCRA1(INDX) = SQRT(SQRES) !cychen, the following informations are very useful for convergence !monitoring in an extremely large-scaled calculations. @@ -1234,8 +1297,10 @@ SUBROUTINE OVFLOW(N, NUME, KPASS, SCRA1, BASIS, AB, S, SVEC, EIGVAL) !----------------------------------------------------------------------- ! Truncate the basis and the AB array. ! - CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, BASIS) - CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, AB) +!cychen CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, BASIS) +!cychen CALL MULTBC (N, KPASS, NUME, SVEC, SCRA1, AB) + CALL MULTBC_COL (N, KPASS, NUME, SVEC, SCRA1, BASIS) + CALL MULTBC_COL (N, KPASS, NUME, SVEC, SCRA1, AB) ! ! calculation of the new upper S=diag(l1,...,l_NUME) and ! its matrix Svec of eigenvectors (e1,...,e_NUME) @@ -1468,8 +1533,10 @@ subroutine mgs_nrm(n, kp, new, scra, b) kcur = 1 do k = 1, kp jcur = newstart - call dgemv ('T', n, new, 1.D0, b(jcur), n, b(kcur), 1, 0.D0, scra, & - 1) +!cychen call DGEMV ('T', n, new, 1.D0, b(jcur), n, b(kcur), 1, 0.D0, scra, & +!cychen 1) + call DGEMVMPI('T', N, new, 1.d0, B(jcur), N, B(kcur), 1, & + 0.d0, scra, 1) do j = 1, new ! call daxpy(N,-scra(j),B(kcur),1,B(jcur),1) do mm = 0, n - 1 @@ -1486,12 +1553,15 @@ subroutine mgs_nrm(n, kp, new, scra, b) jcur = kcur + n ! The current vector should be normalized ! - dnm = ddot(n,b(kcur),1,b(kcur),1) +!cychen dnm = DDOT(n,b(kcur),1,b(kcur),1) + call DDOTMPI(N,B(kcur),B(kcur),dnm) dnm = sqrt(dnm) call dscal (n, 1/dnm, b(kcur), 1) ! - call dgemv ('T', n, new - k, 1.D0, b(jcur), n, b(kcur), 1, 0.D0, & - scra, 1) +!cychen call DGEMV ('T', n, new - k, 1.D0, b(jcur), n, b(kcur), 1, 0.D0, & +!cychen scra, 1) + call DGEMVMPI('T', N, new - k, 1.d0, B(jcur), N, B(kcur), 1,& + 0.d0, scra, 1) do j = k + 1, new ! call daxpy(N,-scra(j-k),B(kcur),1,B(jcur),1) do mm = 0, n - 1 diff --git a/src/lib/mpi90/mpiu.f90 b/src/lib/mpi90/mpiu.f90 index c98f076af..ffe2324f3 100644 --- a/src/lib/mpi90/mpiu.f90 +++ b/src/lib/mpi90/mpiu.f90 @@ -369,3 +369,397 @@ SUBROUTINE gisummpi (ix, n) RETURN END + +!*********************************************************************** +!cychen, 2020/05 + SUBROUTINE ddotmpi (n, x, y, ddot) +! mpi inner-dot, modification from blas: ddot +!*********************************************************************** +!----------------------------------------------- +! M o d u l e s +!----------------------------------------------- + USE mpi_C +!----------------------------------------------- + IMPLICIT NONE + INTEGER n, i, nbar + REAL*8 x(n), y(n), ddot, dsum + + ddot=0.0d0 + nbar = n/nprocs + + do i=myid*nbar+1, (myid+1)*nbar + ddot=ddot+x(i)*y(i) + enddo + + CALL MPI_Allreduce (ddot, dsum, 1, MPI_DOUBLE_PRECISION, & + MPI_SUM, MPI_COMM_WORLD, ierr) + do i=nbar*nprocs+1, n + dsum=dsum+x(i)*y(i) + enddo + ddot=dsum + + RETURN + END + +!*********************************************************************** +!cychen, 2020/05 + subroutine dgemvmpi ( trans, m, n, alpha, a, lda, x, incx, & + BETA, Y, INCY ) +! mpi version for blas: dgemv +!*********************************************************************** +!----------------------------------------------- +! M o d u l e s +!----------------------------------------------- + USE mpi_C +!----------------------------------------------- + IMPLICIT NONE +! .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +! .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) + + INTEGER nbar + + real*8 tm(m), tn + +!cychen: +!DGEMV performs one of the matrix-vector operations by using mpi: +! y :=A*x, or y:=A'*x +!then, alpha and beta must be input as 1.0d0 and 0.0d0, respectively +!Also, for simplicity, incx and incy are modified to be both 1. + +! +! Parameters +! ========== +! +! TRANS - CHARACTER*1. +! On entry, TRANS specifies the operation to be performed as +! follows: +! +! TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +! +! TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +! +! TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +! +! Unchanged on exit. +! +! M - INTEGER. +! On entry, M specifies the number of rows of the matrix A. +! M must be at least zero. +! Unchanged on exit. +! +! N - INTEGER. +! On entry, N specifies the number of columns of the matrix A. +! N must be at least zero. +! Unchanged on exit. +! +! ALPHA - DOUBLE PRECISION. +! On entry, ALPHA specifies the scalar alpha. +! Unchanged on exit. +! +! A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +! Before entry, the leading m by n part of the array A must +! contain the matrix of coefficients. +! Unchanged on exit. +! +! LDA - INTEGER. +! On entry, LDA specifies the first dimension of A as declared +! in the calling (sub) program. LDA must be at least +! max( 1, m ). +! Unchanged on exit. +! +! X - DOUBLE PRECISION array of DIMENSION at least +! ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +! and at least +! ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +! Before entry, the incremented array X must contain the +! vector x. +! Unchanged on exit. +! +! INCX - INTEGER. +! On entry, INCX specifies the increment for the elements of +! X. INCX must not be zero. +! Unchanged on exit. +! +! BETA - DOUBLE PRECISION. +! On entry, BETA specifies the scalar beta. When BETA is +! supplied as zero then Y need not be set on input. +! Unchanged on exit. +! +! Y - DOUBLE PRECISION array of DIMENSION at least +! ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +! and at least +! ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +! Before entry with BETA non-zero, the incremented array Y +! must contain the vector y. On exit, Y is overwritten by the +! updated vector y. +! +! INCY - INTEGER. +! On entry, INCY specifies the increment for the elements of +! Y. INCY must not be zero. +! Unchanged on exit. +! +! +! Level 2 Blas routine. +! +! -- Written on 22-October-1986. +! Jack Dongarra, Argonne National Lab. +! Jeremy Du Croz, Nag Central Office. +! Sven Hammarling, Nag Central Office. +! Richard Hanson, Sandia National Labs. +! +! +! .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +! .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY +! .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +! .. External Subroutines .. + EXTERNAL XERBLA +! .. Intrinsic Functions .. + INTRINSIC MAX +! .. +! .. Executable Statements .. +! +! Test the input parameters. +! + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. & + .NOT.LSAME( TRANS, 'T' ).AND. & + .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 +!cychen ELSE IF( INCX.EQ.0 )THEN + ELSE IF( INCX.NE.1 )THEN + INFO = 8 + if (myid.eq.0) print *, "INCX=",INCX +!cychen ELSE IF( INCY.EQ.0 )THEN + ELSE IF( INCY.NE.1 )THEN + INFO = 11 + if (myid.eq.0) print *, "INCY=",INCY + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGEMVMPI ', INFO ) + RETURN + END IF +! +! Quick return if possible. +! + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. & + ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) & + RETURN +! +! Set LENX and LENY, the lengths of the vectors x and y, and set +! up the start points in X and Y. +! + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +! +! Start the operations. In this version the elements of A are +! accessed sequentially with one pass through A. +! +! First form y := beta*y. +! +!cychen IF( BETA.NE.ONE )THEN +!cychen IF( INCY.EQ.1 )THEN +!cychen IF( BETA.EQ.ZERO )THEN +!cychen DO 10, I = 1, LENY +!cychen Y( I ) = ZERO +!cychen 10 CONTINUE +!cychen ELSE +!cychen DO 20, I = 1, LENY +!cychen Y( I ) = BETA*Y( I ) +!cychen 20 CONTINUE +!cychen END IF +!cychen ELSE +!cychen IY = KY +!cychen IF( BETA.EQ.ZERO )THEN +!cychen DO 30, I = 1, LENY +!cychen Y( IY ) = ZERO +!cychen IY = IY + INCY +!cychen 30 CONTINUE +!cychen ELSE +!cychen DO 40, I = 1, LENY +!cychen Y( IY ) = BETA*Y( IY ) +!cychen IY = IY + INCY +!cychen 40 CONTINUE +!cychen END IF +!cychen END IF +!cychen END IF +!cychen IF( ALPHA.EQ.ZERO ) +!cychen $ RETURN + + IF( LSAME( TRANS, 'N' ) )THEN +! +! Form y := alpha*A*x + y. +! +! JX = KX + IF( INCY.EQ.1 )THEN + CALL DINIT(M, 0.d0, tm, 1) + nbar = M / nprocs + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + do i = myid * nbar + 1, (myid + 1) * nbar + tm(i)=tm(i) + x(j) * A(i,j) + enddo + END IF + 60 CONTINUE + CALL MPI_Allreduce (tm, y, m, MPI_DOUBLE_PRECISION, & + MPI_SUM, MPI_COMM_WORLD, ierr) + do j = 1, N + do i = nbar*nprocs + 1, M + y(i) = y(i) + x(j) * A(i,j) + enddo + enddo + + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +! +! Form y := alpha*A'*x + y. +! + JY = KY + IF( INCX.EQ.1 )THEN + nbar = M / nprocs + DO 100, J = 1, N + TEMP = ZERO + DO 90, I = myid * nbar + 1, (myid + 1) * nbar + TEMP = TEMP + A( I, J ) * X( I ) + 90 CONTINUE + CALL MPI_Allreduce (TEMP, tn, 1, MPI_DOUBLE_PRECISION,& + MPI_SUM, MPI_COMM_WORLD, ierr) + do i = nbar * nprocs + 1, M + tn = tn + A( I, J ) * X( I ) + enddo + Y( J ) = tn + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + DO 110, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 120 CONTINUE + END IF + END IF +! + RETURN +! +! End of DGEMV . +! + END + +!*********************************************************************** +!cychen, 2020/05 +!performs y := da*x + y +!For simplicity, modification only for incx=incy=1. + subroutine daxpympi(n,da,dx,incx,dy,incy) +! mpi version of blas: daxpy (Tests show that it does not save time) +!*********************************************************************** +! +! constant times a vector plus a vector. +! uses unrolled loops for increments equal to one. +! jack dongarra, linpack, 3/11/78. +! +!----------------------------------------------- +! M o d u l e s +!----------------------------------------------- + USE mpi_C +!----------------------------------------------- + + IMPLICIT NONE + + double precision dx(1),dy(1),da + integer i,incx,incy,ix,iy,m,mp1,n + + INTEGER nbar + real*8 tm(n) + +! + if(n.le.0)return + if (da .eq. 0.0d0) return + if(incx.eq.1.and.incy.eq.1)go to 20 +! +! code for unequal increments or equal increments +! not equal to 1 +! + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dy(iy) = dy(iy) + da*dx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return + +! mpi code for both increments equal to 1 + 20 CALL DINIT(n, 0.d0, tm, 1) + nbar = n / nprocs + do i = myid*nbar+1, (myid+1)*nbar + tm(i) = dy(i) + da * dx(i) + enddo + CALL MPI_Allreduce (tm, dy, nprocs*nbar, MPI_DOUBLE_PRECISION,& + MPI_SUM, MPI_COMM_WORLD, ierr) + do i = nprocs*nbar+1, n + dy(i) = dy(i) + da*dx(i) + enddo + +! m = mod(n,4) +! if( m .eq. 0 ) go to 40 +! do 30 i = 1,m +! dy(i) = dy(i) + da*dx(i) +! 30 continue +! if( n .lt. 4 ) return +! 40 mp1 = m + 1 +! do 50 i = mp1,n,4 +! dy(i) = dy(i) + da*dx(i) +! dy(i + 1) = dy(i + 1) + da*dx(i + 1) +! dy(i + 2) = dy(i + 2) + da*dx(i + 2) +! dy(i + 3) = dy(i + 3) + da*dx(i + 3) +! 50 continue + return + end From e8efff48584f1af4f380153339c65f249e0f8908 Mon Sep 17 00:00:00 2001 From: Jon Grumer Date: Tue, 6 Oct 2020 14:51:29 +0200 Subject: [PATCH 4/9] Update matain.f90 Revert changes, this is move to a separate PR (jg/chen_rcsfgenerate) to get this into master as soon as possible. --- src/appl/rcsfgenerate90/matain.f90 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/appl/rcsfgenerate90/matain.f90 b/src/appl/rcsfgenerate90/matain.f90 index a21a9b5bf..53f7ca7b9 100644 --- a/src/appl/rcsfgenerate90/matain.f90 +++ b/src/appl/rcsfgenerate90/matain.f90 @@ -262,8 +262,7 @@ subroutine matain(org, lock, closed, varmax, skal, nmax, anel, par, low, & if (Y(1:1).GE.'0' .AND. Y(1:1).LE.'9') then if (org(i,j).GT.0) then tmp = ICHAR(Y(1:1))-ICHAR('0') -!cychen if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & - if (Y(2:2).GE.'0' .AND. Y(2:2).LE.'9') & + if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & tmp = tmp*10 + ICHAR(Y(2:2))-ICHAR('0') low(i,j) = min(org(i,j),tmp) endif @@ -313,8 +312,7 @@ subroutine matain(org, lock, closed, varmax, skal, nmax, anel, par, low, & if (Y(1:1).GE.'0' .AND. Y(1:1).LE.'9') then if (org(i,j).GT.0) then tmp = ICHAR(Y(1:1))-ICHAR('0') -!cychen if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & - if (Y(2:2).GE.'0' .AND. Y(2:2).LE.'9') & + if (Y(2:2).GE.'1' .AND. Y(2:2).LE.'9') & tmp = tmp*10 + ICHAR(Y(2:2))-ICHAR('0') low(i,j) = min(org(i,j),tmp) endif From 76eb371dbf4c3261c95f10243de024ed4cb7ff24 Mon Sep 17 00:00:00 2001 From: Chongyang Chen Date: Sat, 16 Jan 2021 07:06:35 +0800 Subject: [PATCH 5/9] Modifcations for searching the targeted eigenpairs one by one --- Changes_cychen.txt | 1 + src/appl/rci90_mpi/maneigmpi.f90 | 74 +++++++++++++++++++++++++++++--- src/lib/libdvd90/dvdson.f90 | 7 ++- src/lib/libdvd90/gdvd.f90 | 21 +++++++-- src/lib/libdvd90/gdvd_I.f90 | 3 +- 5 files changed, 96 insertions(+), 10 deletions(-) diff --git a/Changes_cychen.txt b/Changes_cychen.txt index d4519a553..bdffe8e38 100644 --- a/Changes_cychen.txt +++ b/Changes_cychen.txt @@ -14,3 +14,4 @@ monitor. The output messages are very useful for extremely large-scaled calculat iteration calculations. Or, error results could be obtained in some cases (in O-like isoelectronic sequence?). 09. dvdson--mpi +10. Modifcations for searching the targeted eigenpairs one by one diff --git a/src/appl/rci90_mpi/maneigmpi.f90 b/src/appl/rci90_mpi/maneigmpi.f90 index d313efc92..e0ddba283 100644 --- a/src/appl/rci90_mpi/maneigmpi.f90 +++ b/src/appl/rci90_mpi/maneigmpi.f90 @@ -132,8 +132,12 @@ SUBROUTINE MANEIG(IATJPO, IASPAR, NELMNT_a) CHARACTER(LEN=8) :: CNUM REAL(DOUBLE), DIMENSION(:), pointer :: w, z, work, diag INTEGER, DIMENSION(:), pointer :: iwork, ifail, jwork -!----------------------------------------------- -! + +!----------------------------------------------------------------------- +! CYC Modifications for step by step diagonalization + INTEGER IRESTART_GDVD, ISTEP, NSTEP, IITMP, NMV0 + INTEGER TIME0, TIME1, TIME2 +! CYC !----------------------------------------------------------------------- ABSTOL = 2*DLAMCH('S') IF (MYID == 0) WRITE (6, *) 'Calling maneig...' @@ -494,7 +498,7 @@ SUBROUTINE MANEIG(IATJPO, IASPAR, NELMNT_a) print *, 'Returned from iniestsd ' if (ncf.gt. IOLPCK) then print *, 'Calling GDVD' - CALL GDVD (SPODMV,NCF,LIM,DIAG,ILOW,IHIGH, & + CALL GDVD (SPODMV,IRESTART_GDVD,NCF,LIM,DIAG,ILOW,IHIGH, & JWORK,NIV,MBLOCK,CRITE,CRITC, CRITR,ORTHO,MAXITR, & WORK,LWORK,IWORK,LIWORK,HIEND,NLOOPS, & NMV,IERR) @@ -505,10 +509,70 @@ SUBROUTINE MANEIG(IATJPO, IASPAR, NELMNT_a) IF (myid .EQ. 0) print *, ' Sparse - Memory, iniestmpi' CALL iniestmpi (IOLPCK, NCF,NIV,WORK,EMT,IENDC,IROW) if(ncf.gt. IOLPCK) then - CALL GDVD (SPICMVmpi,NCF,LIM,DIAG,ILOW,IHIGH, & +!----------------------------------------------------------------------- +! CYC: Try to diagonalize step by step + IRESTART_GDVD = 0 + TIME0 = MPI_WTIME() +! More tests should be performed for istep, it could be set as 1 + ISTEP = 5 + NMV = 0 + IF (NCF.GT.2.0E5 .AND. NVEX.GT.ISTEP) THEN + NSTEP = NVEX / ISTEP + IF (MOD(NVEX,ISTEP).LE.ISTEP/2) NSTEP = NSTEP - 1 + DO IITMP = 1, NSTEP +! NIV could be changed to nvecmx+1 in dvdson:initdvd procedure + NIV = NVEX + ILOW = 1 + IHIGH = IITMP * ISTEP + IF (IITMP .EQ. 1 ) THEN + IRESTART_GDVD = 0 + ELSE + IRESTART_GDVD = 1 + ENDIF + NMV0 = NMV + TIME1 = MPI_WTIME() + + CALL GDVD (SPICMVmpi,IRESTART_GDVD,NCF,LIM,DIAG,ILOW,IHIGH, & JWORK,NIV,MBLOCK,CRITE,CRITC, CRITR,ORTHO,MAXITR, & WORK,LWORK,IWORK,LIWORK,HIEND,NLOOPS, & NMV,IERR) + + TIME2 = MPI_WTIME() + IF (MYID .EQ. 0) THEN + WRITE (*, *)' IITMP= ', IITMP, & + ' Time (s)=', TIME2 - TIME1 + WRITE (*,*) ' ', NMV - NMV0, & + 'Matrix-vector multiplies for this loop.' + WRITE (*,*) ' ', NMV, & + ' Total matrix-vector multiplies by now.' + ENDIF + ENDDO + ENDIF + +! Search the remaining (NVEX - ISTEP * NSTEP) EIGENPAIRS + NIV = NVEX + ILOW = 1 + IHIGH = NVEX + NMV0 = NMV + TIME1 = MPI_WTIME() + + CALL GDVD(SPICMVmpi,IRESTART_GDVD,NCF,LIM,DIAG,ILOW,IHIGH, & + JWORK,NIV,MBLOCK,CRITE,CRITC, CRITR,ORTHO,MAXITR, & + WORK,LWORK,IWORK,LIWORK,HIEND,NLOOPS, & + NMV,IERR) + + TIME2 = MPI_WTIME() + IF (MYID .EQ. 0) THEN + WRITE (*, *)' IITMP= ', IITMP, & + ' Time (s)=', TIME2 - TIME1 + WRITE (*,*) ' ', NMV - NMV0, & + 'Matrix-vector multiplies for this loop.' + WRITE (*,*) ' ', NMV, & + ' Total matrix-vector multiplies by now.' + WRITE (*, *)' Total time (m)=', (TIME2 - TIME0) / 60.0 + ENDIF +! CYC +!----------------------------------------------------------------------- end if CALL DALLOC (EMT, 'EMT', 'MANEIG') @@ -520,7 +584,7 @@ SUBROUTINE MANEIG(IATJPO, IASPAR, NELMNT_a) IF (myid .EQ. 0) print *, ' Dense - Memory, iniestdm' CALL INIESTDM (IOLPCK,NCF,NIV,WORK,EMT) if (ncf.gt. IOLPCK) then - CALL GDVD (DNICMV,NCF,LIM,DIAG,ILOW,IHIGH, & + CALL GDVD (DNICMV,IRESTART_GDVD,NCF,LIM,DIAG,ILOW,IHIGH, & JWORK,NIV,MBLOCK,CRITE,CRITC, CRITR,ORTHO,MAXITR, & WORK,LWORK,IWORK,LIWORK,HIEND,NLOOPS, & NMV,IERR) diff --git a/src/lib/libdvd90/dvdson.f90 b/src/lib/libdvd90/dvdson.f90 index 839d1d88c..6cc9827ab 100644 --- a/src/lib/libdvd90/dvdson.f90 +++ b/src/lib/libdvd90/dvdson.f90 @@ -362,7 +362,11 @@ SUBROUTINE DVDSON(IRC, IREV, N, LIM, NOC, ILOW, IHIGH, ISELEC, NIV, & IF (IERR /= 0) RETURN - NUME = IHIGH +!set NUME=NIV. In step-by-step diagonalization, IHIGH is changed in +!different calling dvdson + !NUME=IHIGH + NUME = NIV + ! ..Identify if few of the highest eigenpairs are wanted. IF (ILOW + IHIGH - 1 > N) THEN HIEND = .TRUE. @@ -423,6 +427,7 @@ SUBROUTINE DVDSON(IRC, IREV, N, LIM, NOC, ILOW, IHIGH, ISELEC, NIV, & ! 100 CONTINUE +! CYC: niv may be changed to nume+1 in initdvd CALL INITDVD (IRC, IREV, N, NOC, NIV, NUME + 1, LIM, HIEND, WORK(ISCRA1)& , WORK(IORTHO), WORK(IBASIS), WORK(IAB), WORK(IS)) ! ---------------------------------------------------------------- diff --git a/src/lib/libdvd90/gdvd.f90 b/src/lib/libdvd90/gdvd.f90 index 1a2ab8517..2187b9c3a 100644 --- a/src/lib/libdvd90/gdvd.f90 +++ b/src/lib/libdvd90/gdvd.f90 @@ -1,7 +1,7 @@ !*************************************************************************** - SUBROUTINE GDVD(OP, N, LIM, DIAG, ILOW, IHIGH, ISELEC, NIV, MBLOCK, CRITE& + SUBROUTINE GDVD(OP, IRESTART_GDVD, N, LIM, DIAG, ILOW, IHIGH, ISELEC, NIV, MBLOCK, CRITE& , CRITC, CRITR, ORTHO, MAXITER, WORK, IWRSZ, IWORK, IIWSZ, HIEND, & NLOOPS, NMV, IERR) ! Written by M. Saparov @@ -25,6 +25,7 @@ SUBROUTINE GDVD(OP, N, LIM, DIAG, ILOW, IHIGH, ISELEC, NIV, MBLOCK, CRITE& !----------------------------------------------- ! D u m m y A r g u m e n t s !----------------------------------------------- + INTEGER :: IRESTART_GDVD INTEGER :: N INTEGER :: LIM INTEGER :: ILOW @@ -134,13 +135,27 @@ SUBROUTINE GDVD(OP, N, LIM, DIAG, ILOW, IHIGH, ISELEC, NIV, MBLOCK, CRITE& GO TO 99 !********* - ELSE IF (IRC==2 .OR. IRC==3) THEN +!----------------------------------------------------------------------- +! CYC: Try to diagonalize step by step + !ELSE IF (IRC==2 .OR. IRC==3) THEN + ELSE IF (IRC==3) THEN !********* ..Matrix-vector multiply. CALL OP (N, NB, WORK(IW1), WORK(IW2)) NMV = NMV + NB - GO TO 99 + + ELSE IF (IRC==2) THEN + IF (IRESTART_GDVD==0) THEN + CALL OP (N, NB, WORK(IW1), WORK(IW2)) + NMV = NMV + NB + ELSE + CALL OP (N, 1, WORK(IW1), WORK(IW2)) + NMV = NMV + 1 + ENDIF + GO TO 99 ENDIF +! CYC +!----------------------------------------------------------------------- ! * * * * End of Reverse Communication * * * * * * * * * * * * * * * * * RETURN diff --git a/src/lib/libdvd90/gdvd_I.f90 b/src/lib/libdvd90/gdvd_I.f90 index 67d6eec4b..11c01f987 100644 --- a/src/lib/libdvd90/gdvd_I.f90 +++ b/src/lib/libdvd90/gdvd_I.f90 @@ -3,11 +3,12 @@ MODULE gdvd_I !...Generated by Pacific-Sierra Research 77to90 4.3E 20:12:31 2/12/04 !...Modified by Charlotte Froese Fischer ! Gediminas Gaigalas 10/05/17 - SUBROUTINE gdvd (OP, N, LIM, DIAG, ILOW, IHIGH, ISELEC, NIV, MBLOCK& + SUBROUTINE gdvd (OP, IRESTART_GDVD, N, LIM, DIAG, ILOW, IHIGH, ISELEC, NIV, MBLOCK& , CRITE, CRITC, CRITR, ORTHO, MAXITER, WORK, IWRSZ, IWORK, IIWSZ& , HIEND, NLOOPS, NMV, IERR) USE vast_kind_param,ONLY: DOUBLE EXTERNAL OP + integer, INTENT(IN) :: IRESTART_GDVD integer, INTENT(IN) :: N integer, INTENT(IN) :: LIM real(DOUBLE), DIMENSION(N), INTENT(INOUT) :: DIAG From 14ba948d70fddf9afa11a5a799f661479a165b20 Mon Sep 17 00:00:00 2001 From: Chongyang Chen Date: Sat, 16 Jan 2021 07:53:17 +0800 Subject: [PATCH 6/9] Minor modifications for rmcdhf90/maneig.f90, according to those for gdvd --- src/appl/rmcdhf90/maneig.f90 | 7 ++++++- src/appl/rmcdhf90_mpi/maneigmpi.f90 | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/appl/rmcdhf90/maneig.f90 b/src/appl/rmcdhf90/maneig.f90 index d71aa3c26..583851a69 100644 --- a/src/appl/rmcdhf90/maneig.f90 +++ b/src/appl/rmcdhf90/maneig.f90 @@ -78,6 +78,10 @@ SUBROUTINE MANEIG(dvdfirst, LPRINT, JBLOCK, & REAL(DOUBLE), DIMENSION(:), POINTER :: WORK, DIAG LOGICAL :: HIEND !----------------------------------------------- +!CYC: Search the targeted eigenpairs one by one + INTEGER IRESTART_GDVD + IRESTART_GDVD = 0 +! !PRINT *, 'maneig ...' @@ -159,7 +163,8 @@ SUBROUTINE MANEIG(dvdfirst, LPRINT, JBLOCK, & CALL ALLOC (IWORK, LIWORK, 'IWORK', 'MANEIG') CALL ALLOC (JWORK, LIM, 'JWORK', 'MANEIG') - CALL GDVD (SPICMV2, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, MBLOCK, & + !CALL GDVD (SPICMV2, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, MBLOCK, & + CALL GDVD (SPICMV2, IRESTART_GDVD, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, MBLOCK, & CRITE, CRITC, CRITR, ORTHO, MAXITR, WORK, LWORK, IWORK, LIWORK, & HIEND, NLOOPS, NMV, IERR) diff --git a/src/appl/rmcdhf90_mpi/maneigmpi.f90 b/src/appl/rmcdhf90_mpi/maneigmpi.f90 index afa217d0e..4d9313b42 100644 --- a/src/appl/rmcdhf90_mpi/maneigmpi.f90 +++ b/src/appl/rmcdhf90_mpi/maneigmpi.f90 @@ -78,8 +78,12 @@ SUBROUTINE MANEIGmpi(dvdfirst, LPRINT, JBLOCK, & REAL(DOUBLE) :: PNWORK, CRITE, CRITC, CRITR, ORTHO, AMAX, WA, DNFAC REAL(DOUBLE), DIMENSION(:), POINTER :: WORK, DIAG, atmp LOGICAL :: HIEND -!----------------------------------------------- +!----------------------------------------------- +!CYC: Search the targeted eigenpairs one by one + INTEGER IRESTART_GDVD + IRESTART_GDVD = 0 +!----------------------------------------------- !PRINT *, 'maneig ...' ! ...spicmvmpi needs this COMMON /WCHBLK/JBLOCKK @@ -174,7 +178,8 @@ SUBROUTINE MANEIGmpi(dvdfirst, LPRINT, JBLOCK, & CALL ALLOC (IWORK, LIWORK, 'IWORK', 'MANEIGmpi') CALL ALLOC (JWORK, LIM, 'JWORK', 'MANEIGmpi') if (ncf.gt.1000) then - CALL GDVD (SPICMVMPI,NCF,LIM,DIAG,ILOW,IHIGH,JWORK,NIV,MBLOCK, & + !CALL GDVD (SPICMVMPI,NCF,LIM,DIAG,ILOW,IHIGH,JWORK,NIV,MBLOCK, & + CALL GDVD (SPICMVMPI,IRESTART_GDVD,NCF,LIM,DIAG,ILOW,IHIGH,JWORK,NIV,MBLOCK, & CRITE, CRITC, CRITR, ORTHO, MAXITR, WORK, LWORK, IWORK, LIWORK, & HIEND, NLOOPS, NMV, IERR) end if From ddad3949ad2d52ce2fa7c0025af4bb3b68ce0564 Mon Sep 17 00:00:00 2001 From: Chongyang Chen Date: Sat, 16 Jan 2021 10:12:34 +0800 Subject: [PATCH 7/9] Minor modification for searching the target eigenpairs one by one. --- src/appl/rci90/maneig.f90 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/appl/rci90/maneig.f90 b/src/appl/rci90/maneig.f90 index 2a1ee57d6..743982e21 100644 --- a/src/appl/rci90/maneig.f90 +++ b/src/appl/rci90/maneig.f90 @@ -107,6 +107,10 @@ SUBROUTINE MANEIG(IATJPO, IASPAR) REAL(DOUBLE), DIMENSION(:), pointer :: w, z, work, diag INTEGER, DIMENSION(:), pointer :: iwork, ifail, jwork !----------------------------------------------------------------------- +!CYC: Search the targeted eigenpairs one by one + INTEGER IRESTART_GDVD + IRESTART_GDVD = 0 + ABSTOL = 2*DLAMCH('S') MYID = 0 NPROCS = 1 @@ -436,7 +440,7 @@ SUBROUTINE MANEIG(IATJPO, IASPAR) !NIV = 0 ! Why equal 0 ??? !WRITE (6, *) ' Calling gdvd(spodmv,...' - CALL GDVD (SPODMV, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, & + CALL GDVD (SPODMV, IRESTART_GDVD, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, & MBLOCK, CRITE, CRITC, CRITR, ORTHO, MAXITR, WORK, LWORK, & IWORK, LIWORK, HIEND, NLOOPS, NMV, IERR) From 892c594d598a72ed7000c0ee8934f89cdd6ace0c Mon Sep 17 00:00:00 2001 From: Chongyang Chen Date: Mon, 18 Jan 2021 17:59:18 +0800 Subject: [PATCH 8/9] Comment off the serial module rci90 and rmcdhf90, as dvdson is parallelized. --- .gitignore | 1 + src/appl/Makefile | 7 ++++--- src/appl/rci90/maneig.f90 | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 69353dc26..1ee24f711 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.o *.mod +*.swp lib/* bin/* diff --git a/src/appl/Makefile b/src/appl/Makefile index 019667fa7..991ceb147 100644 --- a/src/appl/Makefile +++ b/src/appl/Makefile @@ -1,7 +1,8 @@ -SUBDIR = HF jj2lsj90 jjgen90 rangular90 rbiotransform90 rci90 rcsfgenerate90 \ - rcsfinteract90 rcsfzerofirst90 rhfs90 rmcdhf90 rnucleus90 rtransition90 rwfnestimate90 sms90 \ +SUBDIR = HF jj2lsj90 jjgen90 rangular90 rbiotransform90 rcsfgenerate90 \ + rcsfinteract90 rcsfzerofirst90 rhfs90 rnucleus90 rtransition90 rwfnestimate90 sms90 \ rangular90_mpi rbiotransform90_mpi rci90_mpi rmcdhf90_mpi rtransition90_mpi - + # rci90 rmcdhf90 + TARGETS = install $(TARGETS): diff --git a/src/appl/rci90/maneig.f90 b/src/appl/rci90/maneig.f90 index 743982e21..e60b9bb0a 100644 --- a/src/appl/rci90/maneig.f90 +++ b/src/appl/rci90/maneig.f90 @@ -452,7 +452,7 @@ SUBROUTINE MANEIG(IATJPO, IASPAR) !WRITE (*, *) NCF, NIV, (WORK(I),I=NCF*NIV + 1,NCF*NIV + NIV) !WRITE (*, *) LIM, ILOW, IHIGH, MBLOCK, MAXITR, LWORK, LIWORK !WRITE (*, *) IERR - CALL GDVD (SPICMV2, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, & + CALL GDVD (SPICMV2, IRESTART_GDVD, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, & MBLOCK, CRITE, CRITC, CRITR, ORTHO, MAXITR, WORK, LWORK, & IWORK, LIWORK, HIEND, NLOOPS, NMV, IERR) !WRITE (*, *) 'after gdvd...' @@ -469,7 +469,7 @@ SUBROUTINE MANEIG(IATJPO, IASPAR) WRITE (6, *) ' Dense - Memory, iniestdm' ! CALL INIESTDM (1000,NCF,NIV,WORK,EMT) CALL INIESTDM (2000, NCF, NIV, WORK, EMT) - CALL GDVD (DNICMV, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, & + CALL GDVD (DNICMV, IRESTART_GDVD, NCF, LIM, DIAG, ILOW, IHIGH, JWORK, NIV, & MBLOCK, CRITE, CRITC, CRITR, ORTHO, MAXITR, WORK, LWORK, & IWORK, LIWORK, HIEND, NLOOPS, NMV, IERR) CALL DALLOC (EMT, 'EMT', 'MANEIG') From 7e2a0abfeb319380c004bd4b74495a8375952bee Mon Sep 17 00:00:00 2001 From: Chongyang Chen Date: Wed, 20 Jan 2021 22:51:00 +0800 Subject: [PATCH 9/9] Modifications for Librangular code as suggested by G.G (https://github.com/CYChenFudan/grasp/commit/76eb371dbf4c3261c95f10243de024ed4cb7ff24#commitcomment-46147246) --- src/lib/librang90/el1.f90 | 7 ++++--- src/lib/librang90/reco.f90 | 3 ++- src/lib/librang90/rkco_gg.f90 | 10 +++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/lib/librang90/el1.f90 b/src/lib/librang90/el1.f90 index 39fb09c49..60827a657 100644 --- a/src/lib/librang90/el1.f90 +++ b/src/lib/librang90/el1.f90 @@ -13,7 +13,7 @@ SUBROUTINE EL1(JJA,JJB,JA,JB,IIRE,ICOLBREI) ! * ! Written by G. Gaigalas * ! Transform to fortran 90/95 by G. Gaigalas December 2012 * -! The last modification made by G. Gaigalas October 2017 * +! The last modification made by G. Gaigalas October 2020 * ! * !******************************************************************* ! @@ -64,13 +64,14 @@ SUBROUTINE EL1(JJA,JJB,JA,JB,IIRE,ICOLBREI) !----------------------------------------------- IF(JA /= JB)GO TO 9 ! - IF(JJA /= JJB) RETURN +!GG IF(JJA /= JJB) RETURN ! ! THE CASE 1111 + + - - ! IF(IIRE /= 0) THEN CALL RECO(JA,JA,JA,JA,0,IAT) - IF(IAT /= 0)RETURN +!GG IF(IAT /= 0)RETURN + IF(IAT == 0)RETURN END IF CALL PERKO2(JA,JA,JA,JA,1) QM1=HALF diff --git a/src/lib/librang90/reco.f90 b/src/lib/librang90/reco.f90 index 3c2304641..207c1ce30 100644 --- a/src/lib/librang90/reco.f90 +++ b/src/lib/librang90/reco.f90 @@ -8,7 +8,7 @@ SUBROUTINE RECO(JA1,JA2,JA3,JA4,KA,IAT) ! * ! Written by G. Gaigalas * ! Transform to fortran 90/95 by G. Gaigalas December 2012 * -! The last modification made by G. Gaigalas October 2017 * +! The last modification made by G. Gaigalas October 2020 * ! * !******************************************************************* ! @@ -48,6 +48,7 @@ SUBROUTINE RECO(JA1,JA2,JA3,JA4,KA,IAT) IF(I == JA2) CYCLE END IF DO J=1,3 + IF(JA1 == JA2 .AND. I == JA1 .AND. J == 1) CYCLE IF(JJQ1(J,IJ) /= JJQ2(J,IJ))IAT=0 END DO END DO diff --git a/src/lib/librang90/rkco_gg.f90 b/src/lib/librang90/rkco_gg.f90 index 29c3000cf..43caaeaac 100644 --- a/src/lib/librang90/rkco_gg.f90 +++ b/src/lib/librang90/rkco_gg.f90 @@ -15,7 +15,7 @@ SUBROUTINE RKCO_GG (JA,JB,CORD,INCOR,ICOLBREI) ! * ! Rewrite by G. Gaigalas * ! Transform to fortran 90/95 by G. Gaigalas December 2012 * -! The last modification made by G. Gaigalas October 2017 * +! The last modification made by G. Gaigalas October 2020 * ! * !*********************************************************************** ! @@ -330,8 +330,12 @@ SUBROUTINE RKCO_GG (JA,JB,CORD,INCOR,ICOLBREI) IF (NQ1(K1) .LE. 1) CYCLE END IF JA2 = JA1 - IF(JA.NE.JB) THEN - IF(IDQG.NE.2) THEN + IF(JA /= JB) THEN + IF(NPEEL == 1 .AND. NQ1(K1) == NQ2(K1)) THEN +! +! TARP TU PACIU BUSENU + CALL EL1(JA,JB,JA1,JB1,1,ICOLBREI) + ELSE IF(IDQG.NE.2) THEN WRITE(99,996) WRITE(99,*)"JA,JB,JA1,JB1",JA,JB,JA1,JB1 WRITE(99,*)"IDQG IDQ",IDQG,IDQ