Commit 093d8b81 authored by Victor Yu's avatar Victor Yu
Browse files

Compute "+V" in frozen core more efficiently

parent 70a7e36b
......@@ -7,7 +7,7 @@ SET(elsi_URL "http://elsi-interchange.org")
SET(elsi_EMAIL "elsi-team@duke.edu")
SET(elsi_LICENSE "BSD 3")
SET(elsi_DESCRIPTION "Electronic Structure Infrastructure")
SET(elsi_DATESTAMP "20210124")
SET(elsi_DATESTAMP "20210222")
### CMake modules ###
LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
......
......@@ -595,28 +595,34 @@ subroutine elsi_do_fc_elpa_real(ph,bh,ham,ovlp,evec,perm,ham_v,ovlp_v,evec_v)
bh%desc,ham_v,1,1,ph%desc_v,bh%blacs_ctxt)
! Compute H_vv
call pdgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,1.0_r8,ovlp,&
ph%n_basis_c+1,1,bh%desc,ham,1,1,bh%desc,0.0_r8,evec,1,1,bh%desc)
if(ph%fc_method == FC_PLUS_V) then
! H_vv = H_vv + S_vc * H_cc * S_cv - H_vc * S_cv - S_vc * H_cv
! More accurate than H_vv = H_vv - S_vc * H_cc * S_cv
call pdgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,1.0_r8,evec,1,&
1,bh%desc,ovlp,1,ph%n_basis_c+1,bh%desc,1.0_r8,ham_v,1,1,ph%desc_v)
! H_vv = A + A^*
! A = (0.5*S_vc * H_cc - H_vc) * S_cv
call pdgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,0.5_r8,ovlp,&
ph%n_basis_c+1,1,bh%desc,ham,1,1,bh%desc,0.0_r8,evec,ph%n_basis_c+1,&
1,bh%desc)
call pdgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,1.0_r8,ham,&
ph%n_basis_c+1,1,bh%desc,ovlp,1,ph%n_basis_c+1,bh%desc,0.0_r8,&
evec_v,1,1,ph%desc_v)
evec(:,:) = evec-ham
ham_v(:,:) = ham_v-evec_v
call pdgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,1.0_r8,evec,&
ph%n_basis_c+1,1,bh%desc,ovlp,1,ph%n_basis_c+1,bh%desc,0.0_r8,ham_v,&
1,1,ph%desc_v)
call pdtran(ph%n_basis_v,ph%n_basis_v,-1.0_r8,evec_v,1,1,ph%desc_v,&
1.0_r8,ham_v,1,1,ph%desc_v)
call pdtran(ph%n_basis_v,ph%n_basis_v,1.0_r8,ham_v,1,1,ph%desc_v,0.0_r8,&
evec_v,1,1,ph%desc_v)
ham_v(:,:) = ham_v+evec_v
else
! H_vv = H_vv - S_vc * H_cc * S_cv
call pdgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,1.0_r8,ovlp,&
ph%n_basis_c+1,1,bh%desc,ham,1,1,bh%desc,0.0_r8,evec,ph%n_basis_c+1,&
1,bh%desc)
call pdgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,-1.0_r8,evec,&
1,1,bh%desc,ovlp,1,ph%n_basis_c+1,bh%desc,1.0_r8,ham_v,1,1,&
ph%desc_v)
ph%n_basis_c+1,1,bh%desc,ovlp,1,ph%n_basis_c+1,bh%desc,1.0_r8,ham_v,&
1,1,ph%desc_v)
end if
! Switch to valence dimension
......@@ -1248,30 +1254,34 @@ subroutine elsi_do_fc_elpa_cmplx(ph,bh,ham,ovlp,evec,perm,ham_v,ovlp_v,evec_v)
bh%desc,ham_v,1,1,ph%desc_v,bh%blacs_ctxt)
! Compute H_vv
call pzgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,(1.0_r8,0.0_r8),&
ovlp,ph%n_basis_c+1,1,bh%desc,ham,1,1,bh%desc,(0.0_r8,0.0_r8),evec,1,1,&
bh%desc)
if(ph%fc_method == FC_PLUS_V) then
! H_vv = H_vv + S_vc * H_cc * S_cv - H_vc * S_cv - S_vc * H_cv
! More accurate than H_vv = H_vv - S_vc * H_cc * S_cv
call pzgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,&
(1.0_r8,0.0_r8),evec,1,1,bh%desc,ovlp,1,ph%n_basis_c+1,bh%desc,&
(1.0_r8,0.0_r8),ham_v,1,1,ph%desc_v)
! H_vv = A + A^*
! A = (0.5*S_vc * H_cc - H_vc) * S_cv
call pzgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,&
(0.5_r8,0.0_r8),ovlp,ph%n_basis_c+1,1,bh%desc,ham,1,1,bh%desc,&
(0.0_r8,0.0_r8),evec,ph%n_basis_c+1,1,bh%desc)
evec(:,:) = evec-ham
call pzgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,&
(1.0_r8,0.0_r8),ham,ph%n_basis_c+1,1,bh%desc,ovlp,1,ph%n_basis_c+1,&
bh%desc,(0.0_r8,0.0_r8),evec_v,1,1,ph%desc_v)
(1.0_r8,0.0_r8),evec,ph%n_basis_c+1,1,bh%desc,ovlp,1,ph%n_basis_c+1,&
bh%desc,(0.0_r8,0.0_r8),ham_v,1,1,ph%desc_v)
ham_v(:,:) = ham_v-evec_v
call pztranc(ph%n_basis_v,ph%n_basis_v,(1.0_r8,0.0_r8),ham_v,1,1,&
ph%desc_v,(0.0_r8,0.0_r8),evec_v,1,1,ph%desc_v)
call pztranc(ph%n_basis_v,ph%n_basis_v,(-1.0_r8,0.0_r8),evec_v,1,1,&
ph%desc_v,(1.0_r8,0.0_r8),ham_v,1,1,ph%desc_v)
ham_v(:,:) = ham_v+evec_v
else
! H_vv = H_vv - S_vc * H_cc * S_cv
call pzgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,&
(1.0_r8,0.0_r8),ovlp,ph%n_basis_c+1,1,bh%desc,ham,1,1,bh%desc,&
(0.0_r8,0.0_r8),evec,ph%n_basis_c+1,1,bh%desc)
call pzgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,&
(-1.0_r8,0.0_r8),evec,1,1,bh%desc,ovlp,1,ph%n_basis_c+1,bh%desc,&
(1.0_r8,0.0_r8),ham_v,1,1,ph%desc_v)
(-1.0_r8,0.0_r8),evec,ph%n_basis_c+1,1,bh%desc,ovlp,1,&
ph%n_basis_c+1,bh%desc,(1.0_r8,0.0_r8),ham_v,1,1,ph%desc_v)
end if
! Switch to valence dimension
......
......@@ -471,29 +471,33 @@ subroutine elsi_do_fc_lapack_real(ph,ham,ovlp,evec,perm)
end if
! Compute H_vv
call dgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,1.0_r8,&
ovlp(ph%n_basis_c+1,1),ph%n_basis,ham,ph%n_basis,0.0_r8,evec,ph%n_basis)
if(ph%fc_method == FC_PLUS_V) then
! H_vv = H_vv + S_vc * H_cc * S_cv - H_vc * S_cv - S_vc * H_cv
! More accurate than H_vv = H_vv - S_vc * H_cc * S_cv
call dgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,1.0_r8,evec,&
ph%n_basis,ovlp(1,ph%n_basis_c+1),ph%n_basis,1.0_r8,&
ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
! H_vv = A + A^*
! A = (0.5*S_vc * H_cc - H_vc) * S_cv
call dgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,0.5_r8,&
ovlp(ph%n_basis_c+1,1),ph%n_basis,ham,ph%n_basis,0.0_r8,&
evec(ph%n_basis_c+1,1),ph%n_basis)
evec(:,:) = evec-ham
call dgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,1.0_r8,&
ham(ph%n_basis_c+1,1),ph%n_basis,ovlp(1,ph%n_basis_c+1),ph%n_basis,&
0.0_r8,evec(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
evec(ph%n_basis_c+1,1),ph%n_basis,ovlp(1,ph%n_basis_c+1),ph%n_basis,&
0.0_r8,ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
ham(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis) =&
ham(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis)&
-evec(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis)&
-transpose(evec(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis))
+transpose(ham(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis))
else
! H_vv = H_vv - S_vc * H_cc * S_cv
call dgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,-1.0_r8,evec,&
ph%n_basis,ovlp(1,ph%n_basis_c+1),ph%n_basis,1.0_r8,&
ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
call dgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,1.0_r8,&
ovlp(ph%n_basis_c+1,1),ph%n_basis,ham,ph%n_basis,0.0_r8,&
evec(ph%n_basis_c+1,1),ph%n_basis)
call dgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,-1.0_r8,&
evec(ph%n_basis_c+1,1),ph%n_basis,ovlp(1,ph%n_basis_c+1),ph%n_basis,&
1.0_r8,ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
end if
! Switch to valence dimension
......@@ -998,32 +1002,36 @@ subroutine elsi_do_fc_lapack_cmplx(ph,ham,ovlp,evec,perm)
end if
! Compute H_vv
call zgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,(1.0_r8,0.0_r8),&
ovlp(ph%n_basis_c+1,1),ph%n_basis,ham,ph%n_basis,(0.0_r8,0.0_r8),evec,&
ph%n_basis)
if(ph%fc_method == FC_PLUS_V) then
! H_vv = H_vv + S_vc * H_cc * S_cv - H_vc * S_cv - S_vc * H_cv
! More accurate than H_vv = H_vv - S_vc * H_cc * S_cv
call zgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,&
(1.0_r8,0.0_r8),evec,ph%n_basis,ovlp(1,ph%n_basis_c+1),ph%n_basis,&
(1.0_r8,0.0_r8),ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
! H_vv = A + A^*
! A = (0.5*S_vc * H_cc - H_vc) * S_cv
call zgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,&
(0.5_r8,0.0_r8),ovlp(ph%n_basis_c+1,1),ph%n_basis,ham,ph%n_basis,&
(0.0_r8,0.0_r8),evec(ph%n_basis_c+1,1),ph%n_basis)
evec(:,:) = evec-ham
call zgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,&
(1.0_r8,0.0_r8),ham(ph%n_basis_c+1,1),ph%n_basis,&
(1.0_r8,0.0_r8),evec(ph%n_basis_c+1,1),ph%n_basis,&
ovlp(1,ph%n_basis_c+1),ph%n_basis,(0.0_r8,0.0_r8),&
evec(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
ham(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis) =&
ham(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis)&
-evec(ph%n_basis_c+1:ph%n_basis,ph%n_basis_c+1:ph%n_basis)&
-conjg(transpose(evec(ph%n_basis_c+1:ph%n_basis,&
+conjg(transpose(ham(ph%n_basis_c+1:ph%n_basis,&
ph%n_basis_c+1:ph%n_basis)))
else
! H_vv = H_vv - S_vc * H_cc * S_cv
call zgemm("N","N",ph%n_basis_v,ph%n_basis_c,ph%n_basis_c,&
(1.0_r8,0.0_r8),ovlp(ph%n_basis_c+1,1),ph%n_basis,ham,ph%n_basis,&
(0.0_r8,0.0_r8),evec(ph%n_basis_c+1,1),ph%n_basis)
call zgemm("N","N",ph%n_basis_v,ph%n_basis_v,ph%n_basis_c,&
(-1.0_r8,0.0_r8),evec,ph%n_basis,ovlp(1,ph%n_basis_c+1),ph%n_basis,&
(1.0_r8,0.0_r8),ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
(-1.0_r8,0.0_r8),evec(ph%n_basis_c+1,1),ph%n_basis,&
ovlp(1,ph%n_basis_c+1),ph%n_basis,(1.0_r8,0.0_r8),&
ham(ph%n_basis_c+1,ph%n_basis_c+1),ph%n_basis)
end if
! Switch to valence dimension
......
......@@ -856,7 +856,8 @@ subroutine elsi_build_dm_edm_real(ph,bh,factor,evec,dm,which)
! ELPA routine only faster on GPUs
if(ierr /= 0 .or. ph%elpa_gpu == 0 .or. ph%solver /= ELPA_SOLVER&
.or. bh%blk*(max(bh%n_prow,bh%n_pcol)-1) >= max_state) then
.or. bh%blk*(max(bh%n_prow,bh%n_pcol)-1) >= max_state&
.or. ph%n_basis_c > 0) then
use_elpa_mult = .false.
else
use_elpa_mult = .true.
......@@ -966,7 +967,8 @@ subroutine elsi_build_dm_edm_cmplx(ph,bh,factor,evec,dm,which)
! ELPA routine only faster on GPUs
if(ierr /= 0 .or. ph%elpa_gpu == 0 .or. ph%solver /= ELPA_SOLVER&
.or. bh%blk*(max(bh%n_prow,bh%n_pcol)-1) >= max_state) then
.or. bh%blk*(max(bh%n_prow,bh%n_pcol)-1) >= max_state&
.or. ph%n_basis_c > 0) then
use_elpa_mult = .false.
else
use_elpa_mult = .true.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment