Commit bfac082c authored by Victor Yu's avatar Victor Yu

Update Cholesky-based density matrix extrapolation

Number of matrix multiplication is minimized.
parent c3464339
......@@ -9,7 +9,7 @@
!!
module ELSI_ELPA
use ELSI_CONSTANTS, only: UT_MAT,UNSET
use ELSI_CONSTANTS, only: LT_MAT,UT_MAT,UNSET
use ELSI_DATATYPE, only: elsi_param_t,elsi_basic_t
use ELSI_IO, only: elsi_say,elsi_get_time
use ELSI_MALLOC, only: elsi_allocate,elsi_deallocate
......@@ -376,12 +376,14 @@ end subroutine
!! This routine extrapolates density matrix using Cholesky decomposition of the
!! old and new overlap matrices.
!!
subroutine elsi_update_dm_elpa_real(ph,bh,ovlp0,ovlp1,dm)
subroutine elsi_update_dm_elpa_real(ph,bh,row_map,col_map,ovlp0,ovlp1,dm)
implicit none
type(elsi_param_t), intent(in) :: ph
type(elsi_basic_t), intent(in) :: bh
integer(kind=i4), intent(in) :: row_map(ph%n_basis)
integer(kind=i4), intent(in) :: col_map(ph%n_basis)
real(kind=r8), intent(inout) :: ovlp0(bh%n_lrow,bh%n_lcol)
real(kind=r8), intent(inout) :: ovlp1(bh%n_lrow,bh%n_lcol)
real(kind=r8), intent(inout) :: dm(bh%n_lrow,bh%n_lcol)
......@@ -402,45 +404,31 @@ subroutine elsi_update_dm_elpa_real(ph,bh,ovlp0,ovlp1,dm)
! S_1 = U_1^(-1)
call elsi_elpa_invert(ph,bh,ovlp1)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! tmp = U_1^(-T)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,ovlp1,1,1,bh%desc,0.0_r8,tmp,1,1,&
bh%desc)
! S_1 = U_1^(-T)
ovlp1 = tmp
! S_0 = U_0
call elsi_elpa_invert(ph,bh,ovlp0)
! tmp = U_0^T
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,ovlp0,1,1,bh%desc,0.0_r8,tmp,1,1,&
bh%desc)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! S_0 = U_0^T
ovlp0 = tmp
! tmp = U_1^(-1) U_0
call elsi_elpa_multiply(ph,bh,"U","U",ph%n_basis,ovlp1,ovlp0,tmp)
! tmp = U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp0,dm,tmp)
! ovlp0 = U_0^T U_1^(-T)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,tmp,1,1,bh%desc,0.0_r8,ovlp0,1,1,&
bh%desc)
! dm = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp1,tmp,dm)
! tmp = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","U",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1) U_0 P_0 U_0^T
call pdgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,1.0_r8,dm,1,1,bh%desc,&
ovlp0,1,1,bh%desc,0.0_r8,tmp,1,1,bh%desc)
! dm = P_0 U_0^T U_1^(-T)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,tmp,1,1,bh%desc,0.0_r8,dm,1,1,&
bh%desc)
! dm = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call pdgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,1.0_r8,tmp,1,1,bh%desc,&
ovlp1,1,1,bh%desc,0.0_r8,dm,1,1,bh%desc)
! tmp = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call elsi_elpa_multiply(ph,bh,"L","L",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,ovlp0,1,1,bh%desc,0.0_r8,tmp,1,1,&
bh%desc)
call elsi_set_full_mat(ph,bh,LT_MAT,row_map,col_map,tmp)
! S_1 = U_1^(-1)
ovlp1 = tmp
dm = tmp
call elsi_deallocate(bh,tmp,"tmp")
......@@ -727,12 +715,14 @@ end subroutine
!! This routine extrapolates density matrix using Cholesky decomposition of the
!! old and new overlap matrices.
!!
subroutine elsi_update_dm_elpa_cmplx(ph,bh,ovlp0,ovlp1,dm)
subroutine elsi_update_dm_elpa_cmplx(ph,bh,row_map,col_map,ovlp0,ovlp1,dm)
implicit none
type(elsi_param_t), intent(in) :: ph
type(elsi_basic_t), intent(in) :: bh
integer(kind=i4), intent(in) :: row_map(ph%n_basis)
integer(kind=i4), intent(in) :: col_map(ph%n_basis)
complex(kind=r8), intent(inout) :: ovlp0(bh%n_lrow,bh%n_lcol)
complex(kind=r8), intent(inout) :: ovlp1(bh%n_lrow,bh%n_lcol)
complex(kind=r8), intent(inout) :: dm(bh%n_lrow,bh%n_lcol)
......@@ -753,45 +743,31 @@ subroutine elsi_update_dm_elpa_cmplx(ph,bh,ovlp0,ovlp1,dm)
! S_1 = U_1^(-1)
call elsi_elpa_invert(ph,bh,ovlp1)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! tmp = U_1^(-T)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),ovlp1,1,1,bh%desc,&
(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
! S_1 = U_1^(-T)
ovlp1 = tmp
! S_0 = U_0
call elsi_elpa_invert(ph,bh,ovlp0)
! tmp = U_0^T
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),ovlp0,1,1,bh%desc,&
(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! S_0 = U_0^T
ovlp0 = tmp
! tmp = U_1^(-1) U_0
call elsi_elpa_multiply(ph,bh,"U","U",ph%n_basis,ovlp1,ovlp0,tmp)
! tmp = U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp0,dm,tmp)
! ovlp0 = U_0^T U_1^(-T)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),tmp,1,1,bh%desc,&
(0.0_r8,0.0_r8),ovlp0,1,1,bh%desc)
! dm = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp1,tmp,dm)
! tmp = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","U",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1) U_0 P_0 U_0^T
call pzgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),dm,1,1,&
bh%desc,ovlp0,1,1,bh%desc,(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
! dm = P_0 U_0^T U_1^(-T)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),tmp,1,1,bh%desc,&
(0.0_r8,0.0_r8),dm,1,1,bh%desc)
! dm = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call pzgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),tmp,1,&
1,bh%desc,ovlp1,1,1,bh%desc,(0.0_r8,0.0_r8),dm,1,1,bh%desc)
! tmp = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call elsi_elpa_multiply(ph,bh,"L","L",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),ovlp0,1,1,bh%desc,&
(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
call elsi_set_full_mat(ph,bh,LT_MAT,row_map,col_map,tmp)
! S_1 = U_1^(-1)
ovlp1 = tmp
dm = tmp
call elsi_deallocate(bh,tmp,"tmp")
......
......@@ -9,7 +9,7 @@
!!
module ELSI_ELPA
use ELSI_CONSTANTS, only: UT_MAT,UNSET
use ELSI_CONSTANTS, only: LT_MAT,UT_MAT,UNSET
use ELSI_DATATYPE, only: elsi_param_t,elsi_basic_t
use ELSI_IO, only: elsi_say,elsi_get_time
use ELSI_MALLOC, only: elsi_allocate,elsi_deallocate
......@@ -391,12 +391,14 @@ end subroutine
!! This routine extrapolates density matrix using Cholesky decomposition of the
!! old and new overlap matrices.
!!
subroutine elsi_update_dm_elpa_real(ph,bh,ovlp0,ovlp1,dm)
subroutine elsi_update_dm_elpa_real(ph,bh,row_map,col_map,ovlp0,ovlp1,dm)
implicit none
type(elsi_param_t), intent(in) :: ph
type(elsi_basic_t), intent(in) :: bh
integer(kind=i4), intent(in) :: row_map(ph%n_basis)
integer(kind=i4), intent(in) :: col_map(ph%n_basis)
real(kind=r8), intent(inout) :: ovlp0(bh%n_lrow,bh%n_lcol)
real(kind=r8), intent(inout) :: ovlp1(bh%n_lrow,bh%n_lcol)
real(kind=r8), intent(inout) :: dm(bh%n_lrow,bh%n_lcol)
......@@ -417,45 +419,31 @@ subroutine elsi_update_dm_elpa_real(ph,bh,ovlp0,ovlp1,dm)
! S_1 = U_1^(-1)
call elsi_elpa_invert(ph,bh,ovlp1)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! tmp = U_1^(-T)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,ovlp1,1,1,bh%desc,0.0_r8,tmp,1,1,&
bh%desc)
! S_1 = U_1^(-T)
ovlp1 = tmp
! S_0 = U_0
call elsi_elpa_invert(ph,bh,ovlp0)
! tmp = U_0^T
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,ovlp0,1,1,bh%desc,0.0_r8,tmp,1,1,&
bh%desc)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! S_0 = U_0^T
ovlp0 = tmp
! tmp = U_1^(-1) U_0
call elsi_elpa_multiply(ph,bh,"U","U",ph%n_basis,ovlp1,ovlp0,tmp)
! tmp = U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp0,dm,tmp)
! ovlp0 = U_0^T U_1^(-T)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,tmp,1,1,bh%desc,0.0_r8,ovlp0,1,1,&
bh%desc)
! dm = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp1,tmp,dm)
! tmp = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","U",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1) U_0 P_0 U_0^T
call pdgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,1.0_r8,dm,1,1,bh%desc,&
ovlp0,1,1,bh%desc,0.0_r8,tmp,1,1,bh%desc)
! dm = P_0 U_0^T U_1^(-T)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,tmp,1,1,bh%desc,0.0_r8,dm,1,1,&
bh%desc)
! dm = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call pdgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,1.0_r8,tmp,1,1,bh%desc,&
ovlp1,1,1,bh%desc,0.0_r8,dm,1,1,bh%desc)
! tmp = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call elsi_elpa_multiply(ph,bh,"L","L",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1)
call pdtran(ph%n_basis,ph%n_basis,1.0_r8,ovlp0,1,1,bh%desc,0.0_r8,tmp,1,1,&
bh%desc)
call elsi_set_full_mat(ph,bh,LT_MAT,row_map,col_map,tmp)
! S_1 = U_1^(-1)
ovlp1 = tmp
dm = tmp
call elsi_deallocate(bh,tmp,"tmp")
......@@ -744,12 +732,14 @@ end subroutine
!! This routine extrapolates density matrix using Cholesky decomposition of the
!! old and new overlap matrices.
!!
subroutine elsi_update_dm_elpa_cmplx(ph,bh,ovlp0,ovlp1,dm)
subroutine elsi_update_dm_elpa_cmplx(ph,bh,row_map,col_map,ovlp0,ovlp1,dm)
implicit none
type(elsi_param_t), intent(in) :: ph
type(elsi_basic_t), intent(in) :: bh
integer(kind=i4), intent(in) :: row_map(ph%n_basis)
integer(kind=i4), intent(in) :: col_map(ph%n_basis)
complex(kind=r8), intent(inout) :: ovlp0(bh%n_lrow,bh%n_lcol)
complex(kind=r8), intent(inout) :: ovlp1(bh%n_lrow,bh%n_lcol)
complex(kind=r8), intent(inout) :: dm(bh%n_lrow,bh%n_lcol)
......@@ -770,45 +760,31 @@ subroutine elsi_update_dm_elpa_cmplx(ph,bh,ovlp0,ovlp1,dm)
! S_1 = U_1^(-1)
call elsi_elpa_invert(ph,bh,ovlp1)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! tmp = U_1^(-T)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),ovlp1,1,1,bh%desc,&
(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
! S_1 = U_1^(-T)
ovlp1 = tmp
! S_0 = U_0
call elsi_elpa_invert(ph,bh,ovlp0)
! tmp = U_0^T
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),ovlp0,1,1,bh%desc,&
(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
call elsi_allocate(bh,tmp,bh%n_lrow,bh%n_lcol,"tmp",caller)
! S_0 = U_0^T
ovlp0 = tmp
! tmp = U_1^(-1) U_0
call elsi_elpa_multiply(ph,bh,"U","U",ph%n_basis,ovlp1,ovlp0,tmp)
! tmp = U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp0,dm,tmp)
! ovlp0 = U_0^T U_1^(-T)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),tmp,1,1,bh%desc,&
(0.0_r8,0.0_r8),ovlp0,1,1,bh%desc)
! dm = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","N",ph%n_basis,ovlp1,tmp,dm)
! tmp = U_1^(-1) U_0 P_0
call elsi_elpa_multiply(ph,bh,"L","U",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1) U_0 P_0 U_0^T
call pzgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),dm,1,1,&
bh%desc,ovlp0,1,1,bh%desc,(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
! dm = P_0 U_0^T U_1^(-T)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),tmp,1,1,bh%desc,&
(0.0_r8,0.0_r8),dm,1,1,bh%desc)
! dm = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call pzgemm("N","N",ph%n_basis,ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),tmp,1,&
1,bh%desc,ovlp1,1,1,bh%desc,(0.0_r8,0.0_r8),dm,1,1,bh%desc)
! tmp = U_1^(-1) U_0 P_0 U_0^T U_1^(-T)
call elsi_elpa_multiply(ph,bh,"L","L",ph%n_basis,ovlp0,dm,tmp)
! tmp = U_1^(-1)
call pztranc(ph%n_basis,ph%n_basis,(1.0_r8,0.0_r8),ovlp0,1,1,bh%desc,&
(0.0_r8,0.0_r8),tmp,1,1,bh%desc)
call elsi_set_full_mat(ph,bh,LT_MAT,row_map,col_map,tmp)
! S_1 = U_1^(-1)
ovlp1 = tmp
dm = tmp
call elsi_deallocate(bh,tmp,"tmp")
......
......@@ -88,7 +88,7 @@ subroutine elsi_extrapolate_dm_real(eh,ovlp0,ovlp1,dm)
call elsi_check_init(eh%bh,eh%handle_init,caller)
call elsi_update_dm_elpa(eh%ph,eh%bh,ovlp0,ovlp1,dm)
call elsi_update_dm_elpa(eh%ph,eh%bh,eh%row_map,eh%col_map,ovlp0,ovlp1,dm)
end subroutine
......@@ -108,7 +108,7 @@ subroutine elsi_extrapolate_dm_complex(eh,ovlp0,ovlp1,dm)
call elsi_check_init(eh%bh,eh%handle_init,caller)
call elsi_update_dm_elpa(eh%ph,eh%bh,ovlp0,ovlp1,dm)
call elsi_update_dm_elpa(eh%ph,eh%bh,eh%row_map,eh%col_map,ovlp0,ovlp1,dm)
end subroutine
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment