Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elsi-devel
elsi-interface
Commits
4ca77db1
Commit
4ca77db1
authored
Jul 17, 2020
by
Victor Yu
Browse files
Stop ifort from creating temp array in ELPA2 kernel
parent
9ed1e6ab
Changes
12
Hide whitespace changes
Inline
Side-by-side
CHANGELOG.md
View file @
4ca77db1
# ELSI changelog
##
v2.6.1 (June 2020)
##
Not released
###
PEXSI
*
Removed an improper abort from the error handling code of PEXSI
.
###
ELPA
*
Fix a performance regression of the ELPA2 generic kernel
.
### Known issues
*
The ELPA code can not be compiled with the NAG Fortran compiler, due to the
...
...
@@ -13,6 +13,11 @@
*
Depending on the choice of k-points, the complex PEXSI solver may randomly
fail at the inertia counting stage.
## v2.6.1 (June 2020)
### PEXSI
*
Removed an improper abort from the error handling code of PEXSI.
## v2.6.0 (June 2020)
### ELSI interface
...
...
CMakeLists.txt
View file @
4ca77db1
...
...
@@ -7,7 +7,7 @@ SET(elsi_URL "http://elsi-interchange.org")
SET
(
elsi_EMAIL
"elsi-team@duke.edu"
)
SET
(
elsi_LICENSE
"BSD 3"
)
SET
(
elsi_DESCRIPTION
"Electronic Structure Infrastructure"
)
SET
(
elsi_DATESTAMP
"20200
629
"
)
SET
(
elsi_DATESTAMP
"20200
718
"
)
### CMake modules ###
LIST
(
APPEND CMAKE_MODULE_PATH
${
PROJECT_SOURCE_DIR
}
/cmake
)
...
...
external/ELPA/CMakeLists.txt
View file @
4ca77db1
...
...
@@ -29,8 +29,9 @@ LIST(APPEND elpa_src
src/mod_precision.f90
src/mod_redist_band.f90
src/mod_scalapack_interfaces.f90
src/mod_single_hh_trafo_real.f90
src/resident_set_size.c
src/single_hh_trafo_real_dp.f90
src/single_hh_trafo_real_sp.f90
src/time.c
src/virtual_memory.c
)
...
...
external/ELPA/src/elpa1_compute_private.f90
View file @
4ca77db1
...
...
@@ -3683,7 +3683,7 @@ contains
integer
(
kind
=
ik
)
::
i
,
j
,
lwork
,
liwork
,
info
integer
(
kind
=
BLAS_KIND
)
::
infoBLAS
integer
(
kind
=
ik
),
allocatable
::
iwork
(:)
integer
(
kind
=
BLAS_KIND
),
allocatable
::
iwork
(:)
logical
,
intent
(
in
)
::
wantDebug
logical
,
intent
(
out
)
::
success
...
...
@@ -3709,7 +3709,7 @@ contains
call
check_allocate_f
(
"solve_tridi_single: work, iwork"
,
603
,
istat
,
errorMessage
)
call
obj
%
timer
%
start
(
"blas"
)
call
DSTEDC
(
'I'
,
int
(
nlen
,
kind
=
BLAS_KIND
),
d
,
e
,
q
,
int
(
ldq
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
nt
(
iwork
,
kind
=
BLAS_KIND
)
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
work
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
infoBLAS
)
info
=
int
(
infoBLAS
,
kind
=
ik
)
call
obj
%
timer
%
stop
(
"blas"
)
...
...
@@ -4275,7 +4275,7 @@ contains
integer
(
kind
=
ik
)
::
i
,
j
,
lwork
,
liwork
,
info
integer
(
kind
=
BLAS_KIND
)
::
infoBLAS
integer
(
kind
=
ik
),
allocatable
::
iwork
(:)
integer
(
kind
=
BLAS_KIND
),
allocatable
::
iwork
(:)
logical
,
intent
(
in
)
::
wantDebug
logical
,
intent
(
out
)
::
success
...
...
@@ -4301,7 +4301,7 @@ contains
call
check_allocate_f
(
"solve_tridi_single: work, iwork"
,
603
,
istat
,
errorMessage
)
call
obj
%
timer
%
start
(
"blas"
)
call
DSTEDC
(
'I'
,
int
(
nlen
,
kind
=
BLAS_KIND
),
d
,
e
,
q
,
int
(
ldq
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
nt
(
iwork
,
kind
=
BLAS_KIND
)
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
work
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
infoBLAS
)
info
=
int
(
infoBLAS
,
kind
=
ik
)
call
obj
%
timer
%
stop
(
"blas"
)
...
...
@@ -7546,7 +7546,7 @@ contains
integer
(
kind
=
ik
)
::
i
,
j
,
lwork
,
liwork
,
info
integer
(
kind
=
BLAS_KIND
)
::
infoBLAS
integer
(
kind
=
ik
),
allocatable
::
iwork
(:)
integer
(
kind
=
BLAS_KIND
),
allocatable
::
iwork
(:)
logical
,
intent
(
in
)
::
wantDebug
logical
,
intent
(
out
)
::
success
...
...
@@ -7572,7 +7572,7 @@ contains
call
check_allocate_f
(
"solve_tridi_single: work, iwork"
,
603
,
istat
,
errorMessage
)
call
obj
%
timer
%
start
(
"blas"
)
call
SSTEDC
(
'I'
,
int
(
nlen
,
kind
=
BLAS_KIND
),
d
,
e
,
q
,
int
(
ldq
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
nt
(
iwork
,
kind
=
BLAS_KIND
)
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
work
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
infoBLAS
)
info
=
int
(
infoBLAS
,
kind
=
ik
)
call
obj
%
timer
%
stop
(
"blas"
)
...
...
@@ -8138,7 +8138,7 @@ contains
integer
(
kind
=
ik
)
::
i
,
j
,
lwork
,
liwork
,
info
integer
(
kind
=
BLAS_KIND
)
::
infoBLAS
integer
(
kind
=
ik
),
allocatable
::
iwork
(:)
integer
(
kind
=
BLAS_KIND
),
allocatable
::
iwork
(:)
logical
,
intent
(
in
)
::
wantDebug
logical
,
intent
(
out
)
::
success
...
...
@@ -8164,7 +8164,7 @@ contains
call
check_allocate_f
(
"solve_tridi_single: work, iwork"
,
603
,
istat
,
errorMessage
)
call
obj
%
timer
%
start
(
"blas"
)
call
SSTEDC
(
'I'
,
int
(
nlen
,
kind
=
BLAS_KIND
),
d
,
e
,
q
,
int
(
ldq
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
nt
(
iwork
,
kind
=
BLAS_KIND
)
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
work
,
int
(
lwork
,
kind
=
BLAS_KIND
),
i
work
,
int
(
liwork
,
kind
=
BLAS_KIND
),
&
infoBLAS
)
info
=
int
(
infoBLAS
,
kind
=
ik
)
call
obj
%
timer
%
stop
(
"blas"
)
...
...
external/ELPA/src/mod_compute_hh_trafo.f90
View file @
4ca77db1
...
...
@@ -70,8 +70,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -132,7 +130,7 @@ contains
&
real
&
&
_
generic_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
-1
:
a_dim2
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
&
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
enddo
...
...
@@ -140,8 +138,7 @@ contains
&
real
&
&
_
cpu_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
@@ -171,8 +168,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -233,7 +228,7 @@ contains
&
real
&
&
_
generic_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
-1
:
a_dim2
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
&
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
enddo
...
...
@@ -241,8 +236,7 @@ contains
&
real
&
&
_
cpu_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
@@ -330,7 +324,7 @@ contains
&
complex
&
&
_
generic_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
:
a_dim2
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
enddo
...
...
@@ -413,7 +407,7 @@ contains
&
complex
&
&
_
generic_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
:
a_dim2
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
enddo
...
...
external/ELPA/src/mod_compute_hh_trafo_avx.f90
View file @
4ca77db1
...
...
@@ -70,8 +70,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -139,8 +137,7 @@ contains
&
real
&
&
_
cpu_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
@@ -170,8 +167,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -239,8 +234,7 @@ contains
&
real
&
&
_
cpu_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
external/ELPA/src/mod_compute_hh_trafo_avx2.f90
View file @
4ca77db1
...
...
@@ -70,8 +70,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -139,8 +137,7 @@ contains
&
real
&
&
_
cpu_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
@@ -170,8 +167,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -239,8 +234,7 @@ contains
&
real
&
&
_
cpu_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
external/ELPA/src/mod_compute_hh_trafo_avx512.f90
View file @
4ca77db1
...
...
@@ -70,8 +70,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -139,8 +137,7 @@ contains
&
real
&
&
_
cpu_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
@@ -170,8 +167,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -239,8 +234,7 @@ contains
&
real
&
&
_
cpu_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
kernel_time
=
kernel_time
+
mpi_wtime
()
-
ttt
...
...
external/ELPA/src/mod_compute_hh_trafo_cuda.f90
View file @
4ca77db1
...
...
@@ -70,8 +70,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -155,7 +153,7 @@ contains
&
real
&
&
_
generic_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
-1
:
a_dim2
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
&
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
enddo
...
...
@@ -163,8 +161,7 @@ contains
&
real
&
&
_
cpu_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
endif
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
...
...
@@ -195,8 +192,6 @@ contains
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
use
single_hh_trafo_real
use
cuda_c_kernel
use
cuda_functions
...
...
@@ -280,7 +275,7 @@ contains
&
real
&
&
_
generic_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
-1
:
a_dim2
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
&
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
enddo
...
...
@@ -288,8 +283,7 @@ contains
&
real
&
&
_
cpu_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,&
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
endif
kernel_flops
=
kernel_flops
+
4
*
int
(
nl
,
lik
)
*
int
(
ncols
,
lik
)
*
int
(
nbw
,
lik
)
...
...
@@ -401,7 +395,7 @@ contains
&
complex
&
&
_
generic_
&
&
double
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
:
a_dim2
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
enddo
endif
...
...
@@ -514,7 +508,7 @@ contains
&
complex
&
&
_
generic_
&
&
single
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
:
a_dim2
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
&
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
enddo
endif
...
...
external/ELPA/src/mod_single_hh_trafo_real.f90
deleted
100644 → 0
View file @
9ed1e6ab
module
single_hh_trafo_real
implicit
none
public
single_hh_trafo_real_cpu_double
public
single_hh_trafo_real_cpu_single
contains
subroutine
single_hh_trafo_real_cpu_double
(
q
,
hh
,
nb
,
nq
,
ldq
)
use
elpa_abstract_impl
use
precision
! Perform single real Householder transformation.
! This routine is not performance critical and thus it is coded here in Fortran
implicit
none
! class(elpa_abstract_impl_t), intent(inout) :: obj
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
,
nq
,
ldq
! real(kind=rk8), intent(inout) :: q(ldq, *)
! real(kind=rk8), intent(in) :: hh(*)
real
(
kind
=
rk8
),
intent
(
inout
)
::
q
(
1
:
ldq
,
1
:
nb
)
real
(
kind
=
rk8
),
intent
(
in
)
::
hh
(
1
:
nb
)
integer
(
kind
=
ik
)
::
i
real
(
kind
=
rk8
)
::
v
(
nq
)
!#ifdef WITH_OPENMP
! call obj%timer%start("single_hh_trafo_real_cpu_openmp_double")
!#else
! call obj%timer%start("single_hh_trafo_real_cpu_double")
!#endif
! v = q * hh
v
(:)
=
q
(
1
:
nq
,
1
)
do
i
=
2
,
nb
v
(:)
=
v
(:)
+
q
(
1
:
nq
,
i
)
*
hh
(
i
)
enddo
! v = v * tau
v
(:)
=
v
(:)
*
hh
(
1
)
! q = q - v * hh**T
q
(
1
:
nq
,
1
)
=
q
(
1
:
nq
,
1
)
-
v
(:)
do
i
=
2
,
nb
q
(
1
:
nq
,
i
)
=
q
(
1
:
nq
,
i
)
-
v
(:)
*
hh
(
i
)
enddo
!#ifdef WITH_OPENMP
! call obj%timer%stop("single_hh_trafo_real_cpu_openmp_double")
!#else
! call obj%timer%stop("single_hh_trafo_real_cpu_double")
!#endif
end
subroutine
! single precision implementation at the moment duplicated !!!
subroutine
single_hh_trafo_real_cpu_single
(
q
,
hh
,
nb
,
nq
,
ldq
)
use
elpa_abstract_impl
use
precision
! Perform single real Householder transformation.
! This routine is not performance critical and thus it is coded here in Fortran
implicit
none
!class(elpa_abstract_impl_t), intent(inout) :: obj
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
,
nq
,
ldq
! real(kind=rk4), intent(inout) :: q(ldq, *)
! real(kind=rk4), intent(in) :: hh(*)
real
(
kind
=
rk4
),
intent
(
inout
)
::
q
(
1
:
ldq
,
1
:
nb
)
real
(
kind
=
rk4
),
intent
(
in
)
::
hh
(
1
:
nb
)
integer
(
kind
=
ik
)
::
i
real
(
kind
=
rk4
)
::
v
(
nq
)
!#ifdef WITH_OPENMP
! call obj%timer%start("single_hh_trafo_real_cpu_openmp_single")
!#else
! call obj%timer%start("single_hh_trafo_real_cpu_single")
!#endif
! v = q * hh
v
(:)
=
q
(
1
:
nq
,
1
)
do
i
=
2
,
nb
v
(:)
=
v
(:)
+
q
(
1
:
nq
,
i
)
*
hh
(
i
)
enddo
! v = v * tau
v
(:)
=
v
(:)
*
hh
(
1
)
! q = q - v * hh**T
q
(
1
:
nq
,
1
)
=
q
(
1
:
nq
,
1
)
-
v
(:)
do
i
=
2
,
nb
q
(
1
:
nq
,
i
)
=
q
(
1
:
nq
,
i
)
-
v
(:)
*
hh
(
i
)
enddo
!#ifdef WITH_OPENMP
! call obj%timer%stop("single_hh_trafo_real_cpu_openmp_single")
!#else
! call obj%timer%stop("single_hh_trafo_real_cpu_single")
!#endif
end
subroutine
end
module
external/ELPA/src/single_hh_trafo_real_dp.f90
0 → 100644
View file @
4ca77db1
subroutine
single_hh_trafo_real_cpu_double
(
q
,
hh
,
nb
,
nq
,
ldq
)
use
elpa_abstract_impl
use
precision
! Perform single real Householder transformation.
! This routine is not performance critical and thus it is coded here in Fortran
implicit
none
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
,
nq
,
ldq
real
(
kind
=
rk8
),
intent
(
inout
)
::
q
(
ldq
,
*
)
real
(
kind
=
rk8
),
intent
(
in
)
::
hh
(
*
)
integer
(
kind
=
ik
)
::
i
real
(
kind
=
rk8
)
::
v
(
nq
)
! v = q * hh
v
(:)
=
q
(
1
:
nq
,
1
)
do
i
=
2
,
nb
v
(:)
=
v
(:)
+
q
(
1
:
nq
,
i
)
*
hh
(
i
)
enddo
! v = v * tau
v
(:)
=
v
(:)
*
hh
(
1
)
! q = q - v * hh**T
q
(
1
:
nq
,
1
)
=
q
(
1
:
nq
,
1
)
-
v
(:)
do
i
=
2
,
nb
q
(
1
:
nq
,
i
)
=
q
(
1
:
nq
,
i
)
-
v
(:)
*
hh
(
i
)
enddo
end
subroutine
external/ELPA/src/single_hh_trafo_real_sp.f90
0 → 100644
View file @
4ca77db1
subroutine
single_hh_trafo_real_cpu_single
(
q
,
hh
,
nb
,
nq
,
ldq
)
use
elpa_abstract_impl
use
precision
! Perform single real Householder transformation.
! This routine is not performance critical and thus it is coded here in Fortran
implicit
none
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
,
nq
,
ldq
real
(
kind
=
rk4
),
intent
(
inout
)
::
q
(
ldq
,
*
)
real
(
kind
=
rk4
),
intent
(
in
)
::
hh
(
*
)
integer
(
kind
=
ik
)
::
i
real
(
kind
=
rk4
)
::
v
(
nq
)
! v = q * hh
v
(:)
=
q
(
1
:
nq
,
1
)
do
i
=
2
,
nb
v
(:)
=
v
(:)
+
q
(
1
:
nq
,
i
)
*
hh
(
i
)
enddo
! v = v * tau
v
(:)
=
v
(:)
*
hh
(
1
)
! q = q - v * hh**T
q
(
1
:
nq
,
1
)
=
q
(
1
:
nq
,
1
)
-
v
(:)
do
i
=
2
,
nb
q
(
1
:
nq
,
i
)
=
q
(
1
:
nq
,
i
)
-
v
(:)
*
hh
(
i
)
enddo
end
subroutine
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment