!C
!C***
!C*** CG
!C***
!C
subroutine hpcmw_eps_fvm_solver_CG &
& ( N, NP, NPLU, D, B, X, EPS, ITR, IER, &
& index, item, COEF, Tcomm)
use hpcmw_eps_fvm_util
implicit REAL*8 (A-H,O-Z)
real(kind=kreal), dimension(N ) :: D
real(kind=kreal), dimension(NP) :: B
real(kind=kreal), dimension(NP) :: X
integer , dimension(0:N) :: index
integer , dimension(NPLU):: item
real (kind=kreal), dimension(NPLU):: COEF
real(kind=kreal) :: EPS, Tcomm
integer :: ITR, IER
integer :: P, Q, R, Z, DD
real(kind=kreal), dimension(:,:), allocatable, save :: W
real(kind=kreal), dimension(: ), allocatable, save :: WS, WR
!C
!C +-------+
!C | INIT. |
!C +-------+
!C===
nn= max (NP, import_index(n_neighbor_pe), &
& export_index(n_neighbor_pe))
if (.not.allocated(W)) then
allocate (W(NP,4), WS(nn), WR(nn))
endif
X= 0.d0
W= 0.d0
WR= 0.d0
WS= 0.d0
R = 1
Z = 2
Q = 2
P = 3
DD= 4
do i= 1, N
W(i,DD)= 1.0D0 / D(i)
enddo
IER = 0
Tcomm= 0.d0
!C===
!C
!C +-----------------------+
!C | {r0}= {b} - [A]{xini} |
!C +-----------------------+
!C===
S_TIME= MPI_WTIME()
call hpcmw_eps_fvm_update_1_R (X, NP)
E_TIME= MPI_WTIME()
Tcomm= Tcomm + E_TIME - S_TIME
do i= 1, N
W(i,R) = D(i)*X(i)
do j= index(i-1)+1, index(i)
W(i,R) = W(i,R) + COEF(j) * X(item(j))
enddo
enddo
BNRM2= 0.0D0
do i= 1, N
BNRM2= BNRM2 + B(i) **2
W(i,R)= B(i) - W(i,R)
enddo
S_TIME= MPI_WTIME()
call hpcmw_eps_fvm_allreduce_R (BNRM2, hpcmw_sum)
E_TIME= MPI_WTIME()
Tcomm= Tcomm + E_TIME - S_TIME
!C===
!C
!C*************************************************************** ITERATION
do L= 1, ITR
!C
!C +----------------+
!C | {z}= [Minv]{r} |
!C +----------------+
!C===
do i= 1, N
W(i,Z)= W(i,DD) * W(i,R)
enddo
!C===
!C
!C +-------------+
!C | RHO= {r}{z} |
!C +-------------+
!C===
RHO= 0.d0
do i= 1, N
RHO= RHO + W(i,R)*W(i,Z)
enddo
S_TIME= MPI_WTIME()
call hpcmw_eps_fvm_allreduce_R (RHO, hpcmw_sum)
E_TIME= MPI_WTIME()
Tcomm= Tcomm + E_TIME - S_TIME
!C===
!C
!C +-----------------------------+
!C | {p} = {z} if ITER=1 |
!C | BETA= RHO / RHO1 otherwise |
!C +-----------------------------+
!C===
if ( L.eq.1 ) then
do i= 1, N
W(i,P)= W(i,Z)
enddo
else
BETA= RHO / RHO1
do i= 1, N
W(i,P)= W(i,Z) + BETA*W(i,P)
enddo
endif
!C===
!C
!C +-------------+
!C | {q}= [A]{p} |
!C +-------------+
!C===
S_TIME= MPI_WTIME()
call hpcmw_eps_fvm_update_1_R (W(1,P), NP)
E_TIME= MPI_WTIME()
Tcomm= Tcomm + E_TIME - S_TIME
do i= 1, N
W(i,Q) = D(i) * W(i,P)
do j= index(i-1)+1, index(i)
W(i,Q) = W(i,Q) + COEF(j) * W(item(j),P)
enddo
enddo
!C===
!C
!C +---------------------+
!C | ALPHA= RHO / {p}{q} |
!C +---------------------+
!C===
C1= 0.d0
do i= 1, N
C1= C1 + W(i,P)*W(i,Q)
enddo
S_TIME= MPI_WTIME()
call hpcmw_eps_fvm_allreduce_R (C1, hpcmw_sum)
E_TIME= MPI_WTIME()
Tcomm= Tcomm + E_TIME - S_TIME
ALPHA= RHO / C1
!C===
!C +----------------------+
!C | {x}= {x} + ALPHA*{p} |
!C | {r}= {r} - ALPHA*{q} |
!C +----------------------+
!C===
X1= 0.0d0
X2= 0.0d0
do i= 1, N
X(i) = X(i) + ALPHA * W(i,P)
W(i,R)= W(i,R) - ALPHA * W(i,Q)
enddo
DNRM2 = 0.0
do i= 1, N
DNRM2= DNRM2 + W(i,R)**2
enddo
S_TIME= MPI_WTIME()
call hpcmw_eps_fvm_allreduce_R (DNRM2, hpcmw_sum)
E_TIME= MPI_WTIME()
Tcomm= Tcomm + E_TIME - S_TIME
RESID= dsqrt(DNRM2/BNRM2)
if (my_rank.eq.0) write (*, 1000) L, RESID
1000 format (i5, 1pe16.6)
if ( RESID.le.EPS) goto 900
RHO1 = RHO
enddo
IER = 1
900 continue
ITR= L
EPS= RESID
return
end subroutine hpcmw_eps_fvm_solver_CG