subroutine solve_blbicggr(this)
!
! A x = b => x = A \ b
!
! with RCI.
!
use f95_lapack, only : LA_GESV
implicit none
type(blbicggr), intent(inout) :: this
complex(DP) :: ctmp0
complex(DP), allocatable :: ctmp0m(:,:)
complex(DP), allocatable :: ctmp0v(:)
real(DP) :: rtmp0
real(DP), allocatable :: rtmp0v(:),rtmp1v(:)
integer :: nn,nblk
integer :: j,k,kk,iter,ierr
nblk = this%NBLK
nn = this%NSIZE
select case(this%inext)
case(0)
select case(this%guess)
case(GUESS_NO)
!============
! X = 0
! P = 0
!============
do j=1,nn
do k=1,nblk
this%x(k,j) = Z0
this%p(k,j) = Z0
enddo
enddo
this%inext = 1
this%istat = OP_NOP
case(GUESS_USE)
!============
! P = DX
!============
this%src_vec => this%x
this%dst_vec => this%p
this%inext = 1
this%istat = OP_DO_MATVEC
end select
return
case(1)
!=====================
! rsource = |b|
! rnorm = |r|/|b|
! R = B - P
! P = R
!=====================
allocate(rtmp0v(nblk))
allocate(rtmp1v(nblk))
rtmp0v(:) = 0.0_DP
rtmp1v(:) = 0.0_DP
!$OMP PARALLEL DO PRIVATE(j,k) REDUCTION(+:rtmp0v,rtmp1v)
do j=1,nn
do k=1,nblk
this%r(k,j) = this%b(k,j) - this%p(k,j)
this%p(k,j) = this%r(k,j)
rtmp0v(k) = rtmp0v(k)+ REAL(this%b(k,j))**2+AIMAG(this%b(k,j))**2
rtmp1v(k) = rtmp1v(k)+ REAL(this%r(k,j))**2+AIMAG(this%r(k,j))**2
enddo
enddo
#ifndef _singlePU
call comlib_sumcast(rtmp0v)
call comlib_sumcast(rtmp1v)
#endif
do k=1,nblk
this%rsrcv(k) = SQRT(rtmp0v(k))
this%rnrmv(k) = SQRT(rtmp1v(k))/this%rsrcv(k)
enddo
deallocate(rtmp1v)
deallocate(rtmp0v)
!==========================
! Check residual
!==========================
if (this%debug) then
do k=1,nblk
write(*,'("#B",I5," ERR:",E24.16,I3)')this%iter/nblk,this%rnrmv(k),k
enddo
endif
this%inext = 2
this%istat = OP_PRINT_STATUS
return
case(2)
select case(this%mode)
case(MODE_NORMAL)
this%istat = OP_NOP
case(MODE_PRECOND)
!=====================
! S = M R
!=====================
this%src_vec => this%r
this%dst_vec => this%s
this%istat = OP_PRECOND
end select
this%inext = 3
return
case(3)
select case(this%mode)
case(MODE_NORMAL)
!=====================
! W = D R
!=====================
this%src_vec => this%r
this%dst_vec => this%w
case(MODE_PRECOND)
!=====================
! S = M R
! W = D S <=
!=====================
this%src_vec => this%s
this%dst_vec => this%w
end select
this%inext = 4
this%istat = OP_DO_MATVEC
return
case(4)
this%iter = this%iter + nblk
!=====================
! V = W
! Rt = R
!=====================
!$OMP PARALLEL DO PRIVATE(j,k)
do j=1,nn
do k=1,nblk
this%v(k,j) = this%w(k,j)
this%rt(k,j) = this%r(k,j)
enddo
enddo
!=====================
! orthogonalize Rt
!=====================
call orth(this%rt)
!==========================
! Rho0m = Rt'*R
! zeta = Tr[W'*R]/Tr[W'*W]
!==========================
allocate(ctmp0m(nblk,nblk))
ctmp0m(:,:) = Z0
ctmp0 = Z0
rtmp0 = 0.0_DP
!$OMP PARALLEL DO PRIVATE(j,k,kk) REDUCTION(+:ctmp0,rtmp0,ctmp0m)
do j=1,nn
do k=1,nblk
do kk=1,nblk
ctmp0m(kk,k) = ctmp0m(kk,k) + CONJG(this%rt(kk,j))*this%r(k,j)
enddo
ctmp0 = ctmp0 + CONJG(this%w(k,j))*this%r(k,j)
rtmp0 = rtmp0 + REAL(this%w(k,j))**2 + AIMAG(this%w(k,j))**2
enddo
enddo
#ifndef _singlePU
allocate(ctmp0v(nblk*nblk))
ctmp0v(:) = RESHAPE(ctmp0m,SHAPE(ctmp0v))
call comlib_sumcast(ctmp0v)
ctmp0m(:,:) = RESHAPE(ctmp0v,SHAPE(ctmp0m))
deallocate(ctmp0v)
call comlib_sumcast(ctmp0)
call comlib_sumcast(rtmp0)
#endif
this%Rho0m(:,:) = ctmp0m(:,:)
deallocate(ctmp0m)
this%zeta = ctmp0/rtmp0
!==========================
! P = P - zeta V
!==========================
!$OMP PARALLEL DO PRIVATE(j,k)
do j=1,nn
do k=1,nblk
this%p(k,j) = this%p(k,j) - this%zeta*this%v(k,j)
enddo
enddo
allocate(rtmp0v(nblk))
rtmp0v(:) = 0.0_DP
select case(this%mode)
case(MODE_NORMAL)
!==========================
! X = X + zeta R
! R = R - zeta W
!==========================
!$OMP PARALLEL DO PRIVATE(j,k) REDUCTION(+:rtmp0v)
do j=1,nn
do k=1,nblk
this%x(k,j) = this%x(k,j) + this%zeta*this%r(k,j)
this%r(k,j) = this%r(k,j) - this%zeta*this%w(k,j)
rtmp0v(k) = rtmp0v(k) + REAL(this%r(k,j))**2 + AIMAG(this%r(k,j))**2
enddo
enddo
case(MODE_PRECOND)
!==========================
! X = X + zeta S
! R = R - zeta W
!==========================
!$OMP PARALLEL DO PRIVATE(j,k) REDUCTION(+:rtmp0v)
do j=1,nn
do k=1,nblk
this%x(k,j) = this%x(k,j) + this%zeta*this%s(k,j)
this%r(k,j) = this%r(k,j) - this%zeta*this%w(k,j)
rtmp0v(k) = rtmp0v(k) + REAL(this%r(k,j))**2 + AIMAG(this%r(k,j))**2
enddo
enddo
end select
#ifndef _singlePU
call comlib_sumcast(rtmp0v)
#endif
do k=1,nblk
this%rnrmv(k) = SQRT(rtmp0v(k))/this%rsrcv(k)
enddo
deallocate(rtmp0v)
!==========================
! Check residual
!==========================
if (this%debug) then
do k=1,nblk
write(*,'("#B",I5," ERR:",E24.16,I3)')this%iter/nblk,this%rnrmv(k),k
enddo
endif
if ( MAXVAL(this%rnrmv(:)) < this%tol ) then
this%dst_vec => this%x
this%inext = 0
this%istat = OP_CONVERGED
else
this%inext = 5
this%istat = OP_PRINT_STATUS
endif
return
case(5) ! LOOP ITERATION START
if (this%iter > this%max_iter*nblk) then
this%inext = 0
this%istat = OP_MAXITER_REACHED
return
endif
!===========================
! ctmp0m = Rt'*V
!===========================
allocate(ctmp0m(nblk,nblk))
ctmp0m(:,:) = Z0
!$OMP PARALLEL DO PRIVATE(j,k,kk) REDUCTION(+:ctmp0m)
do j=1,nn
do k=1,nblk
do kk=1,nblk
ctmp0m(kk,k) = ctmp0m(kk,k) + CONJG(this%rt(kk,j))*this%v(k,j)
enddo
enddo
enddo
#ifndef _singlePU
allocate(ctmp0v(nblk*nblk))
ctmp0v(:) = RESHAPE(ctmp0m,SHAPE(ctmp0v))
call comlib_sumcast(ctmp0v)
ctmp0m(:,:) = RESHAPE(ctmp0v,SHAPE(ctmp0m))
deallocate(ctmp0v)
#endif
!===========================
! Solve ctmp0m Alpm = Rho0m
! Alpm = ctmp0m \ Rho0m
!===========================
this%Alpm(:,:) = this%Rho0m(:,:)
call LA_GESV(ctmp0m,this%Alpm,INFO=ierr)
if (ierr /= 0) then
write(*,'("Error, solving Alpha in Bl-BiCGStab. INFO=",I3)')ierr
stop
endif
deallocate(ctmp0m)
!==========================
! Q = P Alpm
!==========================
!$OMP PARALLEL DO PRIVATE(j,k,kk)
do j=1,nn
do k=1,nblk
this%q(k,j) = this%p(1,j)*this%Alpm(1,k)
do kk=2,nblk
this%q(k,j) = this%q(k,j) + this%p(kk,j)*this%Alpm(kk,k)
enddo
enddo
enddo
select case(this%mode)
case(MODE_NORMAL)
!==========================
! NOP
! P = D Q
!==========================
this%istat = OP_NOP
case(MODE_PRECOND)
!==========================
! S = M Q <=
! P = D S
!==========================
this%src_vec => this%q
this%dst_vec => this%s
this%istat = OP_PRECOND
end select
this%inext = 6
return
case(6)
select case(this%mode)
case(MODE_NORMAL)
!==========================
! P = D Q
!==========================
this%src_vec => this%q
this%dst_vec => this%p
case(MODE_PRECOND)
!==========================
! S = M Q
! P = D S <=
!==========================
this%src_vec => this%s
this%dst_vec => this%p
end select
this%inext = 7
this%istat = OP_DO_MATVEC
this%iter = this%iter + nblk
return
case(7)
allocate(rtmp0v(nblk))
rtmp0v(:) = 0.0_DP
select case(this%mode)
case(MODE_NORMAL)
!==========================
! X = X + Q
! R = R - P
!==========================
!$OMP PARALLEL DO PRIVATE(j,k) REDUCTION(+:rtmp0v)
do j=1,nn
do k=1,nblk
this%x(k,j) = this%x(k,j) + this%q(k,j)
this%r(k,j) = this%r(k,j) - this%p(k,j)
rtmp0v(k) = rtmp0v(k) + REAL(this%r(k,j))**2 + AIMAG(this%r(k,j))**2
enddo
enddo
case(MODE_PRECOND)
!==========================
! X = X + S
! R = R - P
!==========================
!$OMP PARALLEL DO PRIVATE(j,k) REDUCTION(+:rtmp0v)
do j=1,nn
do k=1,nblk
this%x(k,j) = this%x(k,j) + this%s(k,j)
this%r(k,j) = this%r(k,j) - this%p(k,j)
rtmp0v(k) = rtmp0v(k) + REAL(this%r(k,j))**2 + AIMAG(this%r(k,j))**2
enddo
enddo
end select
#ifndef _singlePU
call comlib_sumcast(rtmp0v)
#endif
do k=1,nblk
this%rnrmv(k) = SQRT(rtmp0v(k))/this%rsrcv(k)
enddo
!==========================
! Check residual
!==========================
if (this%debug) then
do k=1,nblk
write(*,'("#B",I5," ERR:",E24.16,I3)')this%iter/nblk,this%rnrmv(k),k
enddo
endif
if ( MAXVAL(this%rnrmv(:)) < this%tol ) then
this%dst_vec => this%x
this%inext = 0
this%istat = OP_CONVERGED
else
this%inext = 8
this%istat = OP_PRINT_STATUS
endif
deallocate(rtmp0v)
return
case(8)
select case(this%mode)
case(MODE_NORMAL)
!==========================
! NOP <=
! W = D R
!==========================
this%istat = OP_NOP
case(MODE_PRECOND)
!==========================
! S = M R <=
! W = D S
!==========================
this%src_vec => this%r
this%dst_vec => this%s
this%istat = OP_PRECOND
end select
this%inext = 9
return
case(9)
select case(this%mode)
case(MODE_NORMAL)
!==========================
! NOP
! W = D R <=
!==========================
this%src_vec => this%r
this%dst_vec => this%w
case(MODE_PRECOND)
!==========================
! S = M R
! W = D S <=
!==========================
this%src_vec => this%s
this%dst_vec => this%w
end select
this%inext = 10
this%istat = OP_DO_MATVEC
this%iter = this%iter + nblk
return
case(10)
!==========================
! ctmp0m = Rt'*R
! ctmp0 = Tr[W'*R]
! rtmp0 = Tr[W'*W]
!==========================
allocate(ctmp0m(nblk,nblk))
ctmp0m(:,:) = Z0
ctmp0 = Z0
rtmp0 = 0.0_DP
!$OMP PARALLEL DO PRIVATE(j,k,kk) REDUCTION(+:ctmp0,rtmp0,ctmp0m)
do j=1,nn
do k=1,nblk
do kk=1,nblk
ctmp0m(kk,k) = ctmp0m(kk,k) + CONJG(this%rt(kk,j))*this%r(k,j)
enddo
ctmp0 = ctmp0 + CONJG(this%w(k,j))*this%r(k,j)
rtmp0 = rtmp0 + REAL(this%w(k,j))**2 + AIMAG(this%w(k,j))**2
enddo
enddo
#ifndef _singlePU
allocate(ctmp0v(nblk*nblk))
ctmp0v(:) = RESHAPE(ctmp0m,SHAPE(ctmp0v))
call comlib_sumcast(ctmp0v)
ctmp0m(:,:) = RESHAPE(ctmp0v,SHAPE(ctmp0m))
deallocate(ctmp0v)
call comlib_sumcast(ctmp0)
call comlib_sumcast(rtmp0)
#endif
!==================================
! Solve : Rho0m Gamm = ctmp0m/zeta
! => Gamm = Rho0m \ ctmp0m/zeta
! Rho0m <= ctmp0m
!==================================
this%Gamm(:,:) = ctmp0m(:,:)/this%zeta
call LA_GESV(this%Rho0m,this%Gamm,INFO=ierr)
if (ierr /= 0) then
write(*,'("Error, solving Gamma in Bl-BiCGStab. INFO=",I3)')ierr
stop
endif
this%Rho0m(:,:) = ctmp0m(:,:)
!==========================
! zeta = Tr[W'*R]/Tr[W'*W]
!============================
this%zeta = ctmp0/rtmp0
!============================
! V = W + P Gamm
! P = R + Q Gamm - zeta V
!============================
!$OMP PARALLEL DO PRIVATE(j,k,kk)
do j=1,nn
do k=1,nblk
this%v(k,j) = this%w(k,j)
do kk=1,nblk
this%v(k,j) = this%v(k,j) + this%Gamm(kk,k)*this%p(kk,j)
enddo
enddo
do k=1,nblk
this%p(k,j) = this%r(k,j) - this%zeta * this%v(k,j)
do kk=1,nblk
this%p(k,j) = this%p(k,j) + this%Gamm(kk,k)*this%q(kk,j)
enddo
enddo
enddo
allocate(rtmp0v(nblk))
rtmp0v(:) = 0.0_DP
select case(this%mode)
case(MODE_NORMAL)
!==========================
! X = X + zeta R
! R = R - zeta W
!==========================
!$OMP PARALLEL DO PRIVATE(j,k) REDUCTION(+:rtmp0v)
do j=1,nn
do k=1,nblk
this%x(k,j) = this%x(k,j) + this%zeta*this%r(k,j)
this%r(k,j) = this%r(k,j) - this%zeta*this%w(k,j)
rtmp0v(k) = rtmp0v(k) + REAL(this%r(k,j))**2 + AIMAG(this%r(k,j))**2
enddo
enddo
case(MODE_PRECOND)
!==========================
! X = X + zeta S
! R = R - zeta W
!==========================
!$OMP PARALLEL DO PRIVATE(j,k) REDUCTION(+:rtmp0v)
do j=1,nn
do k=1,nblk
this%x(k,j) = this%x(k,j) + this%zeta*this%s(k,j)
this%r(k,j) = this%r(k,j) - this%zeta*this%w(k,j)
rtmp0v(k) = rtmp0v(k) + REAL(this%r(k,j))**2 + AIMAG(this%r(k,j))**2
enddo
enddo
end select
#ifndef _singlePU
call comlib_sumcast(rtmp0v)
#endif
do k=1,nblk
this%rnrmv(k) = SQRT(rtmp0v(k))/this%rsrcv(k)
enddo
!==========================
! Check residual
!==========================
if (this%debug) then
do k=1,nblk
write(*,'("#B",I5," ERR:",E24.16,I3)')this%iter/nblk,this%rnrmv(k),k
enddo
endif
if ( MAXVAL(this%rnrmv(:)) < this%tol ) then
this%dst_vec => this%x
this%inext = 0
this%istat = OP_CONVERGED
else
this%inext = 5
this%istat = OP_PRINT_STATUS
endif
deallocate(rtmp0v,ctmp0m)
return
end select
return
end subroutine