BCGS.F
#include "parmdefines.h"
#include "iadefines.h"
c***********************************************************************
#include "author.inc"
c* $Id: BCGS.F,v 1.42 1996/07/11 04:11:29 turner Exp $
c*
c* Computes the solution to a nonsymmetric system of linear
c* equations of the form Ax=b by bi-conjugate gradients squared
c* (BCGS; also known as conjugate gradients squared) or Freund's
c* transpose-free quasi-minimal residuals method (TFQMR).
c*
c* An error message is written out and control is returned to the
c* calling routine if a breakdown in iterates is encountered or if
c* the maximum number of iterations is exceeded. At that time,
c* err(iter) can be examined to determine if execution can continue.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* b - source vector
c* a - coefficient matrix
c* ia - integer vector containing info about how "a" is stored
c* NOTE: see description of ia below
c* ja - integer map for coefficient
c*
c* In/Out:
c* iparm - array of integer parameters
c* rparm - array of floating point parameters
c* NOTE: see description of iparm and rparm below
c* x - solution vector (whatever is in x on entry is used as an
c* initial guess)
c*
c* Output:
c* cpu - cumulative cpu time after each iteration
c* rnormt - norm of true residual
c* errt - error estimate based on true residual
c* rnorm - norm of residual at each iteration
c* err - error estimate at each iteration
c* ap - preconditioner
c* iap - integer vector containing info about how "ap" is stored
c* NOTE: see description of ia below
c* (elements of iap are analogous to those in ia)
c* jap - same as ja, but for preconditioner
c* status - return status:
c* -5 ==> iteration failed due to breakdown
c* -4 ==> iparm(_JT_itmax_) exceeded
c* -3 ==> internal error
c* -2 ==> memory allocation failure
c* -1 ==> invalid argument(s)
c* 0 ==> convergence was achieved
c* >0 ==> preconditioning needed
c*
#include "parmdesc.inc"
c*
#include "iadesc.inc"
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_ApplyPreconditioner
c* JT_CheckConvergence
c* JT_Clock
c* JT_FlushUnit
c* JT_GeneratePreconditioner
c* JT_SwapVectors
c* JT_WriteMatrix
c* JT_WriteSystem
c* JT_WriteVectorFloat
c* JT_x_eq_sx
c* JT_y_eq_Ax
c* JT_y_eq_sy_plus_x
c* JT_y_eq_x
c* JT_y_eq_y_minus_Ax
c* JT_y_eq_y_plus_x
c* JT_y_eq_y_plus_sx
c* JT_y_eq_y_times_x
c* JT_z_eq_y_plus_x
c* JT_z_eq_sy_plus_x
c*
c* <FUNCTIONS REQUIRED>
c*
c* JT_DotProduct
c* JT_MatrixNorm
c* JT_VectorNorm
c*
#include "arrays-BCGS.inc"
c*
#include "copyright.inc"
c***********************************************************************
subroutine JT_BCGS (b, a, ia, ja, iparm, rparm, x,
& cpu, rnormt, errt, rnorm, err, ap, iap, jap, status)
implicit none
c
c ... Input:
integer ia(_JT_no_of_storage_parameters_), ja(*)
real a(*)
c
c ... In/Out:
integer iap(_JT_no_of_storage_parameters_), jap(*)
integer iparm(_JT_no_of_iparms_)
real rparm(_JT_no_of_rparms_)
#ifdef strict_f77
real x(*), b(*)
#else
real x(ia(_JT_nrows_)), b(ia(_JT_nrows_))
#endif
real ap(*)
c
c ... Output:
integer status
#ifdef strict_f77
real cpu(0:*), rnorm(0:*), err(0:*), rnormt(0:*), errt(0:*)
#else
real cpu(0:iparm(_JT_itmax_))
real rnorm(0:iparm(_JT_itmax_)), err(0:iparm(_JT_itmax_))
real rnormt(0:iparm(_JT_itmax_)), errt(0:iparm(_JT_itmax_))
#endif
c
c ... Local:
integer i
real zero, one, cpu0,
& alpha, beta, bnorm, rho, rhoold, sigma,
& eta, rnorm_iter, rnormqmr,
& tau, term, theta, theta2
real JT_DotProduct, JT_MatrixNorm, JT_VectorNorm
#include "declare-BCGS.inc"
c
c ... Parameters.
parameter (zero=0.0d0, one=1.0d0)
c
c ... Ensure that all variables are saved.
save
c
c ... Check to see if re-entering after preconditioning.
if (status .eq. 1) then
call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
call JT_SwapVectors (ia(_JT_nrows_), r, x, status)
goto 25
elseif (status .eq. 2) then
call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
goto 45
elseif (status .eq. 3) then
call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
goto 55
endif
c
c ... Obtain CPU time at start of solution.
call JT_Clock (1, cpu0, status)
c
#include "allocate-BCGS.inc"
c
c ... Write initial system if desired.
if (iparm(_JT_out_) .ge. 4) then
call JT_WriteSystem (iparm(_JT_luout_), a, ia, ja,
& x, b, 'JT_BCGS: ON ENTRY (a,x,b):', status)
endif
c
c ... Initialize cpu time and residual and error norm arrays.
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, cpu, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, err, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, errt, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnorm, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnormt, status)
c
c ... Initialization dependent on the stopping test.
c
c Compute || b || if needed.
if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
& ABS(iparm(_JT_stop_)).eq._JT_stop_b_) then
bnorm = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), b, status)
else
bnorm = zero
endif
c
rparm(_JT_anorm_) = zero
if (iparm(_JT_stop_) .eq. _JT_stop_relchg_) then
c
c Initialize xold if using || x - xold || / || x || stopping test.
call JT_y_eq_x (ia(_JT_nrows_), x, xold, status)
elseif (ABS(iparm(_JT_stop_)) .eq. _JT_stop_axb_) then
c
c Compute || A || if needed.
rparm(_JT_anorm_) = JT_MatrixNorm(iparm(_JT_norm_), a, ia, ja, status)
if (status .eq. -2) goto 9999
endif
c
c ... Construct preconditioner as appropriate and write out if
c desired.
if (iparm(_JT_pre_).gt.0 .and. iparm(_JT_prein_).eq._JT_prein_no_) then
call JT_GeneratePreconditioner (iparm, rparm,
& a, ia, ja, ap, iap, jap, status)
if (status .lt. 0) then
status = -3
goto 9999
endif
endif
c
c ... Compute initial residual (w=b-Ax) and write out if desired.
call JT_y_eq_x (ia(_JT_nrows_), b, w, status)
call JT_y_eq_y_minus_Ax (a, ia, ja, x, w, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w,
& 'JT_BCGS: INITIAL RESIDUAL (w):', status)
endif
c
c ... Initialize iteration counter.
iparm(_JT_iter_) = 0
c
c ... Compute || r0 ||.
rnormt(0) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), w, status)
rnorm(0) = rnormt(0)
c
c ... Check to see if initial guess meets the convergence
c criterion.
if (rnorm(0) .le. rparm(_JT_tiny_)) then
status = 0
goto 9999
endif
c
c Skip initial check if using || x - xold || / || x || stopping
c test. Note that on entry iter = 0 and JT_CheckConvergence
c increments iparm(_JT_iter_).
if (iparm(_JT_stop_) .ne. _JT_stop_relchg_) then
call JT_CheckConvergence (bnorm, rnormt(0), rnorm(iparm(_JT_iter_)),
& x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
& err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
& status)
if (status .le. 0) goto 9999
else
iparm(_JT_iter_) = 1
endif
c
c ... Initialize TFQMR parameters as appropriate.
theta = zero
theta2 = zero
eta = zero
if (iparm(_JT_method_) .eq. _JT_method_TFQMR_) then
tau = rnorm(0)
call JT_FillVectorFloat (ia(_JT_nrows_), zero, d, status)
else
tau = zero
endif
c
c ... Initialize r.
if (iparm(_JT_pre_) .ge. 0) then
call JT_ApplyPreconditioner (iparm, rparm,
& a, ia, ja, ap, iap, jap, w, r, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
call JT_SwapVectors (ia(_JT_nrows_), r, x, status)
status = 1
return
endif
c
c ... Initialize q.
25 continue
call JT_y_eq_x (ia(_JT_nrows_), r, q, status) ! q = r
c
c ... Compute rhoold = (r,q).
rhoold = JT_DotProduct(ia(_JT_nrows_), r, q, status)
c
c ... First part of first iteration is special since beta is zero
c and there is no h vector yet.
call JT_y_eq_x (ia(_JT_nrows_), r, u, status) ! u = r
call JT_y_eq_x (ia(_JT_nrows_), r, g, status) ! g = r
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), r,
& 'JT_BCGS: BEFORE START OF MAIN LOOP (r):', status)
endif
c
c ************* START MAIN LOOP *************
c
35 continue
if (iparm(_JT_out_) .ge. 6) then
write (iparm(_JT_luout_),*)
write (iparm(_JT_luout_),905) '*** ITERATION NUMBER: ', iparm(_JT_iter_)
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), u,
& 'JT_BCGS: AFTER START OF MAIN LOOP (u):', status)
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), g,
& 'JT_BCGS: AFTER START OF MAIN LOOP (g):', status)
endif
call JT_y_eq_Ax (a, ia, ja, g, work, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
c
if (iparm(_JT_pre_) .ge. 0) then
call JT_ApplyPreconditioner (iparm, rparm,
& a, ia, ja, ap, iap, jap, work, w, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
elseif (iparm(_JT_pre_) .eq. -1) then
call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
status = 2
return
endif
c
45 continue
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w,
& 'JT_BCGS: AFTER FIRST PRECONDITIONING (w):', status)
endif
sigma = JT_DotProduct(ia(_JT_nrows_), q, w, status)
if (sigma .eq. zero) then
if (iparm(_JT_out_) .ge. 1) then
write(iparm(_JT_luerr_),*)
write(iparm(_JT_luerr_),901) '*********************************'
write(iparm(_JT_luerr_),901) 'JT_BCGS: ERROR'
write(iparm(_JT_luerr_),901) ' Breakdown (sigma = 0.0)'
write(iparm(_JT_luerr_),905) ' current iteration number: ', iparm(_JT_iter_)
write(iparm(_JT_luerr_),940) ' norm of residual at breakdown: ',
& rnorm(iparm(_JT_iter_))
write(iparm(_JT_luerr_),901) '*********************************'
endif
status = -5
goto 9999
endif
alpha = rhoold/sigma
c
call JT_z_eq_sy_plus_x (ia(_JT_nrows_), -alpha, u, w, h, status) ! h = u - alpha*w
c
call JT_z_eq_y_plus_x (ia(_JT_nrows_), u, h, v, status)
call JT_x_eq_sx (ia(_JT_nrows_), alpha, v, status) ! v = alpha*(u + h)
c
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), h,
& 'JT_BCGS: BEFORE CALCULATION OF RESIDUAL (h):', status)
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), v,
& 'JT_BCGS: BEFORE CALCULATION OF RESIDUAL (v):', status)
endif
c
call JT_y_eq_Ax (a, ia, ja, v, work, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
c
if (iparm(_JT_pre_) .ge. 0) then
call JT_ApplyPreconditioner (iparm, rparm,
& a, ia, ja, ap, iap, jap, work, w, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
elseif (iparm(_JT_pre_) .eq. -1) then
call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
status = 3
return
endif
c
55 continue
c
c ... Compute BCGS residual: r = r - w
call JT_y_eq_y_minus_x (ia(_JT_nrows_), w, r, status)
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), r,
& 'JT_BCGS: AFTER CALCULATION OF RESIDUAL (r):', status)
endif
c
c ... Compute || r ||.
rnorm(iparm(_JT_iter_)) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), r, status)
c
if (iparm(_JT_method_) .eq. _JT_method_BCGS_) then
c
c .... Compute new BCGS iterate: x = x + v
call JT_y_eq_y_plus_x (ia(_JT_nrows_), v, x, status)
rnorm_iter = rnorm(iparm(_JT_iter_))
else
c
c .... Compute 1st TFQMR iterate and check convergence.
term = theta2*eta/alpha
call JT_y_eq_sy_plus_x (ia(_JT_nrows_), term, u, d, status) ! d = term*d + u
theta = SQRT(rnorm(MAX(0,iparm(_JT_iter_)-2))*rnorm(iparm(_JT_iter_)))/tau
theta2 = theta*theta
sigma = one / SQRT(one + theta2)
eta = sigma*sigma*alpha
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), eta, d, x, status) ! x = x + eta*d
rnormqmr = tau*SQRT(REAL(iparm(_JT_iter_)+1))
call JT_CheckConvergence (bnorm, rnormt(0), rnormqmr,
& x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
& err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
& status)
rnorm(iparm(_JT_iter_)) = rnorm(iparm(_JT_iter_)-1)
if (status.le.0 .or. theta.eq.zero) goto 9999
c
c .... Compute 2nd TFQMR iterate.
tau = tau*theta*sigma
term = theta2*eta/alpha
call JT_y_eq_sy_plus_x (ia(_JT_nrows_), term, h, d, status) ! d = term*d + h
theta = rnorm(iparm(_JT_iter_)-1) / tau
theta2 = theta*theta
sigma = one / SQRT(one + theta2)
tau = tau*theta*sigma
eta = sigma*sigma*alpha
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), eta, d, x, status) ! x = x + eta*d
rnormqmr = tau*SQRT(REAL(iparm(_JT_iter_)))
rnorm_iter = rnormqmr
endif
c
c ... Check convergence.
call JT_CheckConvergence (bnorm, rnormt(0), rnorm_iter,
& x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
& err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
& status)
if (iparm(_JT_method_) .eq. _JT_method_TFQMR_) then
rnorm(iparm(_JT_iter_)) = rnorm(iparm(_JT_iter_)-1)
endif
if (status .le. 0) goto 9999
c
c ... Begin new iteration.
rho = JT_DotProduct(ia(_JT_nrows_), q, r, status)
if (rho .eq. zero) then
if (iparm(_JT_out_) .ge. 1) then
write(iparm(_JT_luerr_),*)
write(iparm(_JT_luerr_),901) '*********************************'
write(iparm(_JT_luerr_),901) 'JT_BCGS: ERROR'
write(iparm(_JT_luerr_),901) ' Breakdown (rho = 0.0)'
write(iparm(_JT_luerr_),905) ' current iteration number: ', iparm(_JT_iter_)
write(iparm(_JT_luerr_),940) ' norm of residual at breakdown: ',
& rnorm(iparm(_JT_iter_))
write(iparm(_JT_luerr_),901) '*********************************'
endif
status = -5
goto 9999
endif
beta = rho/rhoold
rhoold = rho
c
call JT_z_eq_sy_plus_x (ia(_JT_nrows_), beta, r, h, u, status) ! u = beta*h + r
c
c The following two calls compute g = u + beta*(beta*g + h)
call JT_y_eq_sy_plus_x (ia(_JT_nrows_), beta, h, g, status) ! g = beta*g + h
call JT_y_eq_sy_plus_x (ia(_JT_nrows_), beta, u, g, status) ! g = beta*g + u
c
goto 35
c
9999 continue
c
c ... Subtract time at start of solution from each element of cpu array
c to obtain true CPU time for each iteration.
cpu(0) = cpu0
do i=1,iparm(_JT_iter_)
cpu(i) = cpu(i) - cpu(0)
enddo
#include "deallocate-BCGS.inc"
c
#include "formats.inc"
return
end