GMRES.F
#ifdef strict_f77
#define _h_(i,j) h(IJ2I(iparm(_JT_nold_)+1,i,j))
#define _v_(i,j) v(IJ2I(ia(_JT_nrows_),i,j))
#else
#define _h_(i,j) h(i,j)
#define _v_(i,j) v(i,j)
#endif
#include "parmdefines.h"
#include "iadefines.h"
c***********************************************************************
#include "author.inc"
c* $Id: GMRES.F,v 1.47 1997/01/01 19:46:23 turner Exp $
c*
c* Computes the solution to a nonsymmetric system of linear
c* equations of the form Ax=b by the restarted Generalized Minimal
c* Residual method, GMRES(k), where k is the frequency of restart.
c*
c* An error message is written out and control is returned to the
c* calling routine if a breakdown in iterates is encountered or if
c* the maximum number of iterations is exceeded. At that time,
c* err(iter) can be examined to determine if execution can continue.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* b - source vector
c* a - coefficient matrix
c* ia - integer vector containing info about how "a" is stored
c* NOTE: see description of ia below
c* ja - integer array that maps columns in a to true columns
c*
c* In/Out:
c* iparm - array of integer parameters
c* rparm - array of floating point parameters
c* NOTE: see description of iparm and rparm below
c* x - solution vector (whatever is in x on entry is used as an
c* initial guess)
c*
c* Output:
c* cpu - cumulative cpu time after each iteration
c* rnormt - norm of true residual
c* errt - error estimate based on true residual
c* rnorm - norm of residual at each iteration
c* err - error estimate at each iteration
c* ap - preconditioner
c* iap - integer vector containing info about how "ap" is stored
c* NOTE: see description of ia below
c* jap - same as ja, but for preconditioner
c* status - return status:
c* -4 ==> iparm(_JT_itmax_) exceeded
c* -3 ==> internal error
c* -2 ==> memory allocation failure
c* -1 ==> invalid argument(s)
c* 0 ==> convergence was achieved
c* >0 ==> preconditioning needed
c*
#include "parmdesc.inc"
c*
#include "iadesc.inc"
c*
c* Variables in iap are analogous to those in ia.
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_ApplyPreconditioner
c* JT_CheckConvergence
c* JT_Clock
c* JT_FlushUnit
c* JT_GeneratePreconditioner
c* JT_SolveUTriang_Full
c* JT_SwapVectors
c* JT_WriteMatrix
c* JT_WriteSystem
c* JT_WriteVectorFloat
c* JT_x_eq_sx
c* JT_y_eq_Ax
c* JT_y_eq_x
c* JT_y_eq_y_minus_Ax
c* JT_y_eq_y_plus_sx
c*
c* <FUNCTIONS REQUIRED>
c*
c* JT_DotProduct
c* JT_MatrixNorm
c* JT_VectorNorm
c*
#include "arrays-GMRES.inc"
c*
#include "copyright.inc"
c***********************************************************************
subroutine JT_GMRES (b, a, ia, ja, iparm, rparm, x,
& cpu, rnormt, errt, rnorm, err, ap, iap, jap, status)
implicit none
c
c ... Input:
integer ia(_JT_no_of_storage_parameters_), ja(*)
real a(*)
c
c ... In/Out:
integer iap(_JT_no_of_storage_parameters_), jap(*)
integer iparm(_JT_no_of_iparms_)
real rparm(_JT_no_of_rparms_)
#ifdef strict_f77
real x(*), b(*)
#else
real x(ia(_JT_nrows_)), b(ia(_JT_nrows_))
#endif
real ap(*)
c
c ... Output:
integer status
#ifdef strict_f77
real cpu(0:*), rnorm(0:*), err(0:*), rnormt(0:*), errt(0:*)
#else
real cpu(0:iparm(_JT_itmax_))
real rnorm(0:iparm(_JT_itmax_)), err(0:iparm(_JT_itmax_))
real rnormt(0:iparm(_JT_itmax_)), errt(0:iparm(_JT_itmax_))
#endif
c
c ... Local:
integer i, j, k, n, tmp_status
#ifdef strict_f77
integer IJ2I
#endif
real zero, one, bnorm, gamma, hijp, hipjp, hjjinv,
& h_squared, gamma_squared, cpu0
real JT_DotProduct, JT_MatrixNorm, JT_VectorNorm
#include "declare-GMRES.inc"
c
c ... Parameters.
parameter (zero=0.0d0, one=1.0d0)
c
c ... Ensure that all variables are saved.
save
c
c ... Check to see if re-entering after preconditioning.
if (status .eq. 1) then
call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
call JT_SwapVectors (ia(_JT_nrows_), _v_(1,1), x, status)
goto 45
elseif (status .eq. 2) then
call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
call JT_SwapVectors (ia(_JT_nrows_), _v_(1,j+1), x, status)
goto 55
endif
c
c ... Obtain CPU time at start of solution.
call JT_Clock (1, cpu0, status)
c
c ... Check value of nold.
if (iparm(_JT_nold_) .le. zero) then
if (iparm(_JT_out_) .ge. 1) then
write(iparm(_JT_luerr_),*)
write(iparm(_JT_luerr_),901) '**************************************'
write(iparm(_JT_luerr_),901) 'JT_GMRES: ERROR'
write(iparm(_JT_luerr_),909) ' Invalid value for nold: ', iparm(_JT_nold_)
write(iparm(_JT_luerr_),901) ' (value must be >0)'
write(iparm(_JT_luerr_),901) '**************************************'
endif
status = -1
goto 9999
endif
c
c ... Let k be a synonym for nold.
k = iparm(_JT_nold_)
c
#include "allocate-GMRES.inc"
c
c ... Initialize dynamically-allocated array v if preconditioning
c via reverse communication.
#ifdef strict_f77
n = ia(_JT_nrows_)*(iparm(_JT_nold_)+1)
call JT_FillVectorFloat (n, zero, v, status)
#else
do j=1,iparm(_JT_nold_)+1
call JT_FillVectorFloat (ia(_JT_nrows_), zero, v(1,j), status)
enddo
#endif
c
c ... Write initial system if desired.
if (iparm(_JT_out_) .ge. 4) then
call JT_WriteSystem (iparm(_JT_luout_), a, ia, ja, x, b,
& 'JT_GMRES: ON ENTRY (a, x, b):', status)
endif
c
c ... Initialize cpu time and residual and error norm arrays.
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, cpu, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, err, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, errt, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnorm, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnormt, status)
c
c ... Initialization dependent on the stopping test.
c
c Compute || b || if needed.
if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
& ABS(iparm(_JT_stop_)).eq._JT_stop_b_) then
bnorm = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), b, status)
else
bnorm = zero
endif
c
c Copy initial guess into xt if needed for stopping test.
if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
& ABS(iparm(_JT_stop_)).eq._JT_stop_x_) then
call JT_y_eq_x (ia(_JT_nrows_), x, xt, tmp_status)
endif
c
rparm(_JT_anorm_) = zero
if (iparm(_JT_stop_) .eq. _JT_stop_relchg_) then
c
c Initialize xold if using || x - xold || / || x || stopping test,
c which is actually completely inappropriate for GMRES.
call JT_y_eq_x (ia(_JT_nrows_), x, xold, status)
elseif (ABS(iparm(_JT_stop_)) .eq. _JT_stop_axb_) then
c
c Compute || A || if needed.
rparm(_JT_anorm_) = JT_MatrixNorm(iparm(_JT_norm_), a, ia, ja, status)
if (status .eq. -2) goto 9999
endif
c
c ... Construct preconditioner as appropriate and write out if
c desired.
if (iparm(_JT_pre_).gt.0 .and. iparm(_JT_prein_).eq._JT_prein_no_) then
call JT_GeneratePreconditioner (iparm, rparm,
& a, ia, ja, ap, iap, jap, status)
if (status .lt. 0) then
status = -3
goto 9999
endif
endif
c
c ... Initialize iteration counter.
iparm(_JT_iter_) = 0
c
c ************* START OUTER LOOP *************
c
c ... Compute residual (w=b-Ax) and write out if desired.
35 continue
call JT_y_eq_x (ia(_JT_nrows_), b, w, status)
call JT_y_eq_y_minus_Ax (a, ia, ja, x, w, status)
if (status .lt. 0) then
status = -3
goto 9999
endif
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w,
& 'JT_GMRES: AT START OF OUTER LOOP (w):', status)
endif
c
c ... If this is first iteration, compute || r0 ||.
if (iparm(_JT_iter_) .eq. 0) then
rnormt(0) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), w, status)
rnorm(0) = rnormt(0)
c
c Check initial residual.
if (rnorm(0) .le. rparm(_JT_tiny_)) then
c
c If the residual norm is smaller than rparm(_JT_tiny_), simply
c declare victory.
status = 0
goto 9999
elseif (iparm(_JT_stop_) .ne. _JT_stop_relchg_) then
c
c Otherwise check to see if initial guess meets convergence
c criterion. Skip this if using the || x - xold || / || x ||
c stopping test. Note that on entry iter = 0 and
c JT_CheckConvergence increments iter.
call JT_CheckConvergence (bnorm, rnormt(0), rnorm(0),
& x, xold, b, a, ia, ja, iparm, rparm, cpu(0),
& err(0), rnormt(0), errt(0),
& status)
if (status .le. 0) goto 9999
else
iparm(_JT_iter_) = 1
endif
endif
c
c ... Initialize directions and write out if desired.
if (iparm(_JT_pre_) .ge. 0) then
call JT_ApplyPreconditioner (iparm, rparm, a, ia, ja,
& ap, iap, jap, w, _v_(1,1), status)
if (status .lt. 0) then
status = -3
goto 9999
endif
elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
call JT_SwapVectors (ia(_JT_nrows_), _v_(1,1), x, status)
status = 1
return
endif
c
45 continue
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,1),
& 'JT_GMRES: JUST BEFORE INNER LOOP (v):', status)
endif
c
c ... Clear vector that will hold residual norm estimates and compute
c initial estimate.
call JT_FillVectorFloat (iparm(_JT_nold_)+1, zero, r, status)
r(1) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), _v_(1,1), status)
if (r(1) .eq. zero) then
status = 0
goto 9999
endif
_h_(1,1) = r(1)
c
c ************* START INNER LOOP *************
c
c ... This should be a do loop over j from 1 to k, but since reverse
c communication for preconditioning requires jumping to a point
c within this loop, which is not allowed, it had to be kludged.
j = 0
50 continue
j = j + 1
if (j .gt. k) goto 999
c
if (iparm(_JT_out_) .ge. 6) then
write (iparm(_JT_luout_),*)
write (iparm(_JT_luout_),905) '*** ITERATION NUMBER: ', iparm(_JT_iter_)
call JT_FlushUnit (iparm(_JT_luout_), status)
endif
c
hjjinv = one / _h_(j,j)
c
c .... Step 1 (Kernel 2).
call JT_y_eq_Ax (a, ia, ja, _v_(1,j), w, status)
if (status .lt. 0) then
status = -3
goto 9999
endif
call JT_x_eq_sx (ia(_JT_nrows_), hjjinv, w, status)
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w,
& 'JT_GMRES: AFTER START OF INNER LOOP (w):', status)
endif
c
c .... Apply preconditioner.
if (iparm(_JT_pre_) .ge. 0) then
call JT_ApplyPreconditioner (iparm, rparm, a, ia, ja,
& ap, iap, jap, w, _v_(1,j+1), status)
if (status .lt. 0) then
status = -3
goto 9999
endif
elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
call JT_SwapVectors (ia(_JT_nrows_), _v_(1,j+1), x, status)
status = 2
return
endif
c
55 continue
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,j+1),
& 'JT_GMRES: AFTER PRECONDITIONING (v):', status)
endif
c
_h_(1,j+1) = JT_DotProduct(ia(_JT_nrows_),
& _v_(1,1), _v_(1,j+1), status)
c
c .... Step 2 (Kernel 3).
do i=1,j-1
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), -_h_(i,j+1),
& _v_(1,i), _v_(1,j+1), status)
_h_(i+1,j+1) = JT_DotProduct(ia(_JT_nrows_),
& _v_(1,i+1), _v_(1,j+1), status)
enddo
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,j+1),
& 'JT_GMRES: AFTER STEP 2 (v):', status)
endif
c
c .... Step 3.
_h_(j,j+1) = _h_(j,j+1)*hjjinv
c
c .... Step 4 (Kernel 4).
call JT_x_eq_sx (ia(_JT_nrows_), hjjinv, _v_(1,j), status)
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), -_h_(j,j+1),
& _v_(1,j), _v_(1,j+1), status)
_h_(j+1,j+1) = JT_DotProduct(ia(_JT_nrows_),
& _v_(1,j+1), _v_(1,j+1), status)
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,j+1),
& 'JT_GMRES: AFTER STEP 4 (v):', status)
endif
c
c .... Step 5.
h_squared = _h_(j+1,j+1)
if (h_squared .lt. zero) then
if (iparm(_JT_out_) .ge. 1) then
write(iparm(_JT_luerr_),*)
write(iparm(_JT_luerr_),901) '****************************************'
write(iparm(_JT_luerr_),901) 'JT_GMRES: ERROR'
write(iparm(_JT_luerr_),901) ' Breakdown in calculation of'//
& 'h(j+1,j+1)'
write(iparm(_JT_luerr_),901) ' This *really* should not happen,'//
& ' and may indicate an input error'
write(iparm(_JT_luerr_),905) ' current iteration number: ', iparm(_JT_iter_)
write(iparm(_JT_luerr_),940) ' norm of residual at breakdown: ',
& rnorm(iparm(_JT_iter_))
write(iparm(_JT_luerr_),901) '****************************************'
endif
status = -5
goto 9999
endif
_h_(j+1,j+1) = SQRT(h_squared)
c
c .... Step 6.
do i=1,j-1
c
c 2x2 matrix multiplication.
hijp = _h_(i,j+1)
hipjp = _h_(i+1,j+1)
_h_(i,j+1) = c(i)*hijp + s(i)*hipjp
_h_(i+1,j+1) = -s(i)*hijp + c(i)*hipjp
enddo
c
c Compute new Gram-Schmidt parameters.
gamma_squared =
& _h_(j,j+1)*_h_(j,j+1) +
& _h_(j+1,j+1)*_h_(j+1,j+1)
if (gamma_squared .le. zero) then
if (iparm(_JT_out_) .ge. 1) then
write(iparm(_JT_luerr_),*)
write(iparm(_JT_luerr_),901) '****************************************'
write(iparm(_JT_luerr_),901) 'JT_GMRES: ERROR'
write(iparm(_JT_luerr_),901) ' Breakdown in calculation of Gram-'//
& 'Schmidt parameters'
write(iparm(_JT_luerr_),901) ' This *really* should not happen,'//
& ' and may indicate an input error'
write(iparm(_JT_luerr_),905) ' current iteration number: ', iparm(_JT_iter_)
write(iparm(_JT_luerr_),940) ' norm of residual at breakdown: ',
& rnorm(iparm(_JT_iter_))
write(iparm(_JT_luerr_),901) '****************************************'
endif
status = -5
goto 9999
endif
gamma = SQRT(gamma_squared)
c(j) = _h_(j,j+1) / gamma
s(j) = _h_(j+1,j+1) / gamma
c
c Compute residual norm estimate.
r(j+1) = -s(j)*r(j)
rnorm(iparm(_JT_iter_)) = ABS(r(j+1))
r(j) = c(j)*r(j)
c
_h_(j,j+1) = c(j)*_h_(j,j+1) +
& s(j)*_h_(j+1,j+1)
c
c .... Solve upper triangular system and compute new iterate *only*
c if needed for output, stopping test, or to calculate the true
c residual and error estimate. Note that this is costly.
c
c The solution is only updated each iteration for stopping test
c || x - xold || / || x ||. For the other stopping tests
c needing || x ||, the solution from the last outer iteration is
c used.
if (iparm(_JT_out_).ge.5 .or.
& iparm(_JT_stop_).eq._JT_stop_relchg_ .or.
& iparm(_JT_resid_).eq.1 .or.
& (iparm(_JT_iter_).le.5 .and. (iparm(_JT_stop_).lt.0 .or.
& iparm(_JT_stop_).eq.1 .or.
& iparm(_JT_stop_).eq.3))) then
call JT_y_eq_x (j, r, rt, status)
call JT_SolveUTriang_Full
& (0, 0, k+1, j, _h_(1,2), rt, status)
if (status .lt. 0) then
status = -3
goto 9999
endif
call JT_y_eq_x (ia(_JT_nrows_), x, xt, status)
do i=1,j
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), rt(i), _v_(1,i), xt,
& status)
enddo
endif
c
c .... Check for convergence.
call JT_CheckConvergence (bnorm, rnormt(0), rnorm(iparm(_JT_iter_)),
& xt, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
& err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
& status)
if (status .le. 0) then
if (status .eq. -2) then
c
c Memory allocation failure, so punt.
goto 9999
else
c
c Either convergence or some other failure, so compute new
c iterate and return.
goto 999
endif
endif
c
c .... Update xold if using || x - xold || / || x || stopping test.
if (iparm(_JT_stop_) .eq. 0) then
call JT_y_eq_x (ia(_JT_nrows_), xt, xold, tmp_status)
endif
c
c ... End of inner loop.
goto 50
999 continue
c
c ... Update iterate.
if (iparm(_JT_out_).ge.5 .or.
& iparm(_JT_stop_).eq._JT_stop_relchg_ .or.
& iparm(_JT_resid_).eq.1) then
c
c May have already done it.
call JT_y_eq_x (ia(_JT_nrows_), xt, x, tmp_status)
else
c
c If not, first solve upper triangular system.
call JT_SolveUTriang_Full (0, 0, k+1, MIN(j,k),
& _h_(1,2), r, tmp_status)
if (tmp_status .lt. 0) then
status = -3
goto 9999
endif
c
c Now update iterate.
do i=1,MIN(j,k)
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), r(i), _v_(1,i), x,
& tmp_status)
enddo
c
c Copy iterate into xt if needed for stopping test.
if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
& ABS(iparm(_JT_stop_)).eq._JT_stop_x_) then
call JT_y_eq_x (ia(_JT_nrows_), x, xt, tmp_status)
endif
endif
c
c ... End of outer loop.
if (status .gt. 0) goto 35
c
9999 continue
c
c ... Subtract time at start of solution from each element of cpu array
c to obtain true CPU time for each iteration.
cpu(0) = cpu0
do i=1,iparm(_JT_iter_)
cpu(i) = cpu(i) - cpu(0)
enddo
#include "deallocate-GMRES.inc"
c
#include "formats.inc"
return
end