GMRES.F


#ifdef strict_f77
#define _h_(i,j) h(IJ2I(iparm(_JT_nold_)+1,i,j))
#define _v_(i,j) v(IJ2I(ia(_JT_nrows_),i,j))
#else
#define _h_(i,j) h(i,j)
#define _v_(i,j) v(i,j)
#endif
#include "parmdefines.h"
#include "iadefines.h"
c***********************************************************************
#include "author.inc"
c*    $Id: GMRES.F,v 1.47 1997/01/01 19:46:23 turner Exp $
c*
c*    Computes the solution to a nonsymmetric system of linear
c*    equations of the form Ax=b by the restarted Generalized Minimal 
c*    Residual method, GMRES(k), where k is the frequency of restart.
c*
c*    An error message is written out and control is returned to the
c*    calling routine if a breakdown in iterates is encountered or if
c*    the maximum number of iterations is exceeded.  At that time,
c*    err(iter) can be examined to determine if execution can continue.
c*
c*    <PARAMETER LIST>
c*
c*     Input:
c*      b - source vector
c*      a - coefficient matrix
c*      ia - integer vector containing info about how "a" is stored
c*        NOTE: see description of ia below
c*      ja - integer array that maps columns in a to true columns
c*
c*     In/Out:
c*      iparm - array of integer parameters
c*      rparm - array of floating point parameters
c*        NOTE: see description of iparm and rparm below
c*      x - solution vector (whatever is in x on entry is used as an
c*          initial guess)
c*
c*     Output:
c*      cpu - cumulative cpu time after each iteration
c*      rnormt - norm of true residual
c*      errt - error estimate based on true residual
c*      rnorm - norm of residual at each iteration
c*      err - error estimate at each iteration
c*      ap - preconditioner
c*      iap - integer vector containing info about how "ap" is stored
c*        NOTE: see description of ia below
c*      jap - same as ja, but for preconditioner
c*      status - return status:
c*        -4  ==>  iparm(_JT_itmax_) exceeded
c*        -3  ==>  internal error
c*        -2  ==>  memory allocation failure
c*        -1  ==>  invalid argument(s)
c*         0  ==>  convergence was achieved
c*        >0  ==>  preconditioning needed
c*
#include "parmdesc.inc"
c*
#include "iadesc.inc"
c*
c*     Variables in iap are analogous to those in ia.
c*
c*    <SUBROUTINES REQUIRED>
c*
c*     JT_ApplyPreconditioner
c*     JT_CheckConvergence
c*     JT_Clock
c*     JT_FlushUnit
c*     JT_GeneratePreconditioner
c*     JT_SolveUTriang_Full
c*     JT_SwapVectors
c*     JT_WriteMatrix
c*     JT_WriteSystem
c*     JT_WriteVectorFloat
c*     JT_x_eq_sx
c*     JT_y_eq_Ax
c*     JT_y_eq_x
c*     JT_y_eq_y_minus_Ax
c*     JT_y_eq_y_plus_sx
c*
c*    <FUNCTIONS REQUIRED>
c*
c*     JT_DotProduct
c*     JT_MatrixNorm
c*     JT_VectorNorm
c*
#include "arrays-GMRES.inc"
c*
#include "copyright.inc"
c***********************************************************************
      subroutine JT_GMRES (b, a, ia, ja, iparm, rparm, x,
     &           cpu, rnormt, errt, rnorm, err, ap, iap, jap, status)
      implicit none
c
c ... Input:
      integer ia(_JT_no_of_storage_parameters_), ja(*)
      real a(*)
c
c ... In/Out:
      integer iap(_JT_no_of_storage_parameters_), jap(*)
      integer iparm(_JT_no_of_iparms_)
      real rparm(_JT_no_of_rparms_)
#ifdef strict_f77
      real x(*), b(*)
#else
      real x(ia(_JT_nrows_)), b(ia(_JT_nrows_))
#endif
      real ap(*)
c
c ... Output:
      integer status
#ifdef strict_f77
      real cpu(0:*), rnorm(0:*), err(0:*), rnormt(0:*), errt(0:*)
#else
      real cpu(0:iparm(_JT_itmax_))
      real rnorm(0:iparm(_JT_itmax_)), err(0:iparm(_JT_itmax_))
      real rnormt(0:iparm(_JT_itmax_)), errt(0:iparm(_JT_itmax_))
#endif
c
c ... Local:
      integer i, j, k, n, tmp_status
#ifdef strict_f77
      integer IJ2I
#endif
      real zero, one, bnorm, gamma, hijp, hipjp, hjjinv,
     &     h_squared, gamma_squared, cpu0
      real JT_DotProduct, JT_MatrixNorm, JT_VectorNorm
#include "declare-GMRES.inc"
c
c ... Parameters.
      parameter (zero=0.0d0, one=1.0d0)
c
c ... Ensure that all variables are saved.
      save
c
c ... Check to see if re-entering after preconditioning.
      if (status .eq. 1) then
       call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), _v_(1,1), x, status)
       goto 45
      elseif (status .eq. 2) then
       call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), _v_(1,j+1), x, status)
       goto 55
      endif
c
c ... Obtain CPU time at start of solution.
      call JT_Clock (1, cpu0, status)
c
c ... Check value of nold.
      if (iparm(_JT_nold_) .le. zero) then
       if (iparm(_JT_out_) .ge. 1) then
        write(iparm(_JT_luerr_),*)
        write(iparm(_JT_luerr_),901) '**************************************'
        write(iparm(_JT_luerr_),901) 'JT_GMRES: ERROR'
        write(iparm(_JT_luerr_),909) '  Invalid value for nold: ', iparm(_JT_nold_)
        write(iparm(_JT_luerr_),901) '  (value must be >0)'
        write(iparm(_JT_luerr_),901) '**************************************'
       endif
       status = -1
       goto 9999
      endif
c
c ... Let k be a synonym for nold.
      k = iparm(_JT_nold_)
c
#include "allocate-GMRES.inc"
c
c ... Initialize dynamically-allocated array v if preconditioning
c     via reverse communication.
#ifdef strict_f77
      n = ia(_JT_nrows_)*(iparm(_JT_nold_)+1)
      call JT_FillVectorFloat (n, zero, v, status)
#else
      do j=1,iparm(_JT_nold_)+1
       call JT_FillVectorFloat (ia(_JT_nrows_), zero, v(1,j), status)
      enddo
#endif
c
c ... Write initial system if desired.
      if (iparm(_JT_out_) .ge. 4) then
       call JT_WriteSystem (iparm(_JT_luout_), a, ia, ja, x, b, 
     &     'JT_GMRES: ON ENTRY (a, x, b):', status)
      endif
c
c ... Initialize cpu time and residual and error norm arrays.
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, cpu, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, err, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, errt, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnorm, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnormt, status)
c
c ... Initialization dependent on the stopping test.
c
c     Compute || b || if needed.
      if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
     &    ABS(iparm(_JT_stop_)).eq._JT_stop_b_) then
       bnorm = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), b, status)
      else
       bnorm = zero
      endif
c
c     Copy initial guess into xt if needed for stopping test.
      if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
     &    ABS(iparm(_JT_stop_)).eq._JT_stop_x_) then
       call JT_y_eq_x (ia(_JT_nrows_), x, xt, tmp_status)
      endif
c
      rparm(_JT_anorm_) = zero
      if (iparm(_JT_stop_) .eq. _JT_stop_relchg_) then
c
c      Initialize xold if using || x - xold || / || x || stopping test,
c      which is actually completely inappropriate for GMRES.
       call JT_y_eq_x (ia(_JT_nrows_), x, xold, status)
      elseif (ABS(iparm(_JT_stop_)) .eq. _JT_stop_axb_) then
c
c      Compute || A || if needed.
       rparm(_JT_anorm_) = JT_MatrixNorm(iparm(_JT_norm_), a, ia, ja, status)
       if (status .eq. -2) goto 9999
      endif
c
c ... Construct preconditioner as appropriate and write out if
c     desired.
      if (iparm(_JT_pre_).gt.0 .and. iparm(_JT_prein_).eq._JT_prein_no_) then
       call JT_GeneratePreconditioner (iparm, rparm, 
     &      a, ia, ja, ap, iap, jap, status)
       if (status .lt. 0) then
        status = -3
        goto 9999
       endif
      endif
c
c ... Initialize iteration counter.
      iparm(_JT_iter_) = 0
c
c ************* START OUTER LOOP *************
c
c ... Compute residual (w=b-Ax) and write out if desired.
   35 continue
      call JT_y_eq_x (ia(_JT_nrows_), b, w, status)
      call JT_y_eq_y_minus_Ax (a, ia, ja, x, w, status)
      if (status .lt. 0) then
       status = -3
       goto 9999
      endif
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w, 
     &     'JT_GMRES: AT START OF OUTER LOOP (w):', status)
      endif
c
c ... If this is first iteration, compute || r0 ||.
      if (iparm(_JT_iter_) .eq. 0) then
       rnormt(0) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), w, status)
       rnorm(0) = rnormt(0)
c
c      Check initial residual.
       if (rnorm(0) .le. rparm(_JT_tiny_)) then
c
c       If the residual norm is smaller than rparm(_JT_tiny_), simply
c       declare victory.
        status = 0
        goto 9999
       elseif (iparm(_JT_stop_) .ne. _JT_stop_relchg_) then
c
c       Otherwise check to see if initial guess meets convergence
c       criterion.  Skip this if using the || x - xold || / || x ||
c       stopping test.  Note that on entry iter = 0 and
c       JT_CheckConvergence increments iter.  
        call JT_CheckConvergence (bnorm, rnormt(0), rnorm(0),
     &       x, xold, b, a, ia, ja, iparm, rparm, cpu(0),
     &       err(0), rnormt(0), errt(0),
     &       status)
        if (status .le. 0) goto 9999
       else
        iparm(_JT_iter_) = 1
       endif
      endif
c
c ... Initialize directions and write out if desired.
      if (iparm(_JT_pre_) .ge. 0) then
       call JT_ApplyPreconditioner (iparm, rparm, a, ia, ja,
     &      ap, iap, jap, w, _v_(1,1), status)
       if (status .lt. 0) then
        status = -3
        goto 9999
       endif
      elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
       call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), _v_(1,1), x, status)
       status = 1
       return
      endif
c
   45 continue
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,1), 
     &     'JT_GMRES: JUST BEFORE INNER LOOP (v):', status)
      endif
c
c ... Clear vector that will hold residual norm estimates and compute
c     initial estimate.
      call JT_FillVectorFloat (iparm(_JT_nold_)+1, zero, r, status)
      r(1) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), _v_(1,1), status)
      if (r(1) .eq. zero) then
       status = 0
       goto 9999
      endif
      _h_(1,1) = r(1)
c
c ************* START INNER LOOP *************
c
c ... This should be a do loop over j from 1 to k, but since reverse
c     communication for preconditioning requires jumping to a point
c     within this loop, which is not allowed, it had to be kludged.
      j = 0
   50 continue
      j = j + 1
      if (j .gt. k) goto 999
c
       if (iparm(_JT_out_) .ge. 6) then
        write (iparm(_JT_luout_),*)
        write (iparm(_JT_luout_),905) '*** ITERATION NUMBER: ', iparm(_JT_iter_)
        call JT_FlushUnit (iparm(_JT_luout_), status)
       endif
c
       hjjinv = one / _h_(j,j)
c
c .... Step 1 (Kernel 2).
       call JT_y_eq_Ax (a, ia, ja, _v_(1,j), w, status)
       if (status .lt. 0) then
        status = -3
        goto 9999
       endif
       call JT_x_eq_sx (ia(_JT_nrows_), hjjinv, w, status)
       if (iparm(_JT_out_) .ge. 6) then
        call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w, 
     &      'JT_GMRES: AFTER START OF INNER LOOP (w):', status)
       endif
c
c .... Apply preconditioner.
       if (iparm(_JT_pre_) .ge. 0) then
        call JT_ApplyPreconditioner (iparm, rparm, a, ia, ja,
     &       ap, iap, jap, w, _v_(1,j+1), status)
        if (status .lt. 0) then
         status = -3
         goto 9999
        endif
       elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
        call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
        call JT_SwapVectors (ia(_JT_nrows_), _v_(1,j+1), x, status)
        status = 2
        return
       endif
c
   55  continue
       if (iparm(_JT_out_) .ge. 6) then
        call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,j+1), 
     &      'JT_GMRES: AFTER PRECONDITIONING (v):', status)
       endif
c
       _h_(1,j+1) = JT_DotProduct(ia(_JT_nrows_), 
     &            _v_(1,1), _v_(1,j+1), status)
c
c .... Step 2 (Kernel 3).
       do i=1,j-1
        call JT_y_eq_y_plus_sx (ia(_JT_nrows_), -_h_(i,j+1), 
     &       _v_(1,i), _v_(1,j+1), status)
        _h_(i+1,j+1) = JT_DotProduct(ia(_JT_nrows_),
     &               _v_(1,i+1), _v_(1,j+1), status)
       enddo
       if (iparm(_JT_out_) .ge. 6) then
        call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,j+1), 
     &      'JT_GMRES: AFTER STEP 2 (v):', status)
       endif
c
c .... Step 3.
       _h_(j,j+1) = _h_(j,j+1)*hjjinv
c
c .... Step 4 (Kernel 4).
       call JT_x_eq_sx (ia(_JT_nrows_), hjjinv, _v_(1,j), status)
       call JT_y_eq_y_plus_sx (ia(_JT_nrows_), -_h_(j,j+1),
     &      _v_(1,j), _v_(1,j+1), status)
       _h_(j+1,j+1) = JT_DotProduct(ia(_JT_nrows_), 
     &              _v_(1,j+1), _v_(1,j+1), status)
       if (iparm(_JT_out_) .ge. 6) then
        call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), _v_(1,j+1), 
     &      'JT_GMRES: AFTER STEP 4 (v):', status)
       endif
c
c .... Step 5.
       h_squared = _h_(j+1,j+1)
       if (h_squared .lt. zero) then
        if (iparm(_JT_out_) .ge. 1) then
         write(iparm(_JT_luerr_),*)
         write(iparm(_JT_luerr_),901) '****************************************'
         write(iparm(_JT_luerr_),901) 'JT_GMRES: ERROR'
         write(iparm(_JT_luerr_),901) '  Breakdown in calculation of'//
     &                      'h(j+1,j+1)'
         write(iparm(_JT_luerr_),901) '    This *really* should not happen,'//
     &                       ' and may indicate an input error'
         write(iparm(_JT_luerr_),905) '  current iteration number: ', iparm(_JT_iter_)
         write(iparm(_JT_luerr_),940) '  norm of residual at breakdown: ',
     &                       rnorm(iparm(_JT_iter_))
         write(iparm(_JT_luerr_),901) '****************************************'
        endif
        status = -5
        goto 9999
       endif
       _h_(j+1,j+1) = SQRT(h_squared)
c
c .... Step 6.
       do i=1,j-1
c
c       2x2 matrix multiplication.
        hijp = _h_(i,j+1)
        hipjp = _h_(i+1,j+1)
        _h_(i,j+1) = c(i)*hijp + s(i)*hipjp
        _h_(i+1,j+1) = -s(i)*hijp + c(i)*hipjp
       enddo
c
c      Compute new Gram-Schmidt parameters.
       gamma_squared =
     &    _h_(j,j+1)*_h_(j,j+1) +
     &    _h_(j+1,j+1)*_h_(j+1,j+1)
       if (gamma_squared .le. zero) then
        if (iparm(_JT_out_) .ge. 1) then
         write(iparm(_JT_luerr_),*)
         write(iparm(_JT_luerr_),901) '****************************************'
         write(iparm(_JT_luerr_),901) 'JT_GMRES: ERROR'
         write(iparm(_JT_luerr_),901) '  Breakdown in calculation of Gram-'//
     &                      'Schmidt parameters'
         write(iparm(_JT_luerr_),901) '    This *really* should not happen,'//
     &                       ' and may indicate an input error'
         write(iparm(_JT_luerr_),905) '  current iteration number: ', iparm(_JT_iter_)
         write(iparm(_JT_luerr_),940) '  norm of residual at breakdown: ',
     &                       rnorm(iparm(_JT_iter_))
         write(iparm(_JT_luerr_),901) '****************************************'
        endif
        status = -5
        goto 9999
       endif
       gamma = SQRT(gamma_squared)
       c(j) = _h_(j,j+1) / gamma
       s(j) = _h_(j+1,j+1) / gamma
c
c      Compute residual norm estimate.
       r(j+1) = -s(j)*r(j)
       rnorm(iparm(_JT_iter_)) = ABS(r(j+1))
       r(j) = c(j)*r(j)
c
       _h_(j,j+1) = c(j)*_h_(j,j+1) + 
     &              s(j)*_h_(j+1,j+1)
c
c .... Solve upper triangular system and compute new iterate *only*
c      if needed for output, stopping test, or to calculate the true
c      residual and error estimate.  Note that this is costly.
c
c      The solution is only updated each iteration for stopping test
c      || x - xold || / || x ||.  For the other stopping tests
c      needing || x ||, the solution from the last outer iteration is
c      used.
       if (iparm(_JT_out_).ge.5 .or.
     &     iparm(_JT_stop_).eq._JT_stop_relchg_ .or.
     &     iparm(_JT_resid_).eq.1 .or.
     &     (iparm(_JT_iter_).le.5 .and. (iparm(_JT_stop_).lt.0 .or.
     &                                   iparm(_JT_stop_).eq.1 .or.
     &                                   iparm(_JT_stop_).eq.3))) then
        call JT_y_eq_x (j, r, rt, status)
        call JT_SolveUTriang_Full
     &       (0, 0, k+1, j, _h_(1,2), rt, status)
        if (status .lt. 0) then
         status = -3
         goto 9999
        endif
        call JT_y_eq_x (ia(_JT_nrows_), x, xt, status)
        do i=1,j
         call JT_y_eq_y_plus_sx (ia(_JT_nrows_), rt(i), _v_(1,i), xt,
     &        status)
        enddo
       endif
c
c .... Check for convergence.
       call JT_CheckConvergence (bnorm, rnormt(0), rnorm(iparm(_JT_iter_)),
     &      xt, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
     &      err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
     &      status)
       if (status .le. 0) then
        if (status .eq. -2) then
c
c        Memory allocation failure, so punt.
         goto 9999
        else
c
c        Either convergence or some other failure, so compute new
c        iterate and return.
         goto 999
        endif
       endif
c
c .... Update xold if using || x - xold || / || x || stopping test.
       if (iparm(_JT_stop_) .eq. 0) then
        call JT_y_eq_x (ia(_JT_nrows_), xt, xold, tmp_status)
       endif
c
c ... End of inner loop.
      goto 50
  999 continue
c
c ... Update iterate.
      if (iparm(_JT_out_).ge.5 .or.
     &    iparm(_JT_stop_).eq._JT_stop_relchg_ .or.
     &    iparm(_JT_resid_).eq.1) then
c
c      May have already done it.
       call JT_y_eq_x (ia(_JT_nrows_), xt, x, tmp_status)
      else
c
c      If not, first solve upper triangular system.
       call JT_SolveUTriang_Full (0, 0, k+1, MIN(j,k), 
     &     _h_(1,2), r, tmp_status)
       if (tmp_status .lt. 0) then
        status = -3
        goto 9999
       endif
c
c      Now update iterate.
       do i=1,MIN(j,k)
        call JT_y_eq_y_plus_sx (ia(_JT_nrows_), r(i), _v_(1,i), x,
     &       tmp_status)
       enddo
c
c      Copy iterate into xt if needed for stopping test.
       if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
     &     ABS(iparm(_JT_stop_)).eq._JT_stop_x_) then
        call JT_y_eq_x (ia(_JT_nrows_), x, xt, tmp_status)
       endif
      endif
c
c ... End of outer loop.
      if (status .gt. 0) goto 35
c
 9999 continue
c
c ... Subtract time at start of solution from each element of cpu array
c     to obtain true CPU time for each iteration.
      cpu(0) = cpu0
      do i=1,iparm(_JT_iter_)
       cpu(i) = cpu(i) - cpu(0)
      enddo
#include "deallocate-GMRES.inc"
c
#include "formats.inc"
      return
      end