CG.F


#include "parmdefines.h"
#include <iadefines.h>
c***********************************************************************
#include "author.inc"
c*    $Id: CG.F,v 1.43 1996/07/11 04:12:39 turner Exp $
c*
c*    Computes the solution to an SPD system of linear equations of
c*    the form Ax=b by conjugate gradients.
c*
c*    An error message is written out and control is returned to the
c*    calling routine if a breakdown in iterates is encountered or if
c*    the maximum number of iterations is exceeded.  At that time,
c*    err(iter) can be examined to determine if execution can continue.
c*
c*    <PARAMETER LIST>
c*
c*     Input:
c*      b - source vector
c*      a - coefficient matrix
c*      ia - integer vector containing info about how "a" is stored
c*        NOTE: see description of ia below
c*      ja - integer array that maps columns in a to true columns
c*
c*     In/Out:
c*      iparm - array of integer parameters
c*      rparm - array of floating point parameters
c*        NOTE: see description of iparm and rparm below
c*      x - solution vector (whatever is in x on entry is used as an
c*          initial guess)
c*
c*     Output:
c*      cpu - cumulative cpu time after each iteration
c*      rnormt - norm of true residual
c*      errt - error estimate based on true residual
c*      rnorm - norm of residual at each iteration
c*      err - error estimate at each iteration
c*      ap - preconditioner
c*      iap - integer vector containing info about how "ap" is stored
c*        NOTE: see description of ia below
c*      jap - same as ja, but for preconditioner
c*      status - return status:
c*        -5  ==>  iteration failed due to breakdown
c*        -4  ==>  iparm(_JT_itmax_) exceeded
c*        -3  ==>  internal error
c*        -2  ==>  memory allocation failure
c*        -1  ==>  invalid argument(s)
c*         0  ==>  convergence was achieved
c*        >0  ==>  preconditioning needed
c*
#include "parmdesc.inc"
c*
#include "iadesc.inc"
c*
c*     Variables in iap are analogous to those in ia.
c*
c*    <SUBROUTINES REQUIRED>
c*
c*     JT_ApplyPreconditioner
c*     JT_CheckConvergence
c*     JT_Clock
c*     JT_FlushUnit
c*     JT_GeneratePreconditioner
c*     JT_SwapVectors
c*     JT_WriteMatrix
c*     JT_WriteSystem
c*     JT_WriteVectorFloat
c*     JT_y_eq_Ax
c*     JT_y_eq_sy_plus_x
c*     JT_y_eq_x
c*     JT_y_eq_y_minus_Ax
c*     JT_y_eq_y_plus_sx
c*
c*    <FUNCTIONS REQUIRED>
c*
c*     JT_DotProduct
c*     JT_MatrixNorm
c*     JT_VectorNorm
c*
#include "arrays-CG.inc"
c*
#include "copyright.inc"
c***********************************************************************
      subroutine JT_CG (b, a, ia, ja, iparm, rparm, x, 
     &           cpu, rnormt, errt, rnorm, err, ap, iap, jap, status)
      implicit none
c
c ... Input:
      integer ia(_JT_no_of_storage_parameters_), ja(*)
      real a(*)
c
c ... In/Out:
      integer iap(_JT_no_of_storage_parameters_), jap(*)
      integer iparm(_JT_no_of_iparms_)
      real rparm(_JT_no_of_rparms_)
#ifdef strict_f77
      real x(*), b(*)
#else
      real x(ia(_JT_nrows_)), b(ia(_JT_nrows_))
#endif
      real ap(*)
c
c ... Output:
      integer status
#ifdef strict_f77
      real cpu(0:*), rnorm(0:*), err(0:*), rnormt(0:*), errt(0:*)
#else
      real cpu(0:iparm(_JT_itmax_))
      real rnorm(0:iparm(_JT_itmax_)), err(0:iparm(_JT_itmax_))
      real rnormt(0:iparm(_JT_itmax_)), errt(0:iparm(_JT_itmax_))
#endif
c
c ... Local:
      integer i, freq
      real zero, alpha, beta, bnorm, rho, rhoold, sigma, cpu0
      real JT_DotProduct, JT_MatrixNorm, JT_VectorNorm
#include "declare-CG.inc"
c
c ... Parameters.
      parameter (zero=0.0d0)
c
c ... Ensure that all variables are saved.
      save
c
c ... Check to see if re-entering after preconditioning.
      if (status .eq. 1) then
       call JT_SwapVectors (ia(_JT_nrows_), r, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
       goto 55
      endif
c
c ... Obtain CPU time at start of solution.
      call JT_Clock (1, cpu0, status)
c
#include "allocate-CG.inc"
c
c ... Write initial system if desired.
      if (iparm(_JT_out_) .ge. 4) then
       call JT_WriteSystem (iparm(_JT_luout_), a, ia, ja, x, b, 
     &     'JT_CG: ON ENTRY (a,x,b):', status)
      endif
c
c ... Initialize cpu time and residual and error norm arrays.
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, cpu, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, err, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, errt, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnorm, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnormt, status)
c
c ... Set freq at which to use true residual.  Set minimum of
c     every 10 iterations.
      freq = MAX(10, INT(SQRT(REAL(ia(_JT_nrows_)))))
c
c ... Initialization dependent on the stopping test.
c
c     Compute || b || if needed.
      if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
     &    ABS(iparm(_JT_stop_)).eq._JT_stop_b_) then
       bnorm = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), b, status)
      else
       bnorm = zero
      endif
c
      rparm(_JT_anorm_) = zero
      if (iparm(_JT_stop_) .eq. _JT_stop_relchg_) then
c
c      Initialize xold if using || x - xold || / || x || stopping test.
       call JT_y_eq_x (ia(_JT_nrows_), x, xold, status)
      elseif (ABS(iparm(_JT_stop_)) .eq. 1) then
c
c      Compute || A || if needed.
       rparm(_JT_anorm_) = JT_MatrixNorm(iparm(_JT_norm_), a, ia, ja, status)
       if (status .eq. -2) goto 9999
      endif
c
c ... Construct preconditioner as appropriate and write out if
c     desired.
      if (iparm(_JT_pre_).gt.0 .and. iparm(_JT_prein_).eq._JT_prein_no_) then
       call JT_GeneratePreconditioner (iparm, rparm, 
     &      a, ia, ja, ap, iap, jap, status)
       if (status .lt. 0) then
        status = -3
        goto 9999
       endif
      endif
c
c ... Initialize iteration counter.
      iparm(_JT_iter_) = 0
c
c ************* START MAIN LOOP *************
c
   35 continue
      if (iparm(_JT_out_) .ge. 6) then
       write (iparm(_JT_luout_),*)
       write (iparm(_JT_luout_),905) '*** ITERATION NUMBER: ', iparm(_JT_iter_)
       call JT_FlushUnit (iparm(_JT_luout_), status)
      endif
c
c ... Compute residual and || r ||.
      if (iparm(_JT_iter_).eq.0 .or.
     &    ((REAL(iparm(_JT_iter_))/freq .eq. REAL(iparm(_JT_iter_)/freq)) .and.
     &     iparm(_JT_resid_).ge.2)) then
c
c      Use r = b - Ax.  In this case also set rnormt = rnorm since it
c      will be input to JT_CheckConvergence rather than output.
       if (iparm(_JT_out_) .ge. 5) then
        write(iparm(_JT_luout_),*)
        write(iparm(_JT_luout_),905)
     &    'Computing true residual at iteration:', iparm(_JT_iter_)
        write(iparm(_JT_luout_),*)
        call JT_FlushUnit (iparm(_JT_luout_), status)
       endif
       call JT_y_eq_x (ia(_JT_nrows_), b, r, status)
       call JT_y_eq_y_minus_Ax (a, ia, ja, x, r, status)
       if (status .lt. 0) then
        if (status .ne. -2) status = -3
        goto 9999
       endif
       rnorm(iparm(_JT_iter_)) =
     &    JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), r, status)
       rnormt(iparm(_JT_iter_)) = rnorm(iparm(_JT_iter_))
      else
c
c      Use recursive formula: r = r - alpha*w
       call JT_y_eq_y_plus_sx (ia(_JT_nrows_), -alpha, w, r, status)
       rnorm(iparm(_JT_iter_)) =
     &    JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), r, status)
      endif
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), r,
     &     'JT_CG: RESIDUAL (r):', status)
      endif
c
c ... If the residual norm is smaller than rparm(_JT_tiny_), simply 
c     declare victory.
      if (rnorm(iparm(_JT_iter_)) .le. rparm(_JT_tiny_)) then
       status = 0
       goto 9999
      endif
c
c ... Check for convergence.
c
c     Note that if using || x - xold || / || x || stopping test, must 
c     protect against testing for convergence on first iteration.
      if (iparm(_JT_iter_).ne.0 .or.
     &    iparm(_JT_stop_).ne._JT_stop_relchg_) then
       call JT_CheckConvergence (bnorm, rnormt(0), rnorm(iparm(_JT_iter_)),
     &      x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
     &      err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
     &      status)
       if (status .le. 0) goto 9999
      endif
c
c ... Precondition.
      if (iparm(_JT_pre_) .ge. 0) then
       call JT_ApplyPreconditioner (iparm, rparm, 
     &      a, ia, ja, ap, iap, jap, r, w, status)
       if (status .lt. 0) then
        if (status .ne. -2) status = -3
        goto 9999
       endif
      elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
       call JT_SwapVectors (ia(_JT_nrows_), r, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
       status = 1
       return
      endif
   55 continue
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w, 
     &     'JT_CG: AFTER PRECONDITIONING (w):', status)
      endif
c
      rho = JT_DotProduct(ia(_JT_nrows_), w, r, status)
      if (rho .eq. zero) then
       if (iparm(_JT_out_) .ge. 1) then
        write(iparm(_JT_luerr_),*)
        write(iparm(_JT_luerr_),901) '*********************************'
        write(iparm(_JT_luerr_),901) 'JT_CG: ERROR'
        write(iparm(_JT_luerr_),901) '  Breakdown (rho = 0.0)'
        write(iparm(_JT_luerr_),905) '  current iteration number: ', iparm(_JT_iter_)
        write(iparm(_JT_luerr_),940) '  norm of residual at breakdown: ',
     &                      rnorm(iparm(_JT_iter_))
        write(iparm(_JT_luerr_),901) '*********************************'
       endif
       status = -5
       goto 9999
      endif
c
c ... Compute new direction: p = beta*p + w
      if (iparm(_JT_iter_) .eq. 1) then
       call JT_y_eq_x (ia(_JT_nrows_), w, p, status)
      else
       beta = rho/rhoold
       call JT_y_eq_sy_plus_x (ia(_JT_nrows_), beta, w, p, status)
      endif
      rhoold = rho
c
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), p, 
     &     'JT_CG: NEW DIRECTION (p):', status)
      endif
      call JT_y_eq_Ax (a, ia, ja, p, w, status)
      if (status .lt. 0) then
       if (status .ne. -2) status = -3
       goto 9999
      endif
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w, 
     &     'JT_CG: AFTER CALC. OF NEW DIRECTION (w):', status)
      endif
      sigma = JT_DotProduct(ia(_JT_nrows_), p, w, status)
      if (sigma .eq. zero) then
       if (iparm(_JT_out_) .ge. 1) then
        write(iparm(_JT_luerr_),*)
        write(iparm(_JT_luerr_),901) '*********************************'
        write(iparm(_JT_luerr_),901) 'JT_CG: ERROR'
        write(iparm(_JT_luerr_),901) '  Breakdown (sigma = 0.0)'
        write(iparm(_JT_luerr_),905) '  current iteration number: ', iparm(_JT_iter_)
        write(iparm(_JT_luerr_),940) '  norm of residual at breakdown: ',
     &                      rnorm(iparm(_JT_iter_))
        write(iparm(_JT_luerr_),901) '*********************************'
       endif
       status = -5
       goto 9999
      endif
      alpha = rhoold/sigma
c
c ... Compute new iterate: x = x + alpha*p
      call JT_y_eq_y_plus_sx (ia(_JT_nrows_), alpha, p, x, status)
c
      goto 35
c
 9999 continue
c
c ... Subtract time at start of solution from each element of cpu array
c     to obtain true CPU time for each iteration.
      cpu(0) = cpu0
      do i=1,iparm(_JT_iter_)
       cpu(i) = cpu(i) - cpu(0)
      enddo
#include "deallocate-CG.inc"
c
#include "formats.inc"
      return
      end