BCGS.F


#include "parmdefines.h"
#include "iadefines.h"
c***********************************************************************
#include "author.inc"
c*    $Id: BCGS.F,v 1.42 1996/07/11 04:11:29 turner Exp $
c*
c*    Computes the solution to a nonsymmetric system of linear
c*    equations of the form Ax=b by bi-conjugate gradients squared
c*    (BCGS; also known as conjugate gradients squared) or Freund's
c*    transpose-free quasi-minimal residuals method (TFQMR).
c*
c*    An error message is written out and control is returned to the
c*    calling routine if a breakdown in iterates is encountered or if
c*    the maximum number of iterations is exceeded.  At that time,
c*    err(iter) can be examined to determine if execution can continue.
c*
c*    <PARAMETER LIST>
c*
c*     Input:
c*      b - source vector
c*      a - coefficient matrix
c*      ia - integer vector containing info about how "a" is stored
c*        NOTE: see description of ia below
c*      ja - integer map for coefficient
c*
c*     In/Out:
c*      iparm - array of integer parameters
c*      rparm - array of floating point parameters
c*        NOTE: see description of iparm and rparm below
c*      x - solution vector (whatever is in x on entry is used as an
c*          initial guess)
c*
c*     Output:
c*      cpu - cumulative cpu time after each iteration
c*      rnormt - norm of true residual
c*      errt - error estimate based on true residual
c*      rnorm - norm of residual at each iteration
c*      err - error estimate at each iteration
c*      ap - preconditioner
c*      iap - integer vector containing info about how "ap" is stored
c*        NOTE: see description of ia below
c*              (elements of iap are analogous to those in ia)
c*      jap - same as ja, but for preconditioner
c*      status - return status:
c*        -5  ==>  iteration failed due to breakdown
c*        -4  ==>  iparm(_JT_itmax_) exceeded
c*        -3  ==>  internal error
c*        -2  ==>  memory allocation failure
c*        -1  ==>  invalid argument(s)
c*         0  ==>  convergence was achieved
c*        >0  ==>  preconditioning needed
c*
#include "parmdesc.inc"
c*
#include "iadesc.inc"
c*
c*    <SUBROUTINES REQUIRED>
c*
c*     JT_ApplyPreconditioner
c*     JT_CheckConvergence
c*     JT_Clock
c*     JT_FlushUnit
c*     JT_GeneratePreconditioner
c*     JT_SwapVectors
c*     JT_WriteMatrix
c*     JT_WriteSystem
c*     JT_WriteVectorFloat
c*     JT_x_eq_sx
c*     JT_y_eq_Ax
c*     JT_y_eq_sy_plus_x
c*     JT_y_eq_x
c*     JT_y_eq_y_minus_Ax
c*     JT_y_eq_y_plus_x
c*     JT_y_eq_y_plus_sx
c*     JT_y_eq_y_times_x
c*     JT_z_eq_y_plus_x
c*     JT_z_eq_sy_plus_x
c*
c*    <FUNCTIONS REQUIRED>
c*
c*     JT_DotProduct
c*     JT_MatrixNorm
c*     JT_VectorNorm
c*
#include "arrays-BCGS.inc"
c*
#include "copyright.inc"
c***********************************************************************
      subroutine JT_BCGS (b, a, ia, ja, iparm, rparm, x,
     &           cpu, rnormt, errt, rnorm, err, ap, iap, jap, status)
      implicit none
c
c ... Input:
      integer ia(_JT_no_of_storage_parameters_), ja(*)
      real a(*)
c
c ... In/Out:
      integer iap(_JT_no_of_storage_parameters_), jap(*)
      integer iparm(_JT_no_of_iparms_)
      real rparm(_JT_no_of_rparms_)
#ifdef strict_f77
      real x(*), b(*)
#else
      real x(ia(_JT_nrows_)), b(ia(_JT_nrows_))
#endif
      real ap(*)
c
c ... Output:
      integer status
#ifdef strict_f77
      real cpu(0:*), rnorm(0:*), err(0:*), rnormt(0:*), errt(0:*)
#else
      real cpu(0:iparm(_JT_itmax_))
      real rnorm(0:iparm(_JT_itmax_)), err(0:iparm(_JT_itmax_))
      real rnormt(0:iparm(_JT_itmax_)), errt(0:iparm(_JT_itmax_))
#endif
c
c ... Local:
      integer i
      real zero, one, cpu0,
     &     alpha, beta, bnorm, rho, rhoold, sigma, 
     &     eta, rnorm_iter, rnormqmr,
     &     tau, term, theta, theta2
      real JT_DotProduct, JT_MatrixNorm, JT_VectorNorm
#include "declare-BCGS.inc"
c
c ... Parameters.
      parameter (zero=0.0d0, one=1.0d0)
c
c ... Ensure that all variables are saved.
      save
c
c ... Check to see if re-entering after preconditioning.
      if (status .eq. 1) then
       call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), r, x, status)
       goto 25
      elseif (status .eq. 2) then
       call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
       goto 45
      elseif (status .eq. 3) then
       call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
       goto 55
      endif
c
c ... Obtain CPU time at start of solution.
      call JT_Clock (1, cpu0, status)
c
#include "allocate-BCGS.inc"
c
c ... Write initial system if desired.
      if (iparm(_JT_out_) .ge. 4) then
       call JT_WriteSystem (iparm(_JT_luout_), a, ia, ja,
     &      x, b, 'JT_BCGS: ON ENTRY (a,x,b):', status)
      endif
c
c ... Initialize cpu time and residual and error norm arrays.
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, cpu, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, err, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, errt, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnorm, status)
      call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnormt, status)
c
c ... Initialization dependent on the stopping test.
c
c     Compute || b || if needed.
      if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
     &    ABS(iparm(_JT_stop_)).eq._JT_stop_b_) then
       bnorm = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), b, status)
      else
       bnorm = zero
      endif
c
      rparm(_JT_anorm_) = zero
      if (iparm(_JT_stop_) .eq. _JT_stop_relchg_) then
c
c      Initialize xold if using || x - xold || / || x || stopping test.
       call JT_y_eq_x (ia(_JT_nrows_), x, xold, status)
      elseif (ABS(iparm(_JT_stop_)) .eq. _JT_stop_axb_) then
c
c      Compute || A || if needed.
       rparm(_JT_anorm_) = JT_MatrixNorm(iparm(_JT_norm_), a, ia, ja, status)
       if (status .eq. -2) goto 9999
      endif
c
c ... Construct preconditioner as appropriate and write out if
c     desired.
      if (iparm(_JT_pre_).gt.0 .and. iparm(_JT_prein_).eq._JT_prein_no_) then
       call JT_GeneratePreconditioner (iparm, rparm, 
     &      a, ia, ja, ap, iap, jap, status)
       if (status .lt. 0) then
        status = -3
        goto 9999
       endif
      endif
c
c ... Compute initial residual (w=b-Ax) and write out if desired.
      call JT_y_eq_x (ia(_JT_nrows_), b, w, status)
      call JT_y_eq_y_minus_Ax (a, ia, ja, x, w, status)
      if (status .lt. 0) then
       if (status .ne. -2) status = -3
       goto 9999
      endif
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w, 
     &     'JT_BCGS: INITIAL RESIDUAL (w):', status)
      endif
c
c ... Initialize iteration counter.
      iparm(_JT_iter_) = 0
c
c ... Compute || r0 ||.
      rnormt(0) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), w, status)
      rnorm(0) = rnormt(0)
c
c ... Check to see if initial guess meets the convergence
c     criterion.
      if (rnorm(0) .le. rparm(_JT_tiny_)) then
       status = 0
       goto 9999
      endif
c
c     Skip initial check if using || x - xold || / || x || stopping
c     test.  Note that on entry iter = 0 and JT_CheckConvergence
c     increments iparm(_JT_iter_).
      if (iparm(_JT_stop_) .ne. _JT_stop_relchg_) then
       call JT_CheckConvergence (bnorm, rnormt(0), rnorm(iparm(_JT_iter_)),
     &      x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
     &      err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
     &      status)
       if (status .le. 0) goto 9999
      else
       iparm(_JT_iter_) = 1
      endif
c
c ... Initialize TFQMR parameters as appropriate.
      theta = zero
      theta2 = zero
      eta = zero
      if (iparm(_JT_method_) .eq. _JT_method_TFQMR_) then
       tau = rnorm(0)
       call JT_FillVectorFloat (ia(_JT_nrows_), zero, d, status)
      else
       tau = zero
      endif
c
c ... Initialize r.
      if (iparm(_JT_pre_) .ge. 0) then
       call JT_ApplyPreconditioner (iparm, rparm, 
     &      a, ia, ja, ap, iap, jap, w, r, status)
       if (status .lt. 0) then
        if (status .ne. -2) status = -3
        goto 9999
       endif
      elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
       call JT_SwapVectors (ia(_JT_nrows_), w, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), r, x, status)
       status = 1
       return
      endif
c
c ... Initialize q.
   25 continue
      call JT_y_eq_x (ia(_JT_nrows_), r, q, status) ! q = r
c
c ... Compute rhoold = (r,q).
      rhoold = JT_DotProduct(ia(_JT_nrows_), r, q, status)
c
c ... First part of first iteration is special since beta is zero
c     and there is no h vector yet.
      call JT_y_eq_x (ia(_JT_nrows_), r, u, status) ! u = r
      call JT_y_eq_x (ia(_JT_nrows_), r, g, status) ! g = r
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), r, 
     &     'JT_BCGS: BEFORE START OF MAIN LOOP (r):', status)
      endif
c
c ************* START MAIN LOOP *************
c
   35 continue
      if (iparm(_JT_out_) .ge. 6) then
       write (iparm(_JT_luout_),*)
       write (iparm(_JT_luout_),905) '*** ITERATION NUMBER: ', iparm(_JT_iter_)
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), u, 
     &     'JT_BCGS: AFTER START OF MAIN LOOP (u):', status)
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), g, 
     &     'JT_BCGS: AFTER START OF MAIN LOOP (g):', status)
      endif
      call JT_y_eq_Ax (a, ia, ja, g, work, status)
      if (status .lt. 0) then
       if (status .ne. -2) status = -3
       goto 9999
      endif
c
      if (iparm(_JT_pre_) .ge. 0) then
       call JT_ApplyPreconditioner (iparm, rparm, 
     &      a, ia, ja, ap, iap, jap, work, w, status)
       if (status .lt. 0) then
        if (status .ne. -2) status = -3
        goto 9999
       endif
      elseif (iparm(_JT_pre_) .eq. -1) then
       call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
       status = 2
       return
      endif
c
   45 continue
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w, 
     &     'JT_BCGS: AFTER FIRST PRECONDITIONING (w):', status)
      endif
      sigma = JT_DotProduct(ia(_JT_nrows_), q, w, status)
      if (sigma .eq. zero) then
       if (iparm(_JT_out_) .ge. 1) then
        write(iparm(_JT_luerr_),*)
        write(iparm(_JT_luerr_),901) '*********************************'
        write(iparm(_JT_luerr_),901) 'JT_BCGS: ERROR'
        write(iparm(_JT_luerr_),901) '  Breakdown (sigma = 0.0)'
        write(iparm(_JT_luerr_),905) '  current iteration number: ', iparm(_JT_iter_)
        write(iparm(_JT_luerr_),940) '  norm of residual at breakdown: ',
     &                      rnorm(iparm(_JT_iter_))
        write(iparm(_JT_luerr_),901) '*********************************'
       endif
       status = -5
       goto 9999
      endif
      alpha = rhoold/sigma
c
      call JT_z_eq_sy_plus_x (ia(_JT_nrows_), -alpha, u, w, h, status) ! h = u - alpha*w
c
      call JT_z_eq_y_plus_x (ia(_JT_nrows_), u, h, v, status)
      call JT_x_eq_sx (ia(_JT_nrows_), alpha, v, status) ! v = alpha*(u + h)
c
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), h, 
     &     'JT_BCGS: BEFORE CALCULATION OF RESIDUAL (h):', status)
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), v, 
     &     'JT_BCGS: BEFORE CALCULATION OF RESIDUAL (v):', status)
      endif
c
      call JT_y_eq_Ax (a, ia, ja, v, work, status)
      if (status .lt. 0) then
       if (status .ne. -2) status = -3
       goto 9999
      endif
c
      if (iparm(_JT_pre_) .ge. 0) then
       call JT_ApplyPreconditioner (iparm, rparm, 
     &      a, ia, ja, ap, iap, jap, work, w, status)
       if (status .lt. 0) then
        if (status .ne. -2) status = -3
        goto 9999
       endif
      elseif (iparm(_JT_pre_) .eq. -1) then
       call JT_SwapVectors (ia(_JT_nrows_), work, b, status)
       call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
       status = 3
       return
      endif
c
   55 continue
c
c ... Compute BCGS residual: r = r - w
      call JT_y_eq_y_minus_x (ia(_JT_nrows_), w, r, status)
      if (iparm(_JT_out_) .ge. 6) then
       call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), r, 
     &     'JT_BCGS: AFTER CALCULATION OF RESIDUAL (r):', status)
      endif
c
c ... Compute || r ||.
      rnorm(iparm(_JT_iter_)) = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), r, status)
c
      if (iparm(_JT_method_) .eq. _JT_method_BCGS_) then
c
c .... Compute new BCGS iterate: x = x + v
       call JT_y_eq_y_plus_x (ia(_JT_nrows_), v, x, status)
       rnorm_iter = rnorm(iparm(_JT_iter_))
      else
c
c .... Compute 1st TFQMR iterate and check convergence.
       term = theta2*eta/alpha
       call JT_y_eq_sy_plus_x (ia(_JT_nrows_), term, u, d, status) ! d = term*d + u
       theta = SQRT(rnorm(MAX(0,iparm(_JT_iter_)-2))*rnorm(iparm(_JT_iter_)))/tau
       theta2 = theta*theta
       sigma = one / SQRT(one + theta2)
       eta = sigma*sigma*alpha
       call JT_y_eq_y_plus_sx (ia(_JT_nrows_), eta, d, x, status)  ! x = x + eta*d
       rnormqmr = tau*SQRT(REAL(iparm(_JT_iter_)+1))
       call JT_CheckConvergence (bnorm, rnormt(0), rnormqmr, 
     &      x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
     &      err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
     &      status)
       rnorm(iparm(_JT_iter_)) = rnorm(iparm(_JT_iter_)-1)
       if (status.le.0 .or. theta.eq.zero) goto 9999
c
c .... Compute 2nd TFQMR iterate.
       tau = tau*theta*sigma
       term = theta2*eta/alpha
       call JT_y_eq_sy_plus_x (ia(_JT_nrows_), term, h, d, status) ! d = term*d + h
       theta = rnorm(iparm(_JT_iter_)-1) / tau
       theta2 = theta*theta
       sigma = one / SQRT(one + theta2)
       tau = tau*theta*sigma
       eta = sigma*sigma*alpha
       call JT_y_eq_y_plus_sx (ia(_JT_nrows_), eta, d, x, status)  ! x = x + eta*d
       rnormqmr = tau*SQRT(REAL(iparm(_JT_iter_)))
       rnorm_iter = rnormqmr
      endif
c
c ... Check convergence.
      call JT_CheckConvergence (bnorm, rnormt(0), rnorm_iter,
     &     x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
     &     err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
     &     status)
      if (iparm(_JT_method_) .eq. _JT_method_TFQMR_) then
       rnorm(iparm(_JT_iter_)) = rnorm(iparm(_JT_iter_)-1)
      endif
      if (status .le. 0) goto 9999
c
c ... Begin new iteration.
      rho = JT_DotProduct(ia(_JT_nrows_), q, r, status)
      if (rho .eq. zero) then
       if (iparm(_JT_out_) .ge. 1) then
        write(iparm(_JT_luerr_),*)
        write(iparm(_JT_luerr_),901) '*********************************'
        write(iparm(_JT_luerr_),901) 'JT_BCGS: ERROR'
        write(iparm(_JT_luerr_),901) '  Breakdown (rho = 0.0)'
        write(iparm(_JT_luerr_),905) '  current iteration number: ', iparm(_JT_iter_)
        write(iparm(_JT_luerr_),940) '  norm of residual at breakdown: ',
     &                      rnorm(iparm(_JT_iter_))
        write(iparm(_JT_luerr_),901) '*********************************'
       endif
       status = -5
       goto 9999
      endif
      beta = rho/rhoold
      rhoold = rho
c
      call JT_z_eq_sy_plus_x (ia(_JT_nrows_), beta, r, h, u, status) ! u = beta*h + r
c
c     The following two calls compute g = u + beta*(beta*g + h)
      call JT_y_eq_sy_plus_x (ia(_JT_nrows_), beta, h, g, status) ! g = beta*g + h
      call JT_y_eq_sy_plus_x (ia(_JT_nrows_), beta, u, g, status) ! g = beta*g + u
c
      goto 35
c
 9999 continue
c
c ... Subtract time at start of solution from each element of cpu array
c     to obtain true CPU time for each iteration.
      cpu(0) = cpu0
      do i=1,iparm(_JT_iter_)
       cpu(i) = cpu(i) - cpu(0)
      enddo
#include "deallocate-BCGS.inc"
c
#include "formats.inc"
      return
      end