CG.F
#include "parmdefines.h"
#include <iadefines.h>
c***********************************************************************
#include "author.inc"
c* $Id: CG.F,v 1.43 1996/07/11 04:12:39 turner Exp $
c*
c* Computes the solution to an SPD system of linear equations of
c* the form Ax=b by conjugate gradients.
c*
c* An error message is written out and control is returned to the
c* calling routine if a breakdown in iterates is encountered or if
c* the maximum number of iterations is exceeded. At that time,
c* err(iter) can be examined to determine if execution can continue.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* b - source vector
c* a - coefficient matrix
c* ia - integer vector containing info about how "a" is stored
c* NOTE: see description of ia below
c* ja - integer array that maps columns in a to true columns
c*
c* In/Out:
c* iparm - array of integer parameters
c* rparm - array of floating point parameters
c* NOTE: see description of iparm and rparm below
c* x - solution vector (whatever is in x on entry is used as an
c* initial guess)
c*
c* Output:
c* cpu - cumulative cpu time after each iteration
c* rnormt - norm of true residual
c* errt - error estimate based on true residual
c* rnorm - norm of residual at each iteration
c* err - error estimate at each iteration
c* ap - preconditioner
c* iap - integer vector containing info about how "ap" is stored
c* NOTE: see description of ia below
c* jap - same as ja, but for preconditioner
c* status - return status:
c* -5 ==> iteration failed due to breakdown
c* -4 ==> iparm(_JT_itmax_) exceeded
c* -3 ==> internal error
c* -2 ==> memory allocation failure
c* -1 ==> invalid argument(s)
c* 0 ==> convergence was achieved
c* >0 ==> preconditioning needed
c*
#include "parmdesc.inc"
c*
#include "iadesc.inc"
c*
c* Variables in iap are analogous to those in ia.
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_ApplyPreconditioner
c* JT_CheckConvergence
c* JT_Clock
c* JT_FlushUnit
c* JT_GeneratePreconditioner
c* JT_SwapVectors
c* JT_WriteMatrix
c* JT_WriteSystem
c* JT_WriteVectorFloat
c* JT_y_eq_Ax
c* JT_y_eq_sy_plus_x
c* JT_y_eq_x
c* JT_y_eq_y_minus_Ax
c* JT_y_eq_y_plus_sx
c*
c* <FUNCTIONS REQUIRED>
c*
c* JT_DotProduct
c* JT_MatrixNorm
c* JT_VectorNorm
c*
#include "arrays-CG.inc"
c*
#include "copyright.inc"
c***********************************************************************
subroutine JT_CG (b, a, ia, ja, iparm, rparm, x,
& cpu, rnormt, errt, rnorm, err, ap, iap, jap, status)
implicit none
c
c ... Input:
integer ia(_JT_no_of_storage_parameters_), ja(*)
real a(*)
c
c ... In/Out:
integer iap(_JT_no_of_storage_parameters_), jap(*)
integer iparm(_JT_no_of_iparms_)
real rparm(_JT_no_of_rparms_)
#ifdef strict_f77
real x(*), b(*)
#else
real x(ia(_JT_nrows_)), b(ia(_JT_nrows_))
#endif
real ap(*)
c
c ... Output:
integer status
#ifdef strict_f77
real cpu(0:*), rnorm(0:*), err(0:*), rnormt(0:*), errt(0:*)
#else
real cpu(0:iparm(_JT_itmax_))
real rnorm(0:iparm(_JT_itmax_)), err(0:iparm(_JT_itmax_))
real rnormt(0:iparm(_JT_itmax_)), errt(0:iparm(_JT_itmax_))
#endif
c
c ... Local:
integer i, freq
real zero, alpha, beta, bnorm, rho, rhoold, sigma, cpu0
real JT_DotProduct, JT_MatrixNorm, JT_VectorNorm
#include "declare-CG.inc"
c
c ... Parameters.
parameter (zero=0.0d0)
c
c ... Ensure that all variables are saved.
save
c
c ... Check to see if re-entering after preconditioning.
if (status .eq. 1) then
call JT_SwapVectors (ia(_JT_nrows_), r, b, status)
call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
goto 55
endif
c
c ... Obtain CPU time at start of solution.
call JT_Clock (1, cpu0, status)
c
#include "allocate-CG.inc"
c
c ... Write initial system if desired.
if (iparm(_JT_out_) .ge. 4) then
call JT_WriteSystem (iparm(_JT_luout_), a, ia, ja, x, b,
& 'JT_CG: ON ENTRY (a,x,b):', status)
endif
c
c ... Initialize cpu time and residual and error norm arrays.
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, cpu, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, err, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, errt, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnorm, status)
call JT_FillVectorFloat (iparm(_JT_itmax_)+1, zero, rnormt, status)
c
c ... Set freq at which to use true residual. Set minimum of
c every 10 iterations.
freq = MAX(10, INT(SQRT(REAL(ia(_JT_nrows_)))))
c
c ... Initialization dependent on the stopping test.
c
c Compute || b || if needed.
if (ABS(iparm(_JT_stop_)).eq._JT_stop_axb_ .or.
& ABS(iparm(_JT_stop_)).eq._JT_stop_b_) then
bnorm = JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), b, status)
else
bnorm = zero
endif
c
rparm(_JT_anorm_) = zero
if (iparm(_JT_stop_) .eq. _JT_stop_relchg_) then
c
c Initialize xold if using || x - xold || / || x || stopping test.
call JT_y_eq_x (ia(_JT_nrows_), x, xold, status)
elseif (ABS(iparm(_JT_stop_)) .eq. 1) then
c
c Compute || A || if needed.
rparm(_JT_anorm_) = JT_MatrixNorm(iparm(_JT_norm_), a, ia, ja, status)
if (status .eq. -2) goto 9999
endif
c
c ... Construct preconditioner as appropriate and write out if
c desired.
if (iparm(_JT_pre_).gt.0 .and. iparm(_JT_prein_).eq._JT_prein_no_) then
call JT_GeneratePreconditioner (iparm, rparm,
& a, ia, ja, ap, iap, jap, status)
if (status .lt. 0) then
status = -3
goto 9999
endif
endif
c
c ... Initialize iteration counter.
iparm(_JT_iter_) = 0
c
c ************* START MAIN LOOP *************
c
35 continue
if (iparm(_JT_out_) .ge. 6) then
write (iparm(_JT_luout_),*)
write (iparm(_JT_luout_),905) '*** ITERATION NUMBER: ', iparm(_JT_iter_)
call JT_FlushUnit (iparm(_JT_luout_), status)
endif
c
c ... Compute residual and || r ||.
if (iparm(_JT_iter_).eq.0 .or.
& ((REAL(iparm(_JT_iter_))/freq .eq. REAL(iparm(_JT_iter_)/freq)) .and.
& iparm(_JT_resid_).ge.2)) then
c
c Use r = b - Ax. In this case also set rnormt = rnorm since it
c will be input to JT_CheckConvergence rather than output.
if (iparm(_JT_out_) .ge. 5) then
write(iparm(_JT_luout_),*)
write(iparm(_JT_luout_),905)
& 'Computing true residual at iteration:', iparm(_JT_iter_)
write(iparm(_JT_luout_),*)
call JT_FlushUnit (iparm(_JT_luout_), status)
endif
call JT_y_eq_x (ia(_JT_nrows_), b, r, status)
call JT_y_eq_y_minus_Ax (a, ia, ja, x, r, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
rnorm(iparm(_JT_iter_)) =
& JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), r, status)
rnormt(iparm(_JT_iter_)) = rnorm(iparm(_JT_iter_))
else
c
c Use recursive formula: r = r - alpha*w
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), -alpha, w, r, status)
rnorm(iparm(_JT_iter_)) =
& JT_VectorNorm(iparm(_JT_norm_), ia(_JT_nrows_), r, status)
endif
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), r,
& 'JT_CG: RESIDUAL (r):', status)
endif
c
c ... If the residual norm is smaller than rparm(_JT_tiny_), simply
c declare victory.
if (rnorm(iparm(_JT_iter_)) .le. rparm(_JT_tiny_)) then
status = 0
goto 9999
endif
c
c ... Check for convergence.
c
c Note that if using || x - xold || / || x || stopping test, must
c protect against testing for convergence on first iteration.
if (iparm(_JT_iter_).ne.0 .or.
& iparm(_JT_stop_).ne._JT_stop_relchg_) then
call JT_CheckConvergence (bnorm, rnormt(0), rnorm(iparm(_JT_iter_)),
& x, xold, b, a, ia, ja, iparm, rparm, cpu(iparm(_JT_iter_)),
& err(iparm(_JT_iter_)), rnormt(iparm(_JT_iter_)), errt(iparm(_JT_iter_)),
& status)
if (status .le. 0) goto 9999
endif
c
c ... Precondition.
if (iparm(_JT_pre_) .ge. 0) then
call JT_ApplyPreconditioner (iparm, rparm,
& a, ia, ja, ap, iap, jap, r, w, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
elseif (iparm(_JT_pre_) .eq. _JT_pre_reverse_) then
call JT_SwapVectors (ia(_JT_nrows_), r, b, status)
call JT_SwapVectors (ia(_JT_nrows_), w, x, status)
status = 1
return
endif
55 continue
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w,
& 'JT_CG: AFTER PRECONDITIONING (w):', status)
endif
c
rho = JT_DotProduct(ia(_JT_nrows_), w, r, status)
if (rho .eq. zero) then
if (iparm(_JT_out_) .ge. 1) then
write(iparm(_JT_luerr_),*)
write(iparm(_JT_luerr_),901) '*********************************'
write(iparm(_JT_luerr_),901) 'JT_CG: ERROR'
write(iparm(_JT_luerr_),901) ' Breakdown (rho = 0.0)'
write(iparm(_JT_luerr_),905) ' current iteration number: ', iparm(_JT_iter_)
write(iparm(_JT_luerr_),940) ' norm of residual at breakdown: ',
& rnorm(iparm(_JT_iter_))
write(iparm(_JT_luerr_),901) '*********************************'
endif
status = -5
goto 9999
endif
c
c ... Compute new direction: p = beta*p + w
if (iparm(_JT_iter_) .eq. 1) then
call JT_y_eq_x (ia(_JT_nrows_), w, p, status)
else
beta = rho/rhoold
call JT_y_eq_sy_plus_x (ia(_JT_nrows_), beta, w, p, status)
endif
rhoold = rho
c
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), p,
& 'JT_CG: NEW DIRECTION (p):', status)
endif
call JT_y_eq_Ax (a, ia, ja, p, w, status)
if (status .lt. 0) then
if (status .ne. -2) status = -3
goto 9999
endif
if (iparm(_JT_out_) .ge. 6) then
call JT_WriteVectorFloat (iparm(_JT_luout_), ia(_JT_nrows_), w,
& 'JT_CG: AFTER CALC. OF NEW DIRECTION (w):', status)
endif
sigma = JT_DotProduct(ia(_JT_nrows_), p, w, status)
if (sigma .eq. zero) then
if (iparm(_JT_out_) .ge. 1) then
write(iparm(_JT_luerr_),*)
write(iparm(_JT_luerr_),901) '*********************************'
write(iparm(_JT_luerr_),901) 'JT_CG: ERROR'
write(iparm(_JT_luerr_),901) ' Breakdown (sigma = 0.0)'
write(iparm(_JT_luerr_),905) ' current iteration number: ', iparm(_JT_iter_)
write(iparm(_JT_luerr_),940) ' norm of residual at breakdown: ',
& rnorm(iparm(_JT_iter_))
write(iparm(_JT_luerr_),901) '*********************************'
endif
status = -5
goto 9999
endif
alpha = rhoold/sigma
c
c ... Compute new iterate: x = x + alpha*p
call JT_y_eq_y_plus_sx (ia(_JT_nrows_), alpha, p, x, status)
c
goto 35
c
9999 continue
c
c ... Subtract time at start of solution from each element of cpu array
c to obtain true CPU time for each iteration.
cpu(0) = cpu0
do i=1,iparm(_JT_iter_)
cpu(i) = cpu(i) - cpu(0)
enddo
#include "deallocate-CG.inc"
c
#include "formats.inc"
return
end