y_eq_Ax_spec.F
c**********************************************************************
#include "author.inc"
c* $Id: y_eq_Ax_spec.F,v 1.1 1995/11/14 02:19:37 turner Exp $
c*
c* Computes y=Ax.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* idim - leading dimension of a
c* n - number of unknowns
c* a - matrix
c* x - x-vector
c*
c* Output:
c* y - y-vector
c* status - return status
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_FillVectorFloat
c* JT_y_eq_y_plus_sx
c*
#include "copyright.inc"
c**********************************************************************
subroutine JT_y_eq_Ax_Full (idim, n, a, x, y, status)
implicit none
c
c ... Input:
integer idim, n
real a(idim,n), x(n)
c
c ... Output:
integer status
real y(n)
c
c ... Local:
real zero
parameter (zero=0.0d0)
#ifdef use_blas
real one
parameter (one=1.0d0)
c
call SGEMV ('N', n, n, one, a, idim, x, 1, zero, y, 1)
#else
integer j
c
call JT_FillVectorFloat (n, zero, y, status)
do j=1,n
call JT_y_eq_y_plus_sx (n, x(j), a(1,j), y, status) ! y = y + x(j)*a(1,j)
enddo
#endif
c
status = 0
return
end
c***********************************************************************
#include "author.inc"
c* $Id: y_eq_Ax_spec.F,v 1.1 1995/11/14 02:19:37 turner Exp $
c*
c* Computes y=Ax where A is a matrix stored in ELL format.
c*
c* Note that the dynamically-allocated vector "xtmp" is dimensioned
c* to start at zero. This is because in ELL format, the index map
c* matrix "ja" contains 0's where elements of "a" are zero. Thus the
c* 0th element of "xtmp" can be referenced.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* idim - leading dimension of a and ja
c* n - number of unknowns
c* maxnz - maximum number of non-zero elements in any row
c* a - matrix
c* ja - map array
c* x - x-vector
c*
c* Output:
c* y - y-vector
c* status - return status
c* -2 ==> memory allocation failure
c* -1 ==> invalid argument(s)
c* 0 ==> success
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_FillVectorFloat
c* JT_y_eq_x
c*
#include "arrays-y_eq_Ax_ELL.inc"
c*
#include "copyright.inc"
c***********************************************************************
subroutine JT_y_eq_Ax_ELL (idim, n, maxnz, a, ja, x, y, status)
implicit none
c
c ... Input:
integer idim, maxnz, n
integer ja(idim,maxnz)
real a(idim,maxnz), x(n)
c
c ... Output:
integer status
real y(n)
c
c ... Local:
integer i, j
real zero
#include "declare-y_eq_Ax_ELL.inc"
c
parameter (zero=0.0d0)
c
c ... Initialize return status.
status = 0
c
c ... Check arguments.
if (n.le.0 .or. idim.le.0 .or. maxnz.le.0 .or. n.gt.idim) then
status = -1
return
endif
c
#include "allocate-y_eq_Ax_ELL.inc"
c
xtmp(0) = zero
call JT_y_eq_x (n, x, xtmp(1), status)
c
c ... Unroll loops for 2 <= maxnz <= 9 and maxnz = 27.
c This improves performance even on workstations.
c Explicit loops for maxnz = 4, 6, and 8 probably
c aren't that useful, but what the heck.
if (maxnz .eq. 2) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2))
enddo
elseif (maxnz .eq. 3) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3))
enddo
elseif (maxnz .eq. 4) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3)) +
& a(i,4)*xtmp(ja(i,4))
enddo
elseif (maxnz .eq. 5) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3)) +
& a(i,4)*xtmp(ja(i,4)) +
& a(i,5)*xtmp(ja(i,5))
enddo
elseif (maxnz .eq. 6) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3)) +
& a(i,4)*xtmp(ja(i,4)) +
& a(i,5)*xtmp(ja(i,5)) +
& a(i,6)*xtmp(ja(i,6))
enddo
elseif (maxnz .eq. 7) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3)) +
& a(i,4)*xtmp(ja(i,4)) +
& a(i,5)*xtmp(ja(i,5)) +
& a(i,6)*xtmp(ja(i,6)) +
& a(i,7)*xtmp(ja(i,7))
enddo
elseif (maxnz .eq. 8) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3)) +
& a(i,4)*xtmp(ja(i,4)) +
& a(i,5)*xtmp(ja(i,5)) +
& a(i,6)*xtmp(ja(i,6)) +
& a(i,7)*xtmp(ja(i,7)) +
& a(i,8)*xtmp(ja(i,8))
enddo
elseif (maxnz .eq. 9) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3)) +
& a(i,4)*xtmp(ja(i,4)) +
& a(i,5)*xtmp(ja(i,5)) +
& a(i,6)*xtmp(ja(i,6)) +
& a(i,7)*xtmp(ja(i,7)) +
& a(i,8)*xtmp(ja(i,8)) +
& a(i,9)*xtmp(ja(i,9))
enddo
elseif (maxnz .eq. 27) then
do i=1,n
y(i) = a(i,1)*xtmp(ja(i,1)) +
& a(i,2)*xtmp(ja(i,2)) +
& a(i,3)*xtmp(ja(i,3)) +
& a(i,4)*xtmp(ja(i,4)) +
& a(i,5)*xtmp(ja(i,5)) +
& a(i,6)*xtmp(ja(i,6)) +
& a(i,7)*xtmp(ja(i,7)) +
& a(i,8)*xtmp(ja(i,8)) +
& a(i,9)*xtmp(ja(i,9))
y(i) = y(i) + a(i,10)*xtmp(ja(i,10)) +
& a(i,11)*xtmp(ja(i,11)) +
& a(i,12)*xtmp(ja(i,12)) +
& a(i,13)*xtmp(ja(i,13)) +
& a(i,14)*xtmp(ja(i,14)) +
& a(i,15)*xtmp(ja(i,15)) +
& a(i,16)*xtmp(ja(i,16)) +
& a(i,17)*xtmp(ja(i,17)) +
& a(i,18)*xtmp(ja(i,18)) +
& a(i,19)*xtmp(ja(i,19))
y(i) = y(i) + a(i,20)*xtmp(ja(i,20)) +
& a(i,21)*xtmp(ja(i,21)) +
& a(i,22)*xtmp(ja(i,22)) +
& a(i,23)*xtmp(ja(i,23)) +
& a(i,24)*xtmp(ja(i,24)) +
& a(i,25)*xtmp(ja(i,25)) +
& a(i,26)*xtmp(ja(i,26)) +
& a(i,27)*xtmp(ja(i,27))
enddo
else
call JT_FillVectorFloat (n, zero, y, status)
do j=1,maxnz
do i=1,n
y(i) = y(i) + a(i,j)*xtmp(ja(i,j))
enddo
enddo
endif
c
9999 continue
#include "deallocate-y_eq_Ax_ELL.inc"
c
return
end
c***********************************************************************
#include "author.inc"
c* $Id: y_eq_Ax_spec.F,v 1.1 1995/11/14 02:19:37 turner Exp $
c*
c* Computes y=Ax where A is a matrix stored in coordinate format.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* nelem - number of active elements in a
c* n - number of unknowns
c* a - matrix
c* ja - map array
c* x - x-vector
c*
c* Output:
c* y - y-vector
c* status - return status
c* -1 ==> invalid argument(s)
c* 0 ==> success
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_FillVectorFloat
c*
#include "copyright.inc"
c***********************************************************************
subroutine JT_y_eq_Ax_COO (nelem, n, a, ja, x, y, status)
implicit none
c
c ... Input:
integer nelem, n
integer ja(2*nelem)
real a(nelem), x(n)
c
c ... Output:
integer status
real y(n)
c
c ... Local:
integer i, j, ij
real zero
c
parameter (zero=0.0d0)
c
c ... Initialize return status.
status = 0
c
c ... Check arguments.
if (n.le.0 .or. nelem.le.0 .or. n.gt.nelem) then
status = -1
return
endif
c
call JT_FillVectorFloat (n, zero, y, status)
do ij=1,nelem
i = ja(ij)
j = ja(ij+nelem)
y(i) = y(i) + a(ij)*x(j)
enddo
c
return
end
c***********************************************************************
#include "author.inc"
c* $Id: y_eq_Ax_spec.F,v 1.1 1995/11/14 02:19:37 turner Exp $
c*
c* Computes y=Ax where A is a matrix stored in RSS format.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* nelem - number of active elements in a
c* n - number of unknowns
c* a - matrix
c* ja - map array
c* x - x-vector
c*
c* Output:
c* y - y-vector
c* status - return status
c* -1 ==> invalid argument(s)
c* 0 ==> success
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_FillVectorFloat
c*
#include "copyright.inc"
c***********************************************************************
subroutine JT_y_eq_Ax_RSS (nelem, n, a, ja, x, y, status)
implicit none
c
c ... Input:
integer nelem, n
integer ja(nelem)
real a(nelem), x(n)
c
c ... Output:
integer status
real y(n)
c
c ... Local:
integer i, j, ij
real zero
c
parameter (zero=0.0d0)
c
c ... Initialize return status.
status = 0
c
c ... Check arguments.
if (n.le.0 .or. nelem.le.0 .or. n.gt.nelem) then
status = -1
return
endif
c
call JT_FillVectorFloat (n, zero, y, status)
do ij=1,nelem
i = (ja(ij) - 1)/n + 1
j = ja(ij) - (i-1)*n
y(i) = y(i) + a(ij)*x(j)
enddo
c
return
end
c***********************************************************************
#include "author.inc"
c* $Id: y_eq_Ax_spec.F,v 1.1 1995/11/14 02:19:37 turner Exp $
c*
c* Computes y=Ax where A is a matrix stored in CSS format.
c*
c* <PARAMETER LIST>
c*
c* Input:
c* nelem - number of active elements in a
c* n - number of unknowns
c* a - matrix
c* ja - map array
c* x - x-vector
c*
c* Output:
c* y - y-vector
c* status - return status
c* -1 ==> invalid argument(s)
c* 0 ==> success
c*
c* <SUBROUTINES REQUIRED>
c*
c* JT_FillVectorFloat
c*
#include "copyright.inc"
c***********************************************************************
subroutine JT_y_eq_Ax_CSS (nelem, n, a, ja, x, y, status)
implicit none
c
c ... Input:
integer nelem, n
integer ja(nelem)
real a(nelem), x(n)
c
c ... Output:
integer status
real y(n)
c
c ... Local:
integer i, j, ij
real zero
c
parameter (zero=0.0d0)
c
c ... Initialize return status.
status = 0
c
c ... Check arguments.
if (n.le.0 .or. nelem.le.0 .or. n.gt.nelem) then
status = -1
return
endif
c
call JT_FillVectorFloat (n, zero, y, status)
do ij=1,nelem
j = (ja(ij) - 1)/n + 1
i = ja(ij) - (j-1)*n
y(i) = y(i) + a(ij)*x(j)
enddo
c
return
end