Dgemv.java

package org.mklab.sdpj.gpack.blaswrap;

import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;


/**
 * @author takafumi
 * @param <RS> 実スカラーの型
 * @param <RM> 実行列の型
 * @param <CS> 複素スカラーの型
 * @param <CM> 複素行列の型
 */
public class Dgemv<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {

  /*  Purpose   
      =======   
      DGEMV  performs one of the matrix-vector operations   
         y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
      where alpha and beta are scalars, x and y are vectors and A is an   
      m by n matrix.   
      Parameters   
      ==========   
      TRANS  - CHARACTER*1.   
               On entry, TRANS specifies the operation to be performed as   
               follows:   
                  TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
                  TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                  TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
               Unchanged on exit.   
      M      - INTEGER.   
               On entry, M specifies the number of rows of the matrix A.   
               M must be at least zero.   
               Unchanged on exit.   
      N      - INTEGER.   
               On entry, N specifies the number of columns of the matrix A.   
               N must be at least zero.   
               Unchanged on exit.   
      ALPHA  - DOUBLE PRECISION.   
               On entry, ALPHA specifies the scalar alpha.   
               Unchanged on exit.   
      A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).   
               Before entry, the leading m by n part of the array A must   
               contain the matrix of coefficients.   
               Unchanged on exit.   
      LDA    - INTEGER.   
               On entry, LDA specifies the first dimension of A as declared   
               in the calling (sub) program. LDA must be at least   
               max( 1, m ).   
               Unchanged on exit.   
      X      - DOUBLE PRECISION array of DIMENSION at least   
               ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
               and at least   
               ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
               Before entry, the incremented array X must contain the   
               vector x.   
               Unchanged on exit.   
      INCX   - INTEGER.   
               On entry, INCX specifies the increment for the elements of   
               X. INCX must not be zero.   
               Unchanged on exit.   
      BETA   - DOUBLE PRECISION.   
               On entry, BETA specifies the scalar beta. When BETA is   
               supplied as zero then Y need not be set on input.   
               Unchanged on exit.   
      Y      - DOUBLE PRECISION array of DIMENSION at least   
               ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
               and at least   
               ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
               Before entry with BETA non-zero, the incremented array Y   
               must contain the vector y. On exit, Y is overwritten by the   
               updated vector y.   
      INCY   - INTEGER.   
               On entry, INCY specifies the increment for the elements of   
               Y. INCY must not be zero.   
               Unchanged on exit.   
      Level 2 Blas routine.   
      -- Written on 22-October-1986.   
         Jack Dongarra, Argonne National Lab.   
         Jeremy Du Croz, Nag Central Office.   
         Sven Hammarling, Nag Central Office.   
         Richard Hanson, Sandia National Labs.   
         Test the input parameters.   
         Parameter adjustments */

  /**
   * y=alpha*a*x+beta*y or y=alpha*a'*x+beta*y
   * 
   * @param trans unchanged
   * @param m unchanged Aの行数
   * @param n unchanged Aの列数
   * @param alpha unchanged
   * @param a unchanged matrix A
   * @param lda unchanged
   * @param x unchanged vector X
   * @param incx unchanged
   * @param beta unchanged
   * @param y is overwritten by the update vector y
   * @param incy unchanged
   * @return result
   */
  public int execute(String trans, int m, int n, RS alpha, RS[] a, int lda, RS[] x, int incx, RS beta, RS[] y, int incy) {
    final RS unit = a[0].createUnit();
    int a_dim1 = lda;
    int a_offset = 1 + a_dim1 * 1;
    int pointer_a = -a_offset;

    int info = 0;
    if (!BLAS.lsame(trans, "N") && !BLAS.lsame(trans, "T") && !BLAS.lsame(trans, "C")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
      info = 1;
    } else if (m < 0) {
      info = 2;
    } else if (n < 0) {
      info = 3;
    } else if (lda < Math.max(1, m)) {
      info = 6;
    } else if (incx == 0) {
      info = 8;
    } else if (incy == 0) {
      info = 11;
    }
    if (info != 0) {
      BLAS.xerbla("DGEMV ", info); //$NON-NLS-1$
      return 0;
    }
    // Quick return if possible.
    if (m == 0 || n == 0 || alpha.isZero() && beta.isZero()) {
      return 0;
    }
    // Set  LENX  and  LENY, the lengths of the vectors x and y, and set up the start points in  X  and  Y. */
    int lenx;
    int leny;
    if (BLAS.lsame(trans, "N")) { //$NON-NLS-1$
      lenx = n;
      leny = m;
    } else {
      lenx = m;
      leny = n;
    }
    int kx;
    if (incx > 0) {
      kx = 1;
    } else {
      kx = 1 - (lenx - 1) * incx;
    }
    int ky;
    if (incy > 0) {
      ky = 1;
    } else {
      ky = 1 - (leny - 1) * incy;
    }
    /* Start the operations. In this version the elements of A are   
     * accessed sequentially with one pass through A.   
     * First form  y := beta*y.
     */
    if (!beta.equals(unit.createUnit())) {
      if (incy == 1) {
        if (beta.isZero()) {
          for (int i = 1; i <= leny; ++i) {
            y[i - 1] = unit.createZero();
          }
        } else {
          for (int i = 1; i <= leny; ++i) {
            y[i - 1] = beta.multiply(y[i - 1]);
          }
        }
      } else {
        int iy = ky;
        if (beta.isZero()) {
          for (int i = 1; i <= leny; ++i) {
            y[iy - 1] = unit.createZero();
            iy += incy;
          }
        } else {
          for (int i = 1; i <= leny; ++i) {
            y[iy - 1] = beta.multiply(y[iy - 1]);
            iy += incy;
          }
        }
      }
    }
    if (alpha.isZero()) {
      return 0;
    }
    if (BLAS.lsame(trans, "N")) { //$NON-NLS-1$
      // Form  y := alpha*A*x + y.
      int jx = kx;
      if (incy == 1) {
        for (int j = 1; j <= n; ++j) {
          if (!x[jx - 1].isZero()) {
            RS temp = alpha.multiply(x[jx - 1]);
            for (int i = 1; i <= m; ++i) {
              y[i - 1] = y[i - 1].add(temp.multiply(a[j * a_dim1 + i + pointer_a]));
            }
          }
          jx += incx;
        }
      } else {
        for (int j = 1; j <= n; ++j) {
          if (!x[jx - 1].isZero()) {
            RS temp = alpha.multiply(x[jx - 1]);
            int iy = ky;
            for (int i = 1; i <= m; ++i) {
              y[iy - 1] = y[iy - 1].add(temp.multiply(a[j * a_dim1 + i + pointer_a]));
              iy += incy;
            }
          }
          jx += incx;
        }
      }
    } else {
      // Form  y := alpha*A'*x + y.
      int jy = ky;
      if (incx == 1) {
        for (int j = 1; j <= n; ++j) {
          RS temp = unit.createZero();
          for (int i = 1; i <= m; ++i) {
            temp = temp.add(a[j * a_dim1 + i + pointer_a].multiply(x[i - 1]));
          }
          y[jy - 1] = y[jy - 1].add(alpha.multiply(temp));
          jy += incy;
        }
      } else {
        for (int j = 1; j <= n; ++j) {
          RS temp = unit.createZero();
          int ix = kx;
          for (int i = 1; i <= m; ++i) {
            temp = temp.add(a[j * a_dim1 + i + pointer_a].multiply(x[ix - 1]));
            ix += incx;
          }
          y[jy - 1] = y[jy - 1].add(alpha.multiply(temp));
          jy += incy;
        }
      }
    }

    return 0;
  }
}