Dsyr2k.java

package org.mklab.sdpj.gpack.blaswrap;

import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;


/**
 * @author koga
 * @version $Revision$, 2009/04/24
 * @param <RS> 実スカラーの型
 * @param <RM> 実行列の型
 * @param <CS> 複素スカラーの型
 * @param <CM> 複素行列の型
 */
public class Dsyr2k<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {

  /*  Purpose   
  =======   
  DSYR2K  performs one of the symmetric rank 2k operations   
     C := alpha*A*B' + alpha*B*A' + beta*C,   
  or   
     C := alpha*A'*B + alpha*B'*A + beta*C,   
  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix   
  and  A and B  are  n by k  matrices  in the  first  case  and  k by n   
  matrices in the second case.   
  Parameters   
  ==========   
  UPLO   - CHARACTER*1.   
           On  entry,   UPLO  specifies  whether  the  upper  or  lower   
           triangular  part  of the  array  C  is to be  referenced  as   
           follows:   
              UPLO = 'U' or 'u'   Only the  upper triangular part of  C   
                                  is to be referenced.   
              UPLO = 'L' or 'l'   Only the  lower triangular part of  C   
                                  is to be referenced.   
           Unchanged on exit.   
  TRANS  - CHARACTER*1.   
           On entry,  TRANS  specifies the operation to be performed as   
           follows:   
              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +   
                                        beta*C.   
              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +   
                                        beta*C.   
              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +   
                                        beta*C.   
           Unchanged on exit.   
  N      - INTEGER.   
           On entry,  N specifies the order of the matrix C.  N must be   
           at least zero.   
           Unchanged on exit.   
  K      - INTEGER.   
           On entry with  TRANS = 'N' or 'n',  K  specifies  the number   
           of  columns  of the  matrices  A and B,  and on  entry  with   
           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number   
           of rows of the matrices  A and B.  K must be at least  zero.   
           Unchanged on exit.   
  ALPHA  - DOUBLE PRECISION.   
           On entry, ALPHA specifies the scalar alpha.   
           Unchanged on exit.   
  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is   
           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.   
           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k   
           part of the array  A  must contain the matrix  A,  otherwise   
           the leading  k by n  part of the array  A  must contain  the   
           matrix A.   
           Unchanged on exit.   
  LDA    - INTEGER.   
           On entry, LDA specifies the first dimension of A as declared   
           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'   
           then  LDA must be at least  max( 1, n ), otherwise  LDA must   
           be at least  max( 1, k ).   
           Unchanged on exit.   
  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is   
           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.   
           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k   
           part of the array  B  must contain the matrix  B,  otherwise   
           the leading  k by n  part of the array  B  must contain  the   
           matrix B.   
           Unchanged on exit.   
  LDB    - INTEGER.   
           On entry, LDB specifies the first dimension of B as declared   
           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'   
           then  LDB must be at least  max( 1, n ), otherwise  LDB must   
           be at least  max( 1, k ).   
           Unchanged on exit.   
  BETA   - DOUBLE PRECISION.   
           On entry, BETA specifies the scalar beta.   
           Unchanged on exit.   
  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n   
           upper triangular part of the array C must contain the upper   
           triangular part  of the  symmetric matrix  and the strictly   
           lower triangular part of C is not referenced.  On exit, the   
           upper triangular part of the array  C is overwritten by the   
           upper triangular part of the updated matrix.   
           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n   
           lower triangular part of the array C must contain the lower   
           triangular part  of the  symmetric matrix  and the strictly   
           upper triangular part of C is not referenced.  On exit, the   
           lower triangular part of the array  C is overwritten by the   
           lower triangular part of the updated matrix.   
  LDC    - INTEGER.   
           On entry, LDC specifies the first dimension of C as declared   
           in  the  calling  (sub)  program.   LDC  must  be  at  least   
           max( 1, n ).   
           Unchanged on exit.   
  Level 3 Blas routine.   
  -- Written on 8-February-1989.   
     Jack Dongarra, Argonne National Laboratory.   
     Iain Duff, AERE Harwell.   
     Jeremy Du Croz, Numerical Algorithms Group Ltd.   
     Sven Hammarling, Numerical Algorithms Group Ltd.   
     Test the input parameters.   
     Parameter adjustments */

  /**
   * @param uplo uplo
   * @param trans trans
   * @param n n
   * @param k k
   * @param alpha alpha
   * @param a a
   * @param lda lda
   * @param b b
   * @param ldb ldb
   * @param beta beta
   * @param c c
   * @param ldc ldc
   * @return result
   */
  public int execute(String uplo, String trans, int n, int k, RS alpha, RS[] a, int lda, RS[] b, int ldb, RS beta, RS[] c, int ldc) {
    final RS unit = a[0].createUnit();

    int a_dim1 = lda;
    int a_offset = 1 + a_dim1 * 1;
    int pointer_a = -a_offset;

    int b_dim1 = ldb;
    int b_offset = 1 + b_dim1 * 1;
    int pointer_b = -b_offset;

    int c_dim1 = ldc;
    int c_offset = 1 + c_dim1 * 1;
    int pointer_c = -c_offset;

    int nrowa;
    if (BLAS.lsame(trans, "N")) { //$NON-NLS-1$
      nrowa = n;
    } else {
      nrowa = k;
    }
    boolean upper = BLAS.lsame(uplo, "U"); //$NON-NLS-1$

    int info = 0;
    if (!upper && !BLAS.lsame(uplo, "L")) { //$NON-NLS-1$
      info = 1;
    } else if (!BLAS.lsame(trans, "N") && !BLAS.lsame(trans, "T") && !BLAS.lsame(trans, "C")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
      info = 2;
    } else if (n < 0) {
      info = 3;
    } else if (k < 0) {
      info = 4;
    } else if (lda < Math.max(1, nrowa)) {
      info = 7;
    } else if (ldb < Math.max(1, nrowa)) {
      info = 9;
    } else if (ldc < Math.max(1, n)) {
      info = 12;
    }
    if (info != 0) {
      BLAS.xerbla("DSYR2K", info); //$NON-NLS-1$
      return 0;
    }
    // Quick return if possible.
    if (n == 0 || (alpha.isZero() || k == 0) && beta.equals(unit.create(1))) {
      return 0;
    }
    // And when  alpha.eq.zero.
    if (alpha.isZero()) {
      if (upper) {
        if (beta.isZero()) {
          for (int j = 1; j <= n; ++j) {
            for (int i = 1; i <= j; ++i) {
              c[j * c_dim1 + i + pointer_c] = unit.createZero();
            }
          }
        } else {
          for (int j = 1; j <= n; ++j) {
            for (int i = 1; i <= j; ++i) {
              int p = j * c_dim1 + i + pointer_c;
              c[p] = beta.multiply(c[p]);
            }
          }
        }
      } else {
        if (beta.isZero()) {
          for (int j = 1; j <= n; ++j) {
            for (int i = j; i <= n; ++i) {
              c[j * c_dim1 + i + pointer_c] = unit.createZero();
            }
          }
        } else {
          for (int j = 1; j <= n; ++j) {
            for (int i = j; i <= n; ++i) {
              int p = j * c_dim1 + i + pointer_c;
              c[p] = beta.multiply(c[p]);
            }
          }
        }
      }
      return 0;
    }

    if (BLAS.lsame(trans, "N")) { //$NON-NLS-1$
      // Form  C := alpha*A*B' + alpha*B*A' + C.
      if (upper) {
        for (int j = 1; j <= n; ++j) {
          if (beta.isZero()) {
            for (int i = 1; i <= j; ++i) {
              c[j * c_dim1 + i + pointer_c] = unit.createZero();
            }
          } else if (!beta.equals(unit.createUnit())) {
            for (int i = 1; i <= j; ++i) {
              int p = j * c_dim1 + i + pointer_c;
              c[p] = beta.multiply(c[p]);
            }
          }
          for (int l = 1; l <= k; ++l) {
            int aj = l * a_dim1 + j + pointer_a;
            int bj = l * b_dim1 + j + pointer_b;
            if (!a[aj].isZero() || !b[bj].isZero()) {
              RS temp1 = alpha.multiply(b[bj]);
              RS temp2 = alpha.multiply(a[aj]);
              for (int i = 1; i <= j; ++i) {
                int ci = j * c_dim1 + i + pointer_c;
                int ai = l * a_dim1 + i + pointer_a;
                int bi = l * b_dim1 + i + pointer_b;
                c[ci] = c[ci].add(a[ai].multiply(temp1)).add(b[bi].multiply(temp2));
              }
            }
          }
        }
      } else {
        for (int j = 1; j <= n; ++j) {
          if (beta.isZero()) {
            for (int i = j; i <= n; ++i) {
              c[j * c_dim1 + i + pointer_c] = unit.createZero();
            }
          } else if (!beta.equals(unit.createUnit())) {
            for (int i = j; i <= n; ++i) {
              int p = j * c_dim1 + i + pointer_c;
              c[p] = beta.multiply(c[p]);
            }
          }
          for (int l = 1; l <= k; ++l) {
            int aj = l * a_dim1 + j + pointer_a;
            int bj = l * b_dim1 + j + pointer_b;
            if (!a[aj].isZero() || !b[bj].isZero()) {
              RS temp1 = alpha.multiply(b[bj]);
              RS temp2 = alpha.multiply(a[aj]);
              for (int i = j; i <= n; ++i) {
                int ci = j * c_dim1 + i + pointer_c;
                int ai = l * a_dim1 + i + pointer_a;
                int bi = l * b_dim1 + i + pointer_b;
                c[ci] = c[ci].add(a[ai].multiply(temp1)).add(b[bi].multiply(temp2));
              }
            }
          }
        }
      }
    } else {
      // Form  C := alpha*A'*B + alpha*B'*A + C.
      if (upper) {
        for (int j = 1; j <= n; ++j) {
          for (int i = 1; i <= j; ++i) {
            RS temp1 = unit.createZero();
            RS temp2 = unit.createZero();
            for (int l = 1; l <= k; ++l) {
              temp1 = temp1.add(a[i * a_dim1 + l + pointer_a].multiply(b[j * b_dim1 + l + pointer_b]));
              temp2 = temp2.add(b[i * b_dim1 + l + pointer_b].multiply(a[j * a_dim1 + l + pointer_a]));
            }
            int p = j * c_dim1 + i + pointer_c;
            if (beta.isZero()) {
              c[p] = (alpha.multiply(temp1)).add(alpha.multiply(temp2));
            } else {
              c[p] = (beta.multiply(c[p])).add(alpha.multiply(temp1)).add(alpha.multiply(temp2));
            }
          }
        }
      } else {
        for (int j = 1; j <= n; ++j) {
          for (int i = j; i <= n; ++i) {
            RS temp1 = unit.createZero();
            RS temp2 = unit.createZero();
            for (int l = 1; l <= k; ++l) {
              temp1 = temp1.add(a[i * a_dim1 + l + pointer_a].multiply(b[j * b_dim1 + l + pointer_b]));
              temp2 = temp2.add(b[i * b_dim1 + l + pointer_b].multiply(a[j * a_dim1 + l + pointer_a]));
            }
            int p = j * c_dim1 + i + pointer_c;
            if (beta.isZero()) {
              c[p] = alpha.multiply(temp1).add(alpha.multiply(temp2));
            } else {
              c[p] = beta.multiply(c[p]).add(alpha.multiply(temp1)).add(alpha.multiply(temp2));
            }
          }
        }
      }
    }
    return 0;
  }
}