Dsytrd.java

package org.mklab.sdpj.gpack.lapackwrap;

import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;
import org.mklab.sdpj.gpack.blaswrap.BLAS;


/**
 * @author koga
 * @version $Revision$, 2009/04/25
   * @param <RS> type of real scalar
   * @param <RM> type of real matrix
   * @param <CS> type of complex scalar
   * @param <CM> type of complex Matrix
 */
public class Dsytrd<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {

  /** */
  private RS[] a;
  /** */
  private RS[] d;
  /** */
  private RS[] e;
  /** */
  private RS[] tau;
  /** */
  private RS[] work;

  /*  -- LAPACK routine (version 3.0) --   
  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
  Courant Institute, Argonne National Lab, and Rice University   
  June 30, 1999   


  Purpose   
  =======   

  DSYTRD reduces a real symmetric matrix A to real symmetric   
  tridiagonal form T by an orthogonal similarity transformation:   
  Q**T * A * Q = T.   

  Arguments   
  =========   

  UPLO    (input) CHARACTER*1   
       = 'U':  Upper triangle of A is stored;   
       = 'L':  Lower triangle of A is stored.   

  N       (input) INTEGER   
       The order of the matrix A.  N >= 0.   

  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
       On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
       N-by-N upper triangular part of A contains the upper   
       triangular part of the matrix A, and the strictly lower   
       triangular part of A is not referenced.  If UPLO = 'L', the   
       leading N-by-N lower triangular part of A contains the lower   
       triangular part of the matrix A, and the strictly upper   
       triangular part of A is not referenced.   
       On exit, if UPLO = 'U', the diagonal and first superdiagonal   
       of A are overwritten by the corresponding elements of the   
       tridiagonal matrix T, and the elements above the first   
       superdiagonal, with the array TAU, represent the orthogonal   
       matrix Q as a product of elementary reflectors; if UPLO   
       = 'L', the diagonal and first subdiagonal of A are over-   
       written by the corresponding elements of the tridiagonal   
       matrix T, and the elements below the first subdiagonal, with   
       the array TAU, represent the orthogonal matrix Q as a product   
       of elementary reflectors. See Further Details.   

  LDA     (input) INTEGER   
       The leading dimension of the array A.  LDA >= max(1,N).   

  D       (output) DOUBLE PRECISION array, dimension (N)   
       The diagonal elements of the tridiagonal matrix T:   
       D(i) = A(i,i).   

  RS       (output) DOUBLE PRECISION array, dimension (N-1)   
       The off-diagonal elements of the tridiagonal matrix T:   
       E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.   

  TAU     (output) DOUBLE PRECISION array, dimension (N-1)   
       The scalar factors of the elementary reflectors (see Further   
       Details).   

  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
       On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

  LWORK   (input) INTEGER   
       The dimension of the array WORK.  LWORK >= 1.   
       For optimum performance LWORK >= N*NB, where NB is the   
       optimal blocksize.   

       If LWORK = -1, then a workspace query is assumed; the routine   
       only calculates the optimal size of the WORK array, returns   
       this value as the first entry of the WORK array, and no error   
       message related to LWORK is issued by XERBLA.   

  INFO    (output) INTEGER   
       = 0:  successful exit   
       < 0:  if INFO = -i, the i-th argument had an illegal value   

  Further Details   
  ===============   

  If UPLO = 'U', the matrix Q is represented as a product of elementary   
  reflectors   

  Q = H(n-1) . . . H(2) H(1).   

  Each H(i) has the form   

  H(i) = I - tau * v * v'   

  where tau is a real scalar, and v is a real vector with   
  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in   
  A(1:i-1,i+1), and tau in TAU(i).   

  If UPLO = 'L', the matrix Q is represented as a product of elementary   
  reflectors   

  Q = H(1) H(2) . . . H(n-1).   

  Each H(i) has the form   

  H(i) = I - tau * v * v'   

  where tau is a real scalar, and v is a real vector with   
  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),   
  and tau in TAU(i).   

  The contents of A on exit are illustrated by the following examples   
  with n = 5:   

  if UPLO = 'U':                       if UPLO = 'L':   

  (  d   e   v2  v3  v4 )              (  d                  )   
  (      d   e   v3  v4 )              (  e   d              )   
  (          d   e   v4 )              (  v1  e   d          )   
  (              d   e  )              (  v1  v2  e   d      )   
  (                  d  )              (  v1  v2  v3  e   d  )   

  where d and e denote diagonal and off-diagonal elements of T, and vi   
  denotes an element of the vector defining H(i).   

  =====================================================================
  */
  /**
   * @param uplo uplo
   * @param n n
   * @param ai ai
   * @param lda lda
   * @param di di
   * @param ei ei
   * @param taui taui
   * @param worki worki
   * @param lwork lwork
   * @return result
   */
  int execute(String uplo, int n, RS[] ai, int lda, RS[] di, RS[] ei, RS[] taui, RS[] worki, int lwork) {
    final RS unit = ai[0].createUnit();

    this.a = ai;
    this.d = di;
    this.e = ei;
    this.tau = taui;
    this.work = worki;

    RS c_b22 = unit.create(-1);

    int a_dim1 = lda;
    int a_offset = 1 + a_dim1 * 1;
    int pointer_a = -a_offset;
    int pointer_d = -1;
    int pointer_e = -1;
    int pointer_tau = -1;
    int pointer_work = -1;

    int info = 0;
    boolean upper = BLAS.lsame(uplo, "U"); //$NON-NLS-1$
    boolean lquery = (lwork == -1);
    if (!upper && !BLAS.lsame(uplo, "L")) { //$NON-NLS-1$
      info = -1;
    } else if (n < 0) {
      info = -2;
    } else if (lda < Math.max(1, n)) {
      info = -4;
    } else if (lwork < 1 && !lquery) {
      info = -9;
    }

    int lwkopt = 0;
    int nb = 0;
    if (info == 0) {
      // Determine the block size.
      nb = Clapack.ilaenv(1, "DSYTRD", uplo, n, -1, -1, -1, 6, 1, unit); //$NON-NLS-1$
      lwkopt = n * nb;
      this.work[pointer_work + 1] = unit.create(lwkopt);
    }

    if (info != 0) {
      BLAS.xerbla("DSYTRD", -(info)); //$NON-NLS-1$
      return info;
    } else if (lquery) {
      return info;
    }

    // Quick return if possible
    if (n == 0) {
      this.work[pointer_work + 1] = unit.create(1);
      return info;
    }

    int ldwork = 0;
    int nx = n;
    int iws = 1;
    if (nb > 1 && nb < n) {
      /* Determine when to cross over from blocked to unblocked code   
         (last block is always handled by unblocked code). */
      int i2 = Clapack.ilaenv(3, "DSYTRD", uplo, n, -1, -1, -1, 6, 1, unit); //$NON-NLS-1$
      nx = Math.max(nb, i2);
      if (nx < n) {
        /* Determine if workspace is large enough for blocked code. */

        ldwork = n;
        iws = ldwork * nb;
        if (lwork < iws) {
          /* Not enough workspace to use optimal NB:  determine the   
             minimum value of NB, and reduce NB or force use of   
             unblocked code by setting NX = N. */
          nb = Math.max(lwork / ldwork, 1);
          int nbmin = Clapack.ilaenv(2, "DSYTRD", uplo, n, -1, -1, -1, 6, 1, unit); //$NON-NLS-1$
          if (nb < nbmin) {
            nx = n;
          }
        }
      } else {
        nx = n;
      }
    } else {
      nb = 1;
    }

    if (upper) {
      /* Reduce the upper triangle of A. Columns 1:kk are handled by the unblocked method. */
      int kk = n - (n - nx + nb - 1) / nb * nb;
      for (int i = n - nb + 1; -nb < 0 ? i >= kk + 1 : i <= kk + 1; i += -nb) {
        /* Reduce columns i:i+nb-1 to tridiagonal form and form the   
           matrix W which is needed to update the unreduced part of the matrix */
        dlatrd(uplo, i + nb - 1, nb, pointer_a + a_offset, lda, pointer_e + 1, pointer_tau + 1, pointer_work + 1, ldwork);

        /* Update the unreduced submatrix A(1:i-1,1:i-1), using an update of the form:  A := A - V*W' - W*V' */
        RS c_b23 = unit.createUnit();
        dsyr2k(uplo, "No transpose", i - 1, nb, c_b22, (i) * a_dim1 + 1 + pointer_a, lda, pointer_work + 1, ldwork, c_b23, pointer_a + a_offset, lda); //$NON-NLS-1$

        /* Copy superdiagonal elements back into A, and diagonal elements into D */
        for (int j = i; j <= i + nb - 1; ++j) {
          this.a[(j) * a_dim1 + j - 1 + pointer_a] = this.e[pointer_e + j - 1];
          this.d[pointer_d + j] = this.a[(j) * a_dim1 + j + pointer_a];
        }
      }

      /*        Use unblocked code to reduce the last or only block */
      info = dsytd2(uplo, kk, pointer_a + a_offset, lda, pointer_d + 1, pointer_e + 1, pointer_tau + 1);
    } else {
      int i;
      for (i = 1; nb < 0 ? i >= n - nx : i <= n - nx; i += nb) {
        /* Reduce columns i:i+nb-1 to tridiagonal form and form the   
           matrix W which is needed to update the unreduced part of the matrix */
        dlatrd(uplo, n - i + 1, nb, i * a_dim1 + i + pointer_a, lda, pointer_e + i, pointer_tau + i, pointer_work + 1, ldwork);

        /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using an update of the form:  A := A - V*W' - W*V' */
        RS c_b23 = unit.createUnit();
        dsyr2k(uplo, "No transpose", n - i - nb + 1, nb, c_b22, (i) * a_dim1 + i + nb + pointer_a, lda, pointer_work + nb + 1, ldwork, c_b23, (i + nb) * a_dim1 + i + nb + pointer_a, lda); //$NON-NLS-1$

        /* Copy subdiagonal elements back into A, and diagonal elements into D */
        for (int j = i; j <= i + nb - 1; ++j) {
          this.a[(j) * a_dim1 + j + 1 + pointer_a] = this.e[pointer_e + j];
          this.d[pointer_d + j] = this.a[(j) * a_dim1 + j + pointer_a];
        }
      }

      // Use unblocked code to reduce the last or only block *
      info = dsytd2(uplo, n - i + 1, (i) * a_dim1 + i + pointer_a, lda, pointer_d + i, pointer_e + i, pointer_tau + i);
    }

    this.work[pointer_work + 1] = unit.create(lwkopt);
    return info;
  }

  /**
   * 変更されるパラメータ：a e tau w
   * 
   * @param uplo uplo
   * @param n n
   * @param nb nb
   * @param aIndex aIndex
   * @param lda lda
   * @param eIndex eIndex
   * @param tauIndex tauIndex
   * @param wIndex wIndex
   * @param ldw ldw
   */
  private void dlatrd(String uplo, int n, int nb, int aIndex, int lda, int eIndex, int tauIndex, int wIndex, int ldw) {
    final RS unit = this.a[0].createUnit();

    RS[] aTemp = unit.createArray(this.a.length - aIndex);
    RS[] eTemp = unit.createArray(this.e.length - eIndex);
    RS[] tauTemp = unit.createArray(this.tau.length - tauIndex);
    RS[] wTemp = unit.createArray(this.work.length - wIndex);
    System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.e, eIndex, eTemp, 0, eTemp.length);
    System.arraycopy(this.tau, tauIndex, tauTemp, 0, tauTemp.length);
    System.arraycopy(this.work, wIndex, wTemp, 0, wTemp.length);

    Clapack.dlatrd(uplo, n, nb, aTemp, lda, eTemp, tauTemp, wTemp, ldw);

    System.arraycopy(aTemp, 0, this.a, aIndex, aTemp.length);
    System.arraycopy(eTemp, 0, this.e, eIndex, eTemp.length);
    System.arraycopy(tauTemp, 0, this.tau, tauIndex, tauTemp.length);
    System.arraycopy(wTemp, 0, this.work, wIndex, wTemp.length);
  }

  /**
   * 引数（行列）は、この関数内ではどの呼び出しも順に a[] work[] a[] なのでIndexのみ渡すように変更。 Cのみ変更される。
   * 
   * @param uplo uplo
   * @param trans trans
   * @param n n
   * @param k k
   * @param alpha alpha
   * @param aIndex 行列aの部分行列の先頭
   * @param lda lda
   * @param bIndex 行列workの部分行列の先頭
   * @param ldb ldb
   * @param beta beta
   * @param cIndex 行列aの部分行列の先頭
   * @param ldc ldc
   */
  private void dsyr2k(String uplo, String trans, int n, int k, RS alpha, int aIndex, int lda, int bIndex, int ldb, RS beta, int cIndex, int ldc) {
    final RS unit = alpha.createUnit();

    RS[] aTemp = unit.createArray(this.a.length - aIndex);
    RS[] bTemp = unit.createArray(this.work.length - bIndex);
    RS[] cTemp = unit.createArray(this.a.length - cIndex);
    System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.work, bIndex, bTemp, 0, bTemp.length);
    System.arraycopy(this.a, cIndex, cTemp, 0, cTemp.length);

    BLAS.dsyr2k(uplo, trans, n, k, alpha, aTemp, lda, bTemp, ldb, beta, cTemp, ldc);
    System.arraycopy(cTemp, 0, this.a, cIndex, cTemp.length);
  }

  /**
   * 変更されるパラメータ：a d e tau info
   * 
   * @param uplo uplo
   * @param n n
   * @param aIndex a
   * @param lda lda
   * @param dIndex d
   * @param eIndex e
   * @param tauIndex tau
   * @return result
   */
  private int dsytd2(String uplo, int n, int aIndex, int lda, int dIndex, int eIndex, int tauIndex) {
    final RS unit = this.a[0].createUnit();

    RS[] aTemp = unit.createArray(this.a.length - aIndex);
    RS[] dTemp = unit.createArray(this.d.length - dIndex);
    RS[] eTemp = unit.createArray(this.e.length - eIndex);
    RS[] tauTemp = unit.createArray(this.tau.length - tauIndex);

    System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.d, dIndex, dTemp, 0, dTemp.length);
    System.arraycopy(this.e, eIndex, eTemp, 0, eTemp.length);
    System.arraycopy(this.tau, tauIndex, tauTemp, 0, tauTemp.length);
    int info = Clapack.dsytd2(uplo, n, aTemp, lda, dTemp, eTemp, tauTemp);

    System.arraycopy(aTemp, 0, this.a, aIndex, aTemp.length);
    System.arraycopy(dTemp, 0, this.d, dIndex, dTemp.length);
    System.arraycopy(eTemp, 0, this.e, eIndex, eTemp.length);
    System.arraycopy(tauTemp, 0, this.tau, tauIndex, tauTemp.length);
    return info;
  }
}