Dlatrd.java

package org.mklab.sdpj.gpack.lapackwrap;

import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;
import org.mklab.sdpj.gpack.blaswrap.BLAS;


/**
 * @author koga
 * @version $Revision$, 2009/04/25
   * @param <RS> type of real scalar
   * @param <RM> type of real matrix
   * @param <CS> type of complex scalar
   * @param <CM> type of complex Matrix
 */
public class Dlatrd<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {

  /** */
  private RS[] a;
  /** */
  private RS[] w;

  /*  -- LAPACK auxiliary routine (version 3.0) --   
  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
  Courant Institute, Argonne National Lab, and Rice University   
  October 31, 1992   


  Purpose   
  =======   

  DLATRD reduces NB rows and columns of a real symmetric matrix A to   
  symmetric tridiagonal form by an orthogonal similarity   
  transformation Q' * A * Q, and returns the matrices V and W which are   
  needed to apply the transformation to the unreduced part of A.   

  If UPLO = 'U', DLATRD reduces the last NB rows and columns of a   
  matrix, of which the upper triangle is supplied;   
  if UPLO = 'L', DLATRD reduces the first NB rows and columns of a   
  matrix, of which the lower triangle is supplied.   

  This is an auxiliary routine called by DSYTRD.   

  Arguments   
  =========   

  UPLO    (input) CHARACTER   
       Specifies whether the upper or lower triangular part of the   
       symmetric matrix A is stored:   
       = 'U': Upper triangular   
       = 'L': Lower triangular   

  N       (input) INTEGER   
       The order of the matrix A.   

  NB      (input) INTEGER   
       The number of rows and columns to be reduced.   

  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
       On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
       n-by-n upper triangular part of A contains the upper   
       triangular part of the matrix A, and the strictly lower   
       triangular part of A is not referenced.  If UPLO = 'L', the   
       leading n-by-n lower triangular part of A contains the lower   
       triangular part of the matrix A, and the strictly upper   
       triangular part of A is not referenced.   
       On exit:   
       if UPLO = 'U', the last NB columns have been reduced to   
         tridiagonal form, with the diagonal elements overwriting   
         the diagonal elements of A; the elements above the diagonal   
         with the array TAU, represent the orthogonal matrix Q as a   
         product of elementary reflectors;   
       if UPLO = 'L', the first NB columns have been reduced to   
         tridiagonal form, with the diagonal elements overwriting   
         the diagonal elements of A; the elements below the diagonal   
         with the array TAU, represent the  orthogonal matrix Q as a   
         product of elementary reflectors.   
       See Further Details.   

  LDA     (input) INTEGER   
       The leading dimension of the array A.  LDA >= (1,N).   

  RS       (output) DOUBLE PRECISION array, dimension (N-1)   
       If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal   
       elements of the last NB columns of the reduced matrix;   
       if UPLO = 'L', E(1:nb) contains the subdiagonal elements of   
       the first NB columns of the reduced matrix.   

  TAU     (output) DOUBLE PRECISION array, dimension (N-1)   
       The scalar factors of the elementary reflectors, stored in   
       TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.   
       See Further Details.   

  W       (output) DOUBLE PRECISION array, dimension (LDW,NB)   
       The n-by-nb matrix W required to update the unreduced part   
       of A.   

  LDW     (input) INTEGER   
       The leading dimension of the array W. LDW >= max(1,N).   

  Further Details   
  ===============   

  If UPLO = 'U', the matrix Q is represented as a product of elementary   
  reflectors   

  Q = H(n) H(n-1) . . . H(n-nb+1).   

  Each H(i) has the form   

  H(i) = I - tau * v * v'   

  where tau is a real scalar, and v is a real vector with   
  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),   
  and tau in TAU(i-1).   

  If UPLO = 'L', the matrix Q is represented as a product of elementary   
  reflectors   

  Q = H(1) H(2) . . . H(nb).   

  Each H(i) has the form   

  H(i) = I - tau * v * v'   

  where tau is a real scalar, and v is a real vector with   
  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),   
  and tau in TAU(i).   

  The elements of the vectors v together form the n-by-nb matrix V   
  which is needed, with W, to apply the transformation to the unreduced   
  part of the matrix, using a symmetric rank-2k update of the form:   
  A := A - V*W' - W*V'.   

  The contents of A on exit are illustrated by the following examples   
  with n = 5 and nb = 2:   

  if UPLO = 'U':                       if UPLO = 'L':   

  (  a   a   a   v4  v5 )              (  d                  )   
  (      a   a   v4  v5 )              (  1   d              )   
  (          a   1   v5 )              (  v1  1   a          )   
  (              d   1  )              (  v1  v2  a   a      )   
  (                  d  )              (  v1  v2  a   a   a  )   

  where d denotes a diagonal element of the reduced matrix, a denotes   
  an element of the original matrix that is unchanged, and vi denotes   
  an element of the vector defining H(i).   

  =====================================================================   
  */
  /**
   * @param uplo uplo
   * @param n n
   * @param nb nb
   * @param ai ai
   * @param lda lda
   * @param e e
   * @param tau tau
   * @param wi wi
   * @param ldw ldw
   * @return result
   */
  int execute(String uplo, int n, int nb, RS[] ai, int lda, RS[] e, RS[] tau, RS[] wi, int ldw) {
    final RS unit = ai[0].createUnit();

    this.a = ai;
    this.w = wi;

    RS c_b5 = unit.create(-1);
    RS c_b6 = unit.create(1);
    RS c_b16 = unit.create(0);

    int a_dim1 = lda;
    int a_offset = 1 + a_dim1 * 1;
    int pointer_a = -a_offset;
    int pointer_e = -1;
    int pointer_tau = -1;
    int w_dim1 = ldw;
    int w_offset = 1 + w_dim1 * 1;
    int pointer_w = -w_offset;

    if (n <= 0) {
      return 0;
    }

    if (BLAS.lsame(uplo, "U")) { //$NON-NLS-1$
      /* Reduce last NB columns of upper triangle */
      for (int i = n; i >= n - nb + 1; --i) {
        int iw = i - n + nb;
        if (i < n) {
          /* Update A(1:i,i) */
          dgemv_awa("No transpose", i, n - i, c_b5, (i + 1) * a_dim1 + 1 + pointer_a, lda, (iw + 1) * w_dim1 + i + pointer_w, ldw, c_b6, i * a_dim1 + 1 + pointer_a, 1); //$NON-NLS-1$
          dgemv_waa("No transpose", i, n - i, c_b5, (iw + 1) * w_dim1 + 1 + pointer_w, ldw, (i + 1) * a_dim1 + i + pointer_a, lda, c_b6, i * a_dim1 + 1 + pointer_a, 1); //$NON-NLS-1$
        }

        if (i > 1) {
          /* Generate elementary reflector H(i) to annihilate A(1:i-2,i) */
          RS alpha1 = this.a[i * a_dim1 + i - 1 + pointer_a];
          RS[] ans = dlarfg(i - 1, alpha1, i * a_dim1 + 1 + pointer_a, 1, tau[pointer_tau + i - 1]);
          this.a[i * a_dim1 + i - 1 + pointer_a] = ans[0];
          tau[pointer_tau + i - 1] = ans[1];

          e[pointer_e + i - 1] = this.a[i * a_dim1 + i - 1 + pointer_a];
          this.a[i * a_dim1 + i - 1 + pointer_a] = unit.createUnit();

          /* Compute W(1:i-1,i) */
          dsymv("Upper", i - 1, c_b6, a_offset + pointer_a, lda, i * a_dim1 + 1 + pointer_a, 1, c_b16, (iw) * w_dim1 + 1 + pointer_w, 1); //$NON-NLS-1$

          if (i < n) {
            dgemv_waa("Transpose", i - 1, n - i, c_b6, (iw + 1) * w_dim1 + 1 + pointer_w, ldw, i * a_dim1 + 1 + pointer_a, 1, c_b16, (iw) * w_dim1 + i + 1 + pointer_w, 1); //$NON-NLS-1$
            dgemv_aww("No transpose", i - 1, n - i, c_b5, (i + 1) * a_dim1 + 1 + pointer_a, lda, (iw) * w_dim1 + i + 1 + pointer_w, 1, c_b6, (iw) * w_dim1 + 1 + pointer_w, 1); //$NON-NLS-1$
            dgemv_aaw("Transpose", i - 1, n - i, c_b6, (i + 1) * a_dim1 + 1 + pointer_a, lda, i * a_dim1 + 1 + pointer_a, 1, c_b16, iw * w_dim1 + i + 1 + pointer_w, 1); //$NON-NLS-1$
            dgemv_www("No transpose", i - 1, n - i, c_b5, (iw + 1) * w_dim1 + 1 + pointer_w, ldw, (iw) * w_dim1 + i + 1 + pointer_w, 1, c_b6, (iw) * w_dim1 + 1 + pointer_w, 1); //$NON-NLS-1$
          }

          dscal(i - 1, tau[pointer_tau + i - 1], (iw) * w_dim1 + 1 + pointer_w, 1);
          RS alpha = tau[pointer_tau + i - 1].divide(2).unaryMinus().multiply(ddot(i - 1, (iw) * w_dim1 + 1 + pointer_w, 1, (i) * a_dim1 + 1 + pointer_a, 1));
          daxpy(i - 1, alpha, i * a_dim1 + 1 + pointer_a, 1, iw * w_dim1 + 1 + pointer_w, 1);
        }
        /* L10: */
      }
    } else {
      /* Reduce first NB columns of lower triangle */
      for (int i = 1; i <= nb; ++i) {
        /* Update A(i:n,i) */
        dgemv_awa("No transpose", n - i + 1, i - 1, c_b5, a_dim1 + i + pointer_a, lda, w_dim1 + i + pointer_w, ldw, c_b6, i * a_dim1 + i + pointer_a, 1); //$NON-NLS-1$
        dgemv_waa("No transpose", n - i + 1, i - 1, c_b5, w_dim1 + i + pointer_w, ldw, a_dim1 + i + pointer_a, lda, c_b6, i * a_dim1 + i + pointer_a, 1); //$NON-NLS-1$

        if (i < n) {
          /* Generate elementary reflector H(i) to annihilate   
              A(i+2:n,i)   
             Computing MIN */

          RS alpha1 = this.a[i * a_dim1 + i + 1 + pointer_a];
          RS[] ans = dlarfg(n - i, alpha1, i * a_dim1 + Math.min(i + 2, n) + pointer_a, 1, tau[pointer_tau + i]);
          this.a[i * a_dim1 + i + 1 + pointer_a] = ans[0];
          tau[pointer_tau + i] = ans[1];

          e[pointer_e + i] = this.a[i * a_dim1 + i + 1 + pointer_a];
          this.a[i * a_dim1 + i + 1 + pointer_a] = unit.createUnit(); // Tools.toNumericalScalar(1);

          /*              Compute W(i+1:n,i) */
          dsymv("Lower", n - i, c_b6, (i + 1) * a_dim1 + i + 1 + pointer_a, lda, i * a_dim1 + i + 1 + pointer_a, 1, c_b16, (i) * w_dim1 + i + 1 + pointer_w, 1); //$NON-NLS-1$
          dgemv_waw("Transpose", n - i, i - 1, c_b6, w_dim1 + i + 1 + pointer_w, ldw, i * a_dim1 + i + 1 + pointer_a, 1, c_b16, (i) * w_dim1 + 1 + pointer_w, 1); //$NON-NLS-1$
          dgemv_aww("No transpose", n - i, i - 1, c_b5, a_dim1 + i + 1 + pointer_a, lda, (i) * w_dim1 + 1 + pointer_w, 1, c_b6, i * w_dim1 + i + 1 + pointer_w, 1); //$NON-NLS-1$
          dgemv_aaw("Transpose", n - i, i - 1, c_b6, a_dim1 + i + 1 + pointer_a, lda, i * a_dim1 + i + 1 + pointer_a, 1, c_b16, (i) * w_dim1 + 1 + pointer_w, 1); //$NON-NLS-1$
          dgemv_www("No transpose", n - i, i - 1, c_b5, w_dim1 + i + 1 + pointer_w, ldw, i * w_dim1 + 1 + pointer_w, 1, c_b6, (i) * w_dim1 + i + 1 + pointer_w, 1); //$NON-NLS-1$
          dscal(n - i, tau[pointer_tau + i], (i) * w_dim1 + i + 1 + pointer_w, 1);
          RS alpha = tau[pointer_tau + i].divide(2).unaryMinus().multiply(ddot(n - i, (i) * w_dim1 + i + 1 + pointer_w, 1, i * a_dim1 + i + 1 + pointer_a, 1));
          daxpy(n - i, alpha, i * a_dim1 + i + 1 + pointer_a, 1, (i) * w_dim1 + i + 1 + pointer_w, 1);
        }
        /* L20: */
      }
    }

    return 0;
  }

  /**
   * 引数で与えたパラメータ（配列等）は変更されません
   * 
   * @param n n
   * @param dxIndex dxIndex
   * @param incx incx
   * @param dyIndex dyIndex
   * @param incy incy
   * @return result
   */
  private RS ddot(int n, int dxIndex, int incx, int dyIndex, int incy) {
    final RS unit = this.a[0].createUnit();
    RS[] dx = unit.createArray(this.w.length - dxIndex);
    RS[] dy = unit.createArray(this.a.length - dyIndex);
    System.arraycopy(this.w, dxIndex, dx, 0, dx.length);
    System.arraycopy(this.a, dyIndex, dy, 0, dy.length);
    return BLAS.ddot(n, dx, incx, dy, incy);
  }

  /**
   * dy= dy+da.*dx 引数dyの値は変更されます。
   * 
   * @param n n
   * @param alpha alpha
   * @param dxIndex dxIndex
   * @param incx indx
   * @param dyIndex dyIndex
   * @param incy incy
   */
  private void daxpy(int n, RS alpha, int dxIndex, int incx, int dyIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] dx = unit.createArray(this.a.length - dxIndex);
    RS[] dy = unit.createArray(this.w.length - dyIndex);
    System.arraycopy(this.a, dxIndex, dx, 0, dx.length);
    System.arraycopy(this.w, dyIndex, dy, 0, dy.length);
    BLAS.daxpy(n, alpha, dx, incx, dy, incy);
    System.arraycopy(dy, 0, this.w, dyIndex, dy.length);
  }

  /**
   * @param n n
   * @param da tau[?]
   * @param dxIndex w
   * @param incx indx
   */
  private void dscal(int n, RS da, int dxIndex, int incx) {
    final RS unit = da.createUnit();
    RS[] dx = unit.createArray(this.w.length - dxIndex);
    System.arraycopy(this.w, dxIndex, dx, 0, dx.length);
    BLAS.dscal(n, da, dx, incx);
    System.arraycopy(dx, 0, this.w, dxIndex, dx.length);
  }

  /**
   * @param uplo uplo
   * @param n n
   * @param alpha alpha
   * @param aIndex aIndex
   * @param lda lda
   * @param xIndex xIndex
   * @param incx incx
   * @param beta beta
   * @param yIndex w overwritten
   * @param incy incy
   * @return result
   */
  private int dsymv(String uplo, int n, RS alpha, int aIndex, int lda, int xIndex, int incx, RS beta, int yIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] aTemp = unit.createArray(this.a.length - aIndex);
    RS[] x = unit.createArray(this.a.length - xIndex);
    RS[] y = unit.createArray(this.w.length - yIndex);
    System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.a, xIndex, x, 0, x.length);
    System.arraycopy(this.w, yIndex, y, 0, y.length);
    int retValue = BLAS.dsymv(uplo, n, alpha, aTemp, lda, x, incx, beta, y, incy);
    System.arraycopy(y, 0, this.w, yIndex, y.length);

    return retValue;
  }

  /**
   * @param n n
   * @param alpha a[?]
   * @param xIndex a
   * @param incx indx
   * @param tau tau
   * @return result
   */
  private RS[] dlarfg(int n, RS alpha, int xIndex, int incx, RS tau) {
    final RS unit = alpha.createUnit();
    RS[] x = unit.createArray(this.a.length - xIndex);
    System.arraycopy(this.a, xIndex, x, 0, x.length);
    RS[] ans = Clapack.dlarfg(n, alpha, x, incx, tau);
    System.arraycopy(x, 0, this.a, xIndex, x.length);
    return ans;
  }

  /**
   * @param trans unchanged
   * @param m unchanged
   * @param n unchanged
   * @param alpha unchanged
   * @param aIndex unchanged
   * @param lda unchanged
   * @param xIndex unchanged
   * @param incx unchanged
   * @param beta unchanged
   * @param yIndex is overwritten by the update vector y
   * @param incy unchanged
   */
  private void dgemv_aww(String trans, int m, int n, RS alpha, int aIndex, int lda, int xIndex, int incx, RS beta, int yIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] aTemp = unit.createArray(this.a.length - aIndex);
    RS[] x = unit.createArray(this.w.length - xIndex);
    RS[] y = unit.createArray(this.w.length - yIndex);
    System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.w, xIndex, x, 0, x.length);
    System.arraycopy(this.w, yIndex, y, 0, y.length);
    BLAS.dgemv(trans, m, n, alpha, aTemp, lda, x, incx, beta, y, incy);
    System.arraycopy(y, 0, this.w, yIndex, y.length);
  }

  /**
   * @param trans unchanged
   * @param m unchanged
   * @param n unchanged
   * @param alpha unchanged
   * @param aIndex unchanged
   * @param lda unchanged
   * @param xIndex xIndex
   * @param incx unchanged
   * @param beta unchanged
   * @param yIndex is overwritten by the update vector y
   * @param incy unchanged
   */
  private void dgemv_awa(String trans, int m, int n, RS alpha, int aIndex, int lda, int xIndex, int incx, RS beta, int yIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] aTemp = unit.createArray(this.a.length - aIndex);
    RS[] x = unit.createArray(this.w.length - xIndex);
    RS[] y = unit.createArray(this.a.length - yIndex);
    System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.w, xIndex, x, 0, x.length);
    System.arraycopy(this.a, yIndex, y, 0, y.length);
    BLAS.dgemv(trans, m, n, alpha, aTemp, lda, x, incx, beta, y, incy);
    System.arraycopy(y, 0, this.a, yIndex, y.length);
  }

  /**
   * @param trans unchanged
   * @param m unchanged
   * @param n unchanged
   * @param alpha unchanged
   * @param aIndex aIndex
   * @param lda unchanged
   * @param xIndex xIndex
   * @param incx unchanged
   * @param beta unchanged
   * @param yIndex is overwritten by the update vector y
   * @param incy unchanged
   */
  private void dgemv_waa(String trans, int m, int n, RS alpha, int aIndex, int lda, int xIndex, int incx, RS beta, int yIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] aTemp = unit.createArray(this.w.length - aIndex);
    RS[] x = unit.createArray(this.a.length - xIndex);
    RS[] y = unit.createArray(this.a.length - yIndex);
    System.arraycopy(this.w, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.a, xIndex, x, 0, x.length);
    System.arraycopy(this.a, yIndex, y, 0, y.length);
    BLAS.dgemv(trans, m, n, alpha, aTemp, lda, x, incx, beta, y, incy);
    System.arraycopy(y, 0, this.a, yIndex, y.length);
  }

  /**
   * @param trans unchanged
   * @param m unchanged
   * @param n unchanged
   * @param alpha unchanged
   * @param aIndex unchanged
   * @param lda unchanged
   * @param xIndex unchanged
   * @param incx unchanged
   * @param beta unchanged
   * @param yIndex is overwritten by the update vector y
   * @param incy unchanged
   */
  private void dgemv_aaw(String trans, int m, int n, RS alpha, int aIndex, int lda, int xIndex, int incx, RS beta, int yIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] aTemp = unit.createArray(this.a.length - aIndex);
    RS[] x = unit.createArray(this.a.length - xIndex);
    RS[] y = unit.createArray(this.w.length - yIndex);
    System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.a, xIndex, x, 0, x.length);
    System.arraycopy(this.w, yIndex, y, 0, y.length);
    BLAS.dgemv(trans, m, n, alpha, aTemp, lda, x, incx, beta, y, incy);
    System.arraycopy(y, 0, this.w, yIndex, y.length);
  }

  /**
   * @param trans unchanged
   * @param m unchanged
   * @param n unchanged
   * @param alpha unchanged
   * @param aIndex unchanged
   * @param lda unchanged
   * @param xIndex unchanged
   * @param incx unchanged
   * @param beta unchanged
   * @param yIndex is overwritten by the update vector y
   * @param incy unchanged
   */
  private void dgemv_www(String trans, int m, int n, RS alpha, int aIndex, int lda, int xIndex, int incx, RS beta, int yIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] aTemp = unit.createArray(this.w.length - aIndex);
    RS[] x = unit.createArray(this.w.length - xIndex);
    RS[] y = unit.createArray(this.w.length - yIndex);
    System.arraycopy(this.w, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.w, xIndex, x, 0, x.length);
    System.arraycopy(this.w, yIndex, y, 0, y.length);
    BLAS.dgemv(trans, m, n, alpha, aTemp, lda, x, incx, beta, y, incy);
    System.arraycopy(y, 0, this.w, yIndex, y.length);
  }

  /**
   * @param trans unchanged
   * @param m unchanged
   * @param n unchanged
   * @param alpha unchanged
   * @param aIndex unchanged
   * @param lda unchanged
   * @param xIndex unchanged
   * @param incx unchanged
   * @param beta unchanged
   * @param yIndex is overwritten by the update vector y
   * @param incy unchanged
   */
  private void dgemv_waw(String trans, int m, int n, RS alpha, int aIndex, int lda, int xIndex, int incx, RS beta, int yIndex, int incy) {
    final RS unit = alpha.createUnit();
    RS[] aTemp = unit.createArray(this.w.length - aIndex);
    RS[] x = unit.createArray(this.a.length - xIndex);
    RS[] y = unit.createArray(this.w.length - yIndex);
    System.arraycopy(this.w, aIndex, aTemp, 0, aTemp.length);
    System.arraycopy(this.a, xIndex, x, 0, x.length);
    System.arraycopy(this.w, yIndex, y, 0, y.length);
    BLAS.dgemv(trans, m, n, alpha, aTemp, lda, x, incx, beta, y, incy);
    System.arraycopy(y, 0, this.w, yIndex, y.length);
  }
}