Dsytrd.java
package org.mklab.sdpj.gpack.lapackwrap;
import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;
import org.mklab.sdpj.gpack.blaswrap.BLAS;
/**
* @author koga
* @version $Revision$, 2009/04/25
* @param <RS> type of real scalar
* @param <RM> type of real matrix
* @param <CS> type of complex scalar
* @param <CM> type of complex Matrix
*/
public class Dsytrd<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {
/** */
private RS[] a;
/** */
private RS[] d;
/** */
private RS[] e;
/** */
private RS[] tau;
/** */
private RS[] work;
/* -- LAPACK routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
June 30, 1999
Purpose
=======
DSYTRD reduces a real symmetric matrix A to real symmetric
tridiagonal form T by an orthogonal similarity transformation:
Q**T * A * Q = T.
Arguments
=========
UPLO (input) CHARACTER*1
= 'U': Upper triangle of A is stored;
= 'L': Lower triangle of A is stored.
N (input) INTEGER
The order of the matrix A. N >= 0.
A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
On entry, the symmetric matrix A. If UPLO = 'U', the leading
N-by-N upper triangular part of A contains the upper
triangular part of the matrix A, and the strictly lower
triangular part of A is not referenced. If UPLO = 'L', the
leading N-by-N lower triangular part of A contains the lower
triangular part of the matrix A, and the strictly upper
triangular part of A is not referenced.
On exit, if UPLO = 'U', the diagonal and first superdiagonal
of A are overwritten by the corresponding elements of the
tridiagonal matrix T, and the elements above the first
superdiagonal, with the array TAU, represent the orthogonal
matrix Q as a product of elementary reflectors; if UPLO
= 'L', the diagonal and first subdiagonal of A are over-
written by the corresponding elements of the tridiagonal
matrix T, and the elements below the first subdiagonal, with
the array TAU, represent the orthogonal matrix Q as a product
of elementary reflectors. See Further Details.
LDA (input) INTEGER
The leading dimension of the array A. LDA >= max(1,N).
D (output) DOUBLE PRECISION array, dimension (N)
The diagonal elements of the tridiagonal matrix T:
D(i) = A(i,i).
RS (output) DOUBLE PRECISION array, dimension (N-1)
The off-diagonal elements of the tridiagonal matrix T:
E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
TAU (output) DOUBLE PRECISION array, dimension (N-1)
The scalar factors of the elementary reflectors (see Further
Details).
WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The dimension of the array WORK. LWORK >= 1.
For optimum performance LWORK >= N*NB, where NB is the
optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal size of the WORK array, returns
this value as the first entry of the WORK array, and no error
message related to LWORK is issued by XERBLA.
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
Further Details
===============
If UPLO = 'U', the matrix Q is represented as a product of elementary
reflectors
Q = H(n-1) . . . H(2) H(1).
Each H(i) has the form
H(i) = I - tau * v * v'
where tau is a real scalar, and v is a real vector with
v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
A(1:i-1,i+1), and tau in TAU(i).
If UPLO = 'L', the matrix Q is represented as a product of elementary
reflectors
Q = H(1) H(2) . . . H(n-1).
Each H(i) has the form
H(i) = I - tau * v * v'
where tau is a real scalar, and v is a real vector with
v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
and tau in TAU(i).
The contents of A on exit are illustrated by the following examples
with n = 5:
if UPLO = 'U': if UPLO = 'L':
( d e v2 v3 v4 ) ( d )
( d e v3 v4 ) ( e d )
( d e v4 ) ( v1 e d )
( d e ) ( v1 v2 e d )
( d ) ( v1 v2 v3 e d )
where d and e denote diagonal and off-diagonal elements of T, and vi
denotes an element of the vector defining H(i).
=====================================================================
*/
/**
* @param uplo uplo
* @param n n
* @param ai ai
* @param lda lda
* @param di di
* @param ei ei
* @param taui taui
* @param worki worki
* @param lwork lwork
* @return result
*/
int execute(String uplo, int n, RS[] ai, int lda, RS[] di, RS[] ei, RS[] taui, RS[] worki, int lwork) {
final RS unit = ai[0].createUnit();
this.a = ai;
this.d = di;
this.e = ei;
this.tau = taui;
this.work = worki;
RS c_b22 = unit.create(-1);
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = -a_offset;
int pointer_d = -1;
int pointer_e = -1;
int pointer_tau = -1;
int pointer_work = -1;
int info = 0;
boolean upper = BLAS.lsame(uplo, "U"); //$NON-NLS-1$
boolean lquery = (lwork == -1);
if (!upper && !BLAS.lsame(uplo, "L")) { //$NON-NLS-1$
info = -1;
} else if (n < 0) {
info = -2;
} else if (lda < Math.max(1, n)) {
info = -4;
} else if (lwork < 1 && !lquery) {
info = -9;
}
int lwkopt = 0;
int nb = 0;
if (info == 0) {
// Determine the block size.
nb = Clapack.ilaenv(1, "DSYTRD", uplo, n, -1, -1, -1, 6, 1, unit); //$NON-NLS-1$
lwkopt = n * nb;
this.work[pointer_work + 1] = unit.create(lwkopt);
}
if (info != 0) {
BLAS.xerbla("DSYTRD", -(info)); //$NON-NLS-1$
return info;
} else if (lquery) {
return info;
}
// Quick return if possible
if (n == 0) {
this.work[pointer_work + 1] = unit.create(1);
return info;
}
int ldwork = 0;
int nx = n;
int iws = 1;
if (nb > 1 && nb < n) {
/* Determine when to cross over from blocked to unblocked code
(last block is always handled by unblocked code). */
int i2 = Clapack.ilaenv(3, "DSYTRD", uplo, n, -1, -1, -1, 6, 1, unit); //$NON-NLS-1$
nx = Math.max(nb, i2);
if (nx < n) {
/* Determine if workspace is large enough for blocked code. */
ldwork = n;
iws = ldwork * nb;
if (lwork < iws) {
/* Not enough workspace to use optimal NB: determine the
minimum value of NB, and reduce NB or force use of
unblocked code by setting NX = N. */
nb = Math.max(lwork / ldwork, 1);
int nbmin = Clapack.ilaenv(2, "DSYTRD", uplo, n, -1, -1, -1, 6, 1, unit); //$NON-NLS-1$
if (nb < nbmin) {
nx = n;
}
}
} else {
nx = n;
}
} else {
nb = 1;
}
if (upper) {
/* Reduce the upper triangle of A. Columns 1:kk are handled by the unblocked method. */
int kk = n - (n - nx + nb - 1) / nb * nb;
for (int i = n - nb + 1; -nb < 0 ? i >= kk + 1 : i <= kk + 1; i += -nb) {
/* Reduce columns i:i+nb-1 to tridiagonal form and form the
matrix W which is needed to update the unreduced part of the matrix */
dlatrd(uplo, i + nb - 1, nb, pointer_a + a_offset, lda, pointer_e + 1, pointer_tau + 1, pointer_work + 1, ldwork);
/* Update the unreduced submatrix A(1:i-1,1:i-1), using an update of the form: A := A - V*W' - W*V' */
RS c_b23 = unit.createUnit();
dsyr2k(uplo, "No transpose", i - 1, nb, c_b22, (i) * a_dim1 + 1 + pointer_a, lda, pointer_work + 1, ldwork, c_b23, pointer_a + a_offset, lda); //$NON-NLS-1$
/* Copy superdiagonal elements back into A, and diagonal elements into D */
for (int j = i; j <= i + nb - 1; ++j) {
this.a[(j) * a_dim1 + j - 1 + pointer_a] = this.e[pointer_e + j - 1];
this.d[pointer_d + j] = this.a[(j) * a_dim1 + j + pointer_a];
}
}
/* Use unblocked code to reduce the last or only block */
info = dsytd2(uplo, kk, pointer_a + a_offset, lda, pointer_d + 1, pointer_e + 1, pointer_tau + 1);
} else {
int i;
for (i = 1; nb < 0 ? i >= n - nx : i <= n - nx; i += nb) {
/* Reduce columns i:i+nb-1 to tridiagonal form and form the
matrix W which is needed to update the unreduced part of the matrix */
dlatrd(uplo, n - i + 1, nb, i * a_dim1 + i + pointer_a, lda, pointer_e + i, pointer_tau + i, pointer_work + 1, ldwork);
/* Update the unreduced submatrix A(i+ib:n,i+ib:n), using an update of the form: A := A - V*W' - W*V' */
RS c_b23 = unit.createUnit();
dsyr2k(uplo, "No transpose", n - i - nb + 1, nb, c_b22, (i) * a_dim1 + i + nb + pointer_a, lda, pointer_work + nb + 1, ldwork, c_b23, (i + nb) * a_dim1 + i + nb + pointer_a, lda); //$NON-NLS-1$
/* Copy subdiagonal elements back into A, and diagonal elements into D */
for (int j = i; j <= i + nb - 1; ++j) {
this.a[(j) * a_dim1 + j + 1 + pointer_a] = this.e[pointer_e + j];
this.d[pointer_d + j] = this.a[(j) * a_dim1 + j + pointer_a];
}
}
// Use unblocked code to reduce the last or only block *
info = dsytd2(uplo, n - i + 1, (i) * a_dim1 + i + pointer_a, lda, pointer_d + i, pointer_e + i, pointer_tau + i);
}
this.work[pointer_work + 1] = unit.create(lwkopt);
return info;
}
/**
* 変更されるパラメータ:a e tau w
*
* @param uplo uplo
* @param n n
* @param nb nb
* @param aIndex aIndex
* @param lda lda
* @param eIndex eIndex
* @param tauIndex tauIndex
* @param wIndex wIndex
* @param ldw ldw
*/
private void dlatrd(String uplo, int n, int nb, int aIndex, int lda, int eIndex, int tauIndex, int wIndex, int ldw) {
final RS unit = this.a[0].createUnit();
RS[] aTemp = unit.createArray(this.a.length - aIndex);
RS[] eTemp = unit.createArray(this.e.length - eIndex);
RS[] tauTemp = unit.createArray(this.tau.length - tauIndex);
RS[] wTemp = unit.createArray(this.work.length - wIndex);
System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
System.arraycopy(this.e, eIndex, eTemp, 0, eTemp.length);
System.arraycopy(this.tau, tauIndex, tauTemp, 0, tauTemp.length);
System.arraycopy(this.work, wIndex, wTemp, 0, wTemp.length);
Clapack.dlatrd(uplo, n, nb, aTemp, lda, eTemp, tauTemp, wTemp, ldw);
System.arraycopy(aTemp, 0, this.a, aIndex, aTemp.length);
System.arraycopy(eTemp, 0, this.e, eIndex, eTemp.length);
System.arraycopy(tauTemp, 0, this.tau, tauIndex, tauTemp.length);
System.arraycopy(wTemp, 0, this.work, wIndex, wTemp.length);
}
/**
* 引数(行列)は、この関数内ではどの呼び出しも順に a[] work[] a[] なのでIndexのみ渡すように変更。 Cのみ変更される。
*
* @param uplo uplo
* @param trans trans
* @param n n
* @param k k
* @param alpha alpha
* @param aIndex 行列aの部分行列の先頭
* @param lda lda
* @param bIndex 行列workの部分行列の先頭
* @param ldb ldb
* @param beta beta
* @param cIndex 行列aの部分行列の先頭
* @param ldc ldc
*/
private void dsyr2k(String uplo, String trans, int n, int k, RS alpha, int aIndex, int lda, int bIndex, int ldb, RS beta, int cIndex, int ldc) {
final RS unit = alpha.createUnit();
RS[] aTemp = unit.createArray(this.a.length - aIndex);
RS[] bTemp = unit.createArray(this.work.length - bIndex);
RS[] cTemp = unit.createArray(this.a.length - cIndex);
System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
System.arraycopy(this.work, bIndex, bTemp, 0, bTemp.length);
System.arraycopy(this.a, cIndex, cTemp, 0, cTemp.length);
BLAS.dsyr2k(uplo, trans, n, k, alpha, aTemp, lda, bTemp, ldb, beta, cTemp, ldc);
System.arraycopy(cTemp, 0, this.a, cIndex, cTemp.length);
}
/**
* 変更されるパラメータ:a d e tau info
*
* @param uplo uplo
* @param n n
* @param aIndex a
* @param lda lda
* @param dIndex d
* @param eIndex e
* @param tauIndex tau
* @return result
*/
private int dsytd2(String uplo, int n, int aIndex, int lda, int dIndex, int eIndex, int tauIndex) {
final RS unit = this.a[0].createUnit();
RS[] aTemp = unit.createArray(this.a.length - aIndex);
RS[] dTemp = unit.createArray(this.d.length - dIndex);
RS[] eTemp = unit.createArray(this.e.length - eIndex);
RS[] tauTemp = unit.createArray(this.tau.length - tauIndex);
System.arraycopy(this.a, aIndex, aTemp, 0, aTemp.length);
System.arraycopy(this.d, dIndex, dTemp, 0, dTemp.length);
System.arraycopy(this.e, eIndex, eTemp, 0, eTemp.length);
System.arraycopy(this.tau, tauIndex, tauTemp, 0, tauTemp.length);
int info = Clapack.dsytd2(uplo, n, aTemp, lda, dTemp, eTemp, tauTemp);
System.arraycopy(aTemp, 0, this.a, aIndex, aTemp.length);
System.arraycopy(dTemp, 0, this.d, dIndex, dTemp.length);
System.arraycopy(eTemp, 0, this.e, eIndex, eTemp.length);
System.arraycopy(tauTemp, 0, this.tau, tauIndex, tauTemp.length);
return info;
}
}