Dorgtr.java
package org.mklab.sdpj.gpack.lapackwrap;
import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;
import org.mklab.sdpj.gpack.blaswrap.BLAS;
/**
* ?SYTRDで求めた3重対角形を使って直行行列の一つを一般行列に乗算する。
*
* @author takafumi
* @param <RS> type of real scalar
* @param <RM> type of real matrix
* @param <CS> type of complex scalar
* @param <CM> type of complex Matrix
*/
public class Dorgtr<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {
/** */
private RS[] a;
/** */
private RS[] tau;
/** */
private RS[] work;
/* -- LAPACK routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
June 30, 1999
Purpose
=======
DORGTR generates a real orthogonal matrix Q which is defined as the
product of n-1 elementary reflectors of order N, as returned by
DSYTRD:
if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
Arguments
=========
UPLO (input) CHARACTER*1
= 'U': Upper triangle of A contains elementary reflectors
from DSYTRD;
= 'L': Lower triangle of A contains elementary reflectors
from DSYTRD.
N (input) INTEGER
The order of the matrix Q. N >= 0.
A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
On entry, the vectors which define the elementary reflectors,
as returned by DSYTRD.
On exit, the N-by-N orthogonal matrix Q.
LDA (input) INTEGER
The leading dimension of the array A. LDA >= max(1,N).
TAU (input) DOUBLE PRECISION array, dimension (N-1)
TAU(i) must contain the scalar factor of the elementary
reflector H(i), as returned by DSYTRD.
WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The dimension of the array WORK. LWORK >= max(1,N-1).
For optimum performance LWORK >= (N-1)*NB, where NB is
the optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal size of the WORK array, returns
this value as the first entry of the WORK array, and no error
message related to LWORK is issued by XERBLA.
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
=====================================================================
*/
/**
* @param uplo uplo
* @param n n
* @param at at
* @param lda lda
* @param taut taut
* @param workt workt
* @param lwork lwork
* @return result
*/
public int execute(String uplo, int n, RS[] at, int lda, RS[] taut, RS[] workt, int lwork) {
final RS unit = at[0].createUnit();
//debug work_indtauを監視
this.a = at;
this.tau = taut;
this.work = workt;
int lwkopt = 0;
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = -a_offset;
int pointer_work = -1;
int pointer_tau = -1;
int info = 0;
boolean lquery = lwork == -1;
boolean upper = BLAS.lsame(uplo, "U"); //$NON-NLS-1$
if (!upper && !BLAS.lsame(uplo, "L")) { //$NON-NLS-1$
info = -1;
} else if (n < 0) {
info = -2;
} else if (lda < Math.max(1, n)) {
info = -4;
} else /* if(complicated condition) */{
if (lwork < Math.max(1, n - 1) && !lquery) {
info = -7;
}
}
if (info == 0) {
int nb;
if (upper) {
nb = Clapack.ilaenv(1, "DORGQL", " ", n - 1, n - 1, n - 1, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
} else {
nb = Clapack.ilaenv(1, "DORGQR", " ", n - 1, n - 1, n - 1, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
}
lwkopt = Math.max(1, n - 1) * nb;
this.work[1 + pointer_work] = unit.create(lwkopt);
}
if (info != 0) {
BLAS.xerbla("DORGTR", -info); //$NON-NLS-1$
return info;
} else if (lquery) {
return info;
}
/* Quick return if possible */
if (n == 0) {
this.work[1 + pointer_work] = unit.create(1);
return info;
}
if (upper) {
/* Q was determined by a call to DSYTRD with UPLO = 'U'
Shift the vectors which define the elementary reflectors one
column to the left, and set the last row and column of Q to
those of the unit matrix */
for (int j = 1; j <= n - 1; ++j) {
for (int i = 1; i <= j - 1; ++i) {
this.a[j * a_dim1 + i + pointer_a] = this.a[(j + 1) * a_dim1 + i + pointer_a];
}
this.a[j * a_dim1 + n + pointer_a] = unit.create(0);
}
for (int i = 1; i <= n - 1; ++i) {
this.a[n * a_dim1 + i + pointer_a] = unit.create(0);
}
this.a[n * a_dim1 + n + pointer_a] = unit.create(1);
/* Generate Q(1:n-1,1:n-1) */
info = dorgql_(n - 1, n - 1, n - 1, a_offset + pointer_a, lda, 1 + pointer_tau, 1 + pointer_work, lwork);
} else {
/* Q was determined by a call to DSYTRD with UPLO = 'L'.
Shift the vectors which define the elementary reflectors one
column to the right, and set the first row and column of Q to
those of the unit matrix */
for (int j = n; j >= 2; --j) {
this.a[j * a_dim1 + 1 + pointer_a] = unit.create(0);
for (int i = j + 1; i <= n; ++i) {
this.a[j * a_dim1 + i + pointer_a] = this.a[(j - 1) * a_dim1 + i + pointer_a];
/* L40: */
}
/* L50: */
}
this.a[1 * a_dim1 + 1 + pointer_a] = unit.create(1);
for (int i = 2; i <= n; ++i) {
this.a[1 * a_dim1 + i + pointer_a] = unit.create(0);
/* L60: */
}
if (n > 1) {
/* Generate Q(2:n,2:n) */
info = dorgqr_(n - 1, n - 1, n - 1, 2 * a_dim1 + 2 + pointer_a, lda, 1 + pointer_tau, 1 + pointer_work, lwork);
}
}
this.work[1 + pointer_work] = unit.create(lwkopt);
return info;
}
/* -- LAPACK routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
June 30, 1999
Purpose
=======
DORGQL generates an M-by-N real matrix Q with orthonormal columns,
which is defined as the last N columns of a product of K elementary
reflectors of order M
Q = H(k) . . . H(2) H(1)
as returned by DGEQLF.
Arguments
=========
M (input) INTEGER
The number of rows of the matrix Q. M >= 0.
N (input) INTEGER
The number of columns of the matrix Q. M >= N >= 0.
K (input) INTEGER
The number of elementary reflectors whose product defines the
matrix Q. N >= K >= 0.
A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
On entry, the (n-k+i)-th column must contain the vector which
defines the elementary reflector H(i), for i = 1,2,...,k, as
returned by DGEQLF in the last k columns of its array
argument A.
On exit, the M-by-N matrix Q.
LDA (input) INTEGER
The first dimension of the array A. LDA >= max(1,M).
TAU (input) DOUBLE PRECISION array, dimension (K)
TAU(i) must contain the scalar factor of the elementary
reflector H(i), as returned by DGEQLF.
WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The dimension of the array WORK. LWORK >= max(1,N).
For optimum performance LWORK >= N*NB, where NB is the
optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal size of the WORK array, returns
this value as the first entry of the WORK array, and no error
message related to LWORK is issued by XERBLA.
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument has an illegal value
=====================================================================
*/
/**
* Dorgtrのサブルーチンとして作成しているため、別のクラスから利用する場合は 被せて作る必要有り。
*
* @param m m
* @param n n
* @param k k
* @param aIndex aIndex
* @param lda lda
* @param tauIndex tauIndex
* @param workIndex workIndex
* @param lwork lwork
* @return result
*/
private int dorgql_(int m, int n, int k, int aIndex, int lda, int tauIndex, int workIndex, int lwork) {
final RS unit = this.a[0].createUnit();
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = aIndex - a_offset;
int pointer_tau = tauIndex - 1;
int pointer_work = workIndex - 1;
int info = 0;
int nb = Clapack.ilaenv(1, "DORGQL", " ", m, n, k, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
int lwkopt = Math.max(1, n) * nb;
this.work[1 + pointer_work] = unit.create(lwkopt);
boolean lquery = lwork == -1;
if (m < 0) {
info = -1;
} else if (n < 0 || n > m) {
info = -2;
} else if (k < 0 || k > n) {
info = -3;
} else if (lda < Math.max(1, m)) {
info = -5;
} else if (lwork < Math.max(1, n) && !lquery) {
info = -8;
}
if (info != 0) {
BLAS.xerbla("DORGQL", -info); //$NON-NLS-1$
return info;
} else if (lquery) {
return info;
}
/* Quick return if possible */
if (n <= 0) {
this.work[1 + pointer_work] = unit.create(1);
return info;
}
int nbmin = 2;
int nx = 0;
int ldwork = 0;
int iws = n;
if (nb > 1 && nb < k) {
/* Determine when to cross over from blocked to unblocked code. Computing MAX */
int i2 = Clapack.ilaenv(3, "DORGQL", " ", m, n, k, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
nx = Math.max(0, i2);
if (nx < k) {
/* Determine if workspace is large enough for blocked code. */
ldwork = n;
iws = ldwork * nb;
if (lwork < iws) {
/* Not enough workspace to use optimal NB: reduce NB and
determine the minimum value of NB. */
nb = lwork / ldwork;
/* Computing MAX */
i2 = Clapack.ilaenv(2, "DORGQL", " ", m, n, k, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
nbmin = Math.max(2, i2);
}
}
}
int kk;
if (nb >= nbmin && nb < k && nx < k) {
int i2 = (k - nx + nb - 1) / nb * nb;
kk = Math.min(k, i2);
/* Set A(m-kk+1:m,1:n-kk) to zero. */
for (int j = 1; j <= n - kk; ++j) {
for (int i = m - kk + 1; i <= m; ++i) {
this.a[j * a_dim1 + i + pointer_a] = unit.create(0);
}
}
} else {
kk = 0;
}
/* Use unblocked code for the first or only block. */
info = dorg2l_(m - kk, n - kk, k - kk, a_offset + pointer_a, lda, 1 + pointer_tau, 1 + pointer_work);
if (kk > 0) {
/* Use blocked code */
for (int i = k - kk + 1; nb < 0 ? i >= k : i <= k; i += nb) {
/* Computing MIN */
int ib = Math.min(nb, k - i + 1);
if (n - k + i > 1) {
/* Form the triangular factor of the block reflector H = H(i+ib-1) . . . H(i+1) H(i) */
int i3 = m - k + i + ib - 1;
dlarft_("Backward", "Columnwise", i3, ib, (n - k + i) * a_dim1 + 1 + pointer_a, lda, i + pointer_tau, 1 + pointer_work, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
/* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
i3 = m - k + i + ib - 1;
int i4 = n - k + i - 1;
dlarfb_("Left", "No transpose", "Backward", "Columnwise", i3, i4, ib, (n - k + i) * a_dim1 + 1 + pointer_a, lda, 1 + pointer_work, ldwork, a_offset + pointer_a, lda, ib + 1 //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
+ pointer_work, ldwork);
}
/* Apply H to rows 1:m-k+i+ib-1 of current block */
info = dorg2l_(m - k + i + ib - 1, ib, ib, (n - k + i) * a_dim1 + 1 + pointer_a, lda, i + pointer_tau, 1 + pointer_work);
/* Set rows m-k+i+ib:m of current block to zero */
for (int j = n - k + i; j <= n - k + i + ib - 1; ++j) {
for (int l = m - k + i + ib; l <= m; ++l) {
this.a[j * a_dim1 + l + pointer_a] = unit.create(0);
}
}
}
}
this.work[1 + pointer_work] = unit.create(iws);
return info;
}
/* -- LAPACK routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
February 29, 1992
Purpose
=======
DORG2L generates an m by n real matrix Q with orthonormal columns,
which is defined as the last n columns of a product of k elementary
reflectors of order m
Q = H(k) . . . H(2) H(1)
as returned by DGEQLF.
Arguments
=========
M (input) INTEGER
The number of rows of the matrix Q. M >= 0.
N (input) INTEGER
The number of columns of the matrix Q. M >= N >= 0.
K (input) INTEGER
The number of elementary reflectors whose product defines the
matrix Q. N >= K >= 0.
A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
On entry, the (n-k+i)-th column must contain the vector which
defines the elementary reflector H(i), for i = 1,2,...,k, as
returned by DGEQLF in the last k columns of its array
argument A.
On exit, the m by n matrix Q.
LDA (input) INTEGER
The first dimension of the array A. LDA >= max(1,M).
TAU (input) DOUBLE PRECISION array, dimension (K)
TAU(i) must contain the scalar factor of the elementary
reflector H(i), as returned by DGEQLF.
WORK (workspace) DOUBLE PRECISION array, dimension (N)
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument has an illegal value
=====================================================================
*/
/**
* @param m m
* @param n n
* @param k k
* @param aIndex aIndex
* @param lda lda
* @param tauIndex tauIndex
* @param workIndex workIndex
* @return result
*/
private int dorg2l_(int m, int n, int k, int aIndex, int lda, int tauIndex, int workIndex) {
final RS unit = this.a[0].createUnit();
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = aIndex - a_offset;
int pointer_tau = tauIndex - 1;
int pointer_work = workIndex - 1;
int info = 0;
if (m < 0) {
info = -1;
} else if (n < 0 || n > m) {
info = -2;
} else if (k < 0 || k > n) {
info = -3;
} else if (lda < Math.max(1, m)) {
info = -5;
}
if (info != 0) {
BLAS.xerbla("DORG2L", -info); //$NON-NLS-1$
return info;
}
/* Quick return if possible */
if (n <= 0) {
return info;
}
/* Initialize columns 1:n-k to columns of the unit matrix */
for (int j = 1; j <= n - k; ++j) {
for (int l = 1; l <= m; ++l) {
this.a[(j) * a_dim1 + l + pointer_a] = unit.create(0);
}
this.a[(j) * a_dim1 + m - n + j + pointer_a] = unit.create(1);
}
for (int i = 1; i <= k; ++i) {
int ii = n - k + i;
/* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */
this.a[ii * a_dim1 + m - n + ii + pointer_a] = unit.create(1);
dlarf_("Left", m - n + ii, ii - 1, (ii) * a_dim1 + 1 + pointer_a, 1, this.tau[i + pointer_tau], a_offset + pointer_a, lda, 1 + pointer_work); //$NON-NLS-1$
RS d1 = this.tau[i + pointer_tau].unaryMinus();
//array copy
RS[] aTemp = unit.createArray(this.a.length - (ii * a_dim1 + 1 + pointer_a));
System.arraycopy(this.a, ii * a_dim1 + 1 + pointer_a, aTemp, 0, aTemp.length);
BLAS.dscal(m - n + ii - 1, d1, aTemp, 1);
System.arraycopy(aTemp, 0, this.a, ii * a_dim1 + 1 + pointer_a, aTemp.length);
this.a[ii * a_dim1 + m - n + ii + pointer_a] = unit.create(1).subtract(this.tau[i + pointer_tau]);
/* Set A(m-k+i+1:m,n-k+i) to zero */
for (int l = m - n + ii + 1; l <= m; ++l) {
this.a[(ii) * a_dim1 + l + pointer_a] = unit.create(0);
}
}
return info;
}
/* -- LAPACK auxiliary routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
February 29, 1992
Purpose
=======
DLARF applies a real elementary reflector H to a real m by n matrix
C, from either the left or the right. H is represented in the form
H = I - tau * v * v'
where tau is a real scalar and v is a real vector.
If tau = 0, then H is taken to be the unit matrix.
Arguments
=========
SIDE (input) CHARACTER*1
= 'L': form H * C
= 'R': form C * H
M (input) INTEGER
The number of rows of the matrix C.
N (input) INTEGER
The number of columns of the matrix C.
V (input) DOUBLE PRECISION array, dimension
(1 + (M-1)*abs(INCV)) if SIDE = 'L'
or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
The vector v in the representation of H. V is not used if
TAU = 0.
INCV (input) INTEGER
The increment between elements of v. INCV <> 0.
TAU (input) DOUBLE PRECISION
The value tau in the representation of H.
C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
On entry, the m by n matrix C.
On exit, C is overwritten by the matrix H * C if SIDE = 'L',
or C * H if SIDE = 'R'.
LDC (input) INTEGER
The leading dimension of the array C. LDC >= max(1,M).
WORK (workspace) DOUBLE PRECISION array, dimension
(N) if SIDE = 'L'
or (M) if SIDE = 'R'
=====================================================================
*/
/**
* @param side side
* @param m m
* @param n n
* @param vIndex vIndex
* @param incv incv
* @param tau tau
* @param cIndex cIndex
* @param ldc ldc
* @param workIndex workIndex
* @return result
*/
private int dlarf_(String side, int m, int n, int vIndex, int incv, RS tau, int cIndex, int ldc, int workIndex) {
final RS unit = this.a[0].createUnit();
RS[] v = this.a;
RS[] c = this.a;
RS c_b4 = unit.createUnit();
RS c_b5 = unit.createZero();
int pointer_v = vIndex - 1;
int c_dim1 = ldc;
int c_offset = 1 + c_dim1 * 1;
int pointer_c = cIndex - c_offset;
int pointer_work = workIndex - 1;
if (BLAS.lsame(side, "L")) { //$NON-NLS-1$
/* Form H * C */
if (!tau.isZero()) {
//array copy
RS[] cTemp = unit.createArray(c.length - (c_offset + pointer_c));
RS[] workTemp = unit.createArray(this.work.length - (1 + pointer_work));
RS[] vTemp = unit.createArray(v.length - (1 + pointer_v));
System.arraycopy(c, c_offset + pointer_c, cTemp, 0, cTemp.length);
System.arraycopy(this.work, 1 + pointer_work, workTemp, 0, workTemp.length);
System.arraycopy(v, 1 + pointer_v, vTemp, 0, vTemp.length);
/* w := C' * v */
BLAS.dgemv("Transpose", m, n, c_b4, cTemp, ldc, vTemp, incv, c_b5, workTemp, 1); //$NON-NLS-1$
BLAS.dger(m, n, tau.unaryMinus(), vTemp, incv, workTemp, 1, cTemp, ldc);
System.arraycopy(cTemp, 0, c, c_offset + pointer_c, cTemp.length);
System.arraycopy(workTemp, 0, this.work, 1 + pointer_work, workTemp.length);
System.arraycopy(vTemp, 0, v, 1 + pointer_v, vTemp.length);
}
} else {
/* Form C * H */
if (!tau.isZero()) {
//array copy
RS[] cTemp = unit.createArray(c.length - (c_offset + pointer_c));
RS[] workTemp = unit.createArray(this.work.length - (1 + pointer_work));
RS[] vTemp = unit.createArray(v.length - (1 + pointer_v));
System.arraycopy(c, c_offset + pointer_c, cTemp, 0, cTemp.length);
System.arraycopy(this.work, 1 + pointer_work, workTemp, 0, workTemp.length);
System.arraycopy(v, 1 + pointer_v, vTemp, 0, vTemp.length);
/* w := C * v */
BLAS.dgemv("No transpose", m, n, c_b4, cTemp, ldc, vTemp, incv, c_b5, workTemp, 1); //$NON-NLS-1$
BLAS.dger(m, n, tau.unaryMinus(), workTemp, 1, vTemp, incv, cTemp, ldc);
System.arraycopy(cTemp, 0, c, c_offset + pointer_c, cTemp.length);
System.arraycopy(workTemp, 0, this.work, 1 + pointer_work, workTemp.length);
System.arraycopy(vTemp, 0, v, 1 + pointer_v, vTemp.length);
}
}
return 0;
}
/* -- LAPACK routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
June 30, 1999
Purpose
=======
DORGQR generates an M-by-N real matrix Q with orthonormal columns,
which is defined as the first N columns of a product of K elementary
reflectors of order M
Q = H(1) H(2) . . . H(k)
as returned by DGEQRF.
Arguments
=========
M (input) INTEGER
The number of rows of the matrix Q. M >= 0.
N (input) INTEGER
The number of columns of the matrix Q. M >= N >= 0.
K (input) INTEGER
The number of elementary reflectors whose product defines the
matrix Q. N >= K >= 0.
A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
On entry, the i-th column must contain the vector which
defines the elementary reflector H(i), for i = 1,2,...,k, as
returned by DGEQRF in the first k columns of its array
argument A.
On exit, the M-by-N matrix Q.
LDA (input) INTEGER
The first dimension of the array A. LDA >= max(1,M).
TAU (input) DOUBLE PRECISION array, dimension (K)
TAU(i) must contain the scalar factor of the elementary
reflector H(i), as returned by DGEQRF.
WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The dimension of the array WORK. LWORK >= max(1,N).
For optimum performance LWORK >= N*NB, where NB is the
optimal blocksize.
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal size of the WORK array, returns
this value as the first entry of the WORK array, and no error
message related to LWORK is issued by XERBLA.
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument has an illegal value
=====================================================================
*/
/**
* @param m m
* @param n n
* @param k k
* @param aIndex aIndex
* @param lda lda
* @param tauIndex tauIndex
* @param workIndex workIndex
* @param lwork lwork
* @return result
*/
private int dorgqr_(int m, int n, int k, int aIndex, int lda, int tauIndex, int workIndex, int lwork) {
final RS unit = this.a[0].createUnit();
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = aIndex - a_offset;
int pointer_tau = tauIndex - 1;
int pointer_work = workIndex - 1;
int nb = Clapack.ilaenv(1, "DORGQR", " ", m, n, k, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
int lwkopt = Math.max(1, n) * nb;
this.work[1 + pointer_work] = unit.create(lwkopt);
boolean lquery = lwork == -1;
int info = 0;
if (m < 0) {
info = -1;
} else if (n < 0 || n > m) {
info = -2;
} else if (k < 0 || k > n) {
info = -3;
} else if (lda < Math.max(1, m)) {
info = -5;
} else if (lwork < Math.max(1, n) && !lquery) {
info = -8;
}
if (info != 0) {
BLAS.xerbla("DORGQR", -info); //$NON-NLS-1$
return info;
} else if (lquery) {
return info;
}
/* Quick return if possible */
if (n <= 0) {
this.work[1 + pointer_work] = unit.create(1);
return info;
}
int ldwork = 0;
int nbmin = 2;
int nx = 0;
int iws = n;
if (nb > 1 && nb < k) {
/* Determine when to cross over from blocked to unblocked code.
Computing MAX */
int i2 = Clapack.ilaenv(3, "DORGQR", " ", m, n, k, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
nx = Math.max(0, i2);
if (nx < k) {
/* Determine if workspace is large enough for blocked code. */
ldwork = n;
iws = ldwork * nb;
if (lwork < iws) {
/* Not enough workspace to use optimal NB: reduce NB and
determine the minimum value of NB. */
nb = lwork / ldwork;
/* Computing MAX */
i2 = Clapack.ilaenv(2, "DORGQR", " ", m, n, k, -1, 6, 1, unit); //$NON-NLS-1$ //$NON-NLS-2$
nbmin = Math.max(2, i2);
}
}
}
int ki = 0;
int kk;
if (nb >= nbmin && nb < k && nx < k) {
/* Use blocked code after the last block.
The first kk columns are handled by the block method. */
ki = (k - nx - 1) / nb * nb;
kk = Math.min(k, ki + nb);
/* Set A(1:kk,kk+1:n) to zero. */
for (int j = kk + 1; j <= n; ++j) {
for (int i = 1; i <= kk; ++i) {
this.a[j * a_dim1 + i + pointer_a] = unit.create(0);
}
}
} else {
kk = 0;
}
/* Use unblocked code for the last or only block. */
if (kk < n) {
info = dorg2r_(m - kk, n - kk, k - kk, (kk + 1) * a_dim1 + kk + 1 + pointer_a, lda, kk + 1 + pointer_tau, 1 + pointer_work);
}
if (kk > 0) {
for (int i = ki + 1; -nb < 0 ? i >= 1 : i <= 1; i += -nb) {
/* Computing MIN */
int ib = Math.min(nb, k - i + 1);
if (i + ib <= n) {
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
dlarft_("Forward", "Columnwise", m - i + 1, ib, i * a_dim1 + i + pointer_a, lda, i + pointer_tau, 1 + pointer_work, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
/* Apply H to A(i:m,i+ib:n) from the left */
dlarfb_(
"Left", "No transpose", "Forward", "Columnwise", m - i + 1, n - i - ib + 1, ib, i * a_dim1 + i + pointer_a, lda, 1 + pointer_work, ldwork, (i + ib) * a_dim1 + i + pointer_a, lda, ib + 1 //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
+ pointer_work, ldwork);
}
/* Apply H to rows i:m of current block */
info = dorg2r_(m - i + 1, ib, ib, i * a_dim1 + i + pointer_a, lda, i + pointer_tau, 1 + pointer_work);
/* Set rows 1:i-1 of current block to zero */
for (int j = i; j <= i + ib - 1; ++j) {
for (int l = 1; l <= i - 1; ++l) {
this.a[j * a_dim1 + l + pointer_a] = unit.create(0);
}
}
}
}
this.work[1 + pointer_work] = unit.create(iws);
return info;
}
/* -- LAPACK auxiliary routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
February 29, 1992
Purpose
=======
DLARFT forms the triangular factor T of a real block reflector H
of order n, which is defined as a product of k elementary reflectors.
If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
If STOREV = 'C', the vector which defines the elementary reflector
H(i) is stored in the i-th column of the array V, and
H = I - V * T * V'
If STOREV = 'R', the vector which defines the elementary reflector
H(i) is stored in the i-th row of the array V, and
H = I - V' * T * V
Arguments
=========
DIRECT (input) CHARACTER*1
Specifies the order in which the elementary reflectors are
multiplied to form the block reflector:
= 'F': H = H(1) H(2) . . . H(k) (Forward)
= 'B': H = H(k) . . . H(2) H(1) (Backward)
STOREV (input) CHARACTER*1
Specifies how the vectors which define the elementary
reflectors are stored (see also Further Details):
= 'C': columnwise
= 'R': rowwise
N (input) INTEGER
The order of the block reflector H. N >= 0.
K (input) INTEGER
The order of the triangular factor T (= the number of
elementary reflectors). K >= 1.
V (input/output) DOUBLE PRECISION array, dimension
(LDV,K) if STOREV = 'C'
(LDV,N) if STOREV = 'R'
The matrix V. See further details.
LDV (input) INTEGER
The leading dimension of the array V.
If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
TAU (input) DOUBLE PRECISION array, dimension (K)
TAU(i) must contain the scalar factor of the elementary
reflector H(i).
T (output) DOUBLE PRECISION array, dimension (LDT,K)
The k by k triangular factor T of the block reflector.
If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
lower triangular. The rest of the array is not used.
LDT (input) INTEGER
The leading dimension of the array T. LDT >= K.
Further Details
===============
The shape of the matrix V and the storage of the vectors which define
the H(i) is best illustrated by the following example with n = 5 and
k = 3. The elements equal to 1 are not stored; the corresponding
array elements are modified but restored on exit. The rest of the
array is not used.
DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
( v1 1 ) ( 1 v2 v2 v2 )
( v1 v2 1 ) ( 1 v3 v3 )
( v1 v2 v3 )
( v1 v2 v3 )
DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
V = ( v1 v2 v3 ) V = ( v1 v1 1 )
( v1 v2 v3 ) ( v2 v2 v2 1 )
( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
( 1 v3 )
( 1 )
=====================================================================
*/
/**
* @param direct direct
* @param storev storev
* @param n n
* @param k k
* @param aIndex aIndex
* @param ldv ldv
* @param tauIndex tauIndex
* @param workIndex workIndex
* @param ldt ldt
* @return result
*/
private int dlarft_(String direct, String storev, int n, int k, int aIndex, int ldv, int tauIndex, int workIndex, int ldt) {
final RS unit = this.a[0].createUnit();
RS[] v = this.a;
RS[] t = this.work;
RS c_b8 = unit.createZero();
int v_dim1 = ldv;
int v_offset = 1 + v_dim1 * 1;
int pointer_v = aIndex - v_offset;
int pointer_tau = tauIndex - 1;
int t_dim1 = ldt;
int t_offset = 1 + t_dim1 * 1;
int pointer_t = workIndex - t_offset;
if (n == 0) {
return 0;
}
if (BLAS.lsame(direct, "F")) { //$NON-NLS-1$
for (int i = 1; i <= k; ++i) {
if (this.tau[i + pointer_tau].isZero()) {
/* H(i) = I */
for (int j = 1; j <= i; ++j) {
t[i * t_dim1 + j + pointer_t] = unit.create(0);
/* L10: */
}
} else {
/* general case */
RS vii = v[i * v_dim1 + i + pointer_v];
v[i * v_dim1 + i + pointer_v] = unit.create(1);
if (BLAS.lsame(storev, "C")) { //$NON-NLS-1$
/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */
RS d1 = this.tau[i + pointer_tau].unaryMinus();
//array copy
RS[] v1iTemp = unit.createArray(v.length - (v_dim1 + i + pointer_v));
RS[] viiTemp = unit.createArray(v.length - (i * v_dim1 + i + pointer_v));
RS[] ti1Temp = unit.createArray(t.length - (i * t_dim1 + 1 + pointer_t));
System.arraycopy(t, i * t_dim1 + 1 + pointer_t, ti1Temp, 0, ti1Temp.length);
System.arraycopy(v, v_dim1 + i + pointer_v, v1iTemp, 0, v1iTemp.length);
System.arraycopy(v, i * v_dim1 + i + pointer_v, viiTemp, 0, viiTemp.length);
BLAS.dgemv("Transpose", n - i + 1, i - 1, d1, v1iTemp, ldv, viiTemp, 1, c_b8, ti1Temp, 1); //$NON-NLS-1$
System.arraycopy(ti1Temp, 0, t, i * t_dim1 + 1 + pointer_t, ti1Temp.length);
} else {
/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */
RS d1 = this.tau[i + pointer_tau].unaryMinus();
//array copy
RS[] vi1Temp = unit.createArray(i * v_dim1 + 1 + pointer_v);
RS[] viiTemp = unit.createArray(i * v_dim1 + i + pointer_v);
RS[] ti1Temp = unit.createArray(t.length - (i * t_dim1 + 1 + pointer_t));
System.arraycopy(v, i * v_dim1 + 1 + pointer_v, vi1Temp, 0, vi1Temp.length);
System.arraycopy(v, i * v_dim1 + i + pointer_v, viiTemp, 0, viiTemp.length);
System.arraycopy(t, i * t_dim1 + 1 + pointer_t, ti1Temp, 0, ti1Temp.length);
BLAS.dgemv("No transpose", i - 1, n - i + 1, d1, vi1Temp, ldv, viiTemp, ldv, c_b8, ti1Temp, 1); //$NON-NLS-1$
System.arraycopy(ti1Temp, 0, t, i * t_dim1 + 1 + pointer_t, ti1Temp.length);
}
v[i * v_dim1 + i + pointer_v] = vii;
//array copy
RS[] toTemp = unit.createArray(t.length - (t_offset + pointer_t));
RS[] ti1Temp = unit.createArray(t.length - (i * t_dim1 + 1 + pointer_t));
System.arraycopy(t, t_offset + pointer_t, ti1Temp, 0, toTemp.length);
System.arraycopy(t, i * t_dim1 + 1 + pointer_t, ti1Temp, 0, ti1Temp.length);
BLAS.dtrmv("Upper", "No transpose", "Non-unit", i - 1, toTemp, ldt, ti1Temp, 1); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
System.arraycopy(ti1Temp, 0, t, i * t_dim1 + 1 + pointer_t, ti1Temp.length);
t[i * t_dim1 + i + pointer_t] = this.tau[i + pointer_tau];
}
/* L20: */
}
} else {
for (int i = k; i >= 1; --i) {
if (this.tau[i + pointer_tau].isZero()) {
/* H(i) = I */
for (int j = i; j <= k; ++j) {
t[i * t_dim1 + j + pointer_t] = unit.createZero();
/* L30: */
}
} else {
/* general case */
if (i < k) {
if (BLAS.lsame(storev, "C")) { //$NON-NLS-1$
RS vii = v[i * v_dim1 + n - k + i + pointer_v];// v_ref(n - k + i__, i__);
v[i * v_dim1 + n - k + i + pointer_v] = unit.create(1);
/* T(i+1:k,i) := - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) */
RS d1 = this.tau[i + pointer_tau].unaryMinus();
//array copy
RS[] tTemp = unit.createArray(t.length - (i * t_dim1 + i + 1 + pointer_t));
RS[] viTemp = unit.createArray(v.length - ((i) * v_dim1 + 1 + pointer_v));
RS[] vi1Temp = unit.createArray(v.length - ((i + 1) * v_dim1 + 1 + pointer_v));
System.arraycopy(t, i * t_dim1 + i + 1 + pointer_t, tTemp, 0, tTemp.length);
System.arraycopy(v, (i) * v_dim1 + 1 + pointer_v, viTemp, 0, viTemp.length);
System.arraycopy(v, (i + 1) * v_dim1 + 1 + pointer_v, vi1Temp, 0, vi1Temp.length);
BLAS.dgemv("Transpose", n - k + i, k - i, d1, vi1Temp, ldv, viTemp, 1, c_b8, tTemp, 1); //$NON-NLS-1$
System.arraycopy(tTemp, 0, t, i * t_dim1 + i + 1 + pointer_t, tTemp.length);
v[i * v_dim1 + n - k + i + pointer_v] = vii;
} else {
RS vii = v[(n - k + i) * v_dim1 + i + pointer_v];//v_ref(i__, n - k + i__);
v[(n - k + i) * v_dim1 + i + pointer_v] = unit.create(1);
/* T(i+1:k,i) := - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' */
RS d1 = this.tau[i + pointer_tau].unaryMinus();
//array copy
RS[] tTemp = unit.createArray(t.length - (i * t_dim1 + i + 1 + pointer_t));
RS[] v1i1Temp = unit.createArray(v.length - (v_dim1 + 1 + i + pointer_v));
RS[] v1iTemp = unit.createArray(v.length - (v_dim1 + i + pointer_v));
System.arraycopy(t, i * t_dim1 + i + 1 + pointer_t, tTemp, 0, tTemp.length);
System.arraycopy(v, v_dim1 + 1 + i + pointer_v, v1i1Temp, 0, v1i1Temp.length);
System.arraycopy(v, v_dim1 + i + pointer_v, v1iTemp, 0, v1iTemp.length);
BLAS.dgemv("No transpose", k - i, n - k + i, d1, v1i1Temp, ldv, v1iTemp, ldv, c_b8, tTemp, 1); //$NON-NLS-1$
System.arraycopy(tTemp, 0, t, i * t_dim1 + i + 1 + pointer_t, tTemp.length);
v[(n - k + i) * v_dim1 + i + pointer_v] = vii;
}
//array copy
RS[] ti1i1 = unit.createArray(t.length - ((i + 1) * t_dim1 + i + 1 + pointer_t));
RS[] tii1 = unit.createArray(t.length - (i * t_dim1 + i + 1 + pointer_t));
System.arraycopy(t, (i + 1) * t_dim1 + i + 1 + pointer_t, ti1i1, 0, ti1i1.length);
System.arraycopy(t, (i) * t_dim1 + i + 1 + pointer_t, tii1, 0, tii1.length);
BLAS.dtrmv("Lower", "No transpose", "Non-unit", k - i, ti1i1, ldt, tii1, 1); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
System.arraycopy(tii1, 0, t, i * t_dim1 + i + 1 + pointer_t, tii1.length);
}
t[i * t_dim1 + i + pointer_t] = this.tau[i + pointer_tau];
}
/* L40: */
}
}
return 0;
}
/**
* このメソッドはdorgtrのサブルーチンとして組み込んでいるので、
* 単体で使用する場合はClapackクラスにDlarfb用のメソッドを作ってから使用すること。
*
* @param side side
* @param trans trans
* @param direct direct
* @param storev storev
* @param m m
* @param n n
* @param k k
* @param vIndex 開始位置(配列aの部分行列)
* @param ldv ldv
* @param tIndex 開始位置(配列workの部分行列)
* @param ldt ldt
* @param cIndex 開始位置(aの部分行列)
* @param ldc ldc
* @param workIndex 開始位置(workの部分行列)
* @param ldwork2 ldwork2
*/
private void dlarfb_(String side, String trans, String direct, String storev, int m, int n, int k, int vIndex, int ldv, int tIndex, int ldt, int cIndex, int ldc, int workIndex, int ldwork2) {
final RS unit = this.a[0].createUnit();
RS[] vin = unit.createArray(this.a.length - vIndex);
RS[] tin = unit.createArray(this.work.length - tIndex);
RS[] cin = unit.createArray(this.a.length - cIndex);
RS[] workin = unit.createArray(this.work.length - workIndex);
System.arraycopy(this.a, vIndex, vin, 0, vin.length);
System.arraycopy(this.work, tIndex, tin, 0, tin.length);
System.arraycopy(this.a, cIndex, cin, 0, cin.length);
System.arraycopy(this.work, workIndex, workin, 0, workin.length);
new Dlarfb<RS,RM,CS,CM>().dlarfb_(side, trans, direct, storev, m, n, k, vin, ldv, tin, ldt, cin, ldc, workin, ldwork2);
System.arraycopy(cin, 0, this.a, cIndex, cin.length);
}
/**
* @param m m
* @param n n
* @param k k
* @param aIndex aIndex
* @param lda lda
* @param tauIndex tauIndex
* @param workIndex workIndex
* @return result
*/
private int dorg2r_(int m, int n, int k, int aIndex, int lda, int tauIndex, int workIndex) {
final RS unit = this.a[0].createUnit();
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = -a_offset;
int pointer_tau = -1;
int pointer_work = -1;
int info = 0;
if (m < 0) {
info = -1;
} else if (n < 0 || n > m) {
info = -2;
} else if (k < 0 || k > n) {
info = -3;
} else if (lda < Math.max(1, m)) {
info = -5;
}
if (info != 0) {
BLAS.xerbla("DORG2R", -info); //$NON-NLS-1$
return info;
}
/* Quick return if possible */
if (n <= 0) {
return info;
}
/* Initialise columns k+1:n to columns of the unit matrix */
for (int j = k + 1; j <= n; ++j) {
for (int l = 1; l <= m; ++l) {
this.a[j * a_dim1 + l + pointer_a] = unit.create(0);
}
this.a[j * a_dim1 + j + pointer_a] = unit.create(1);
}
for (int i = k; i >= 1; --i) {
/* Apply H(i) to A(i:m,i:n) from the left */
if (i < n) {
this.a[i * a_dim1 + i + pointer_a] = unit.create(1);
dlarf_("Left", m - i + 1, n - i, i * a_dim1 + i + pointer_a, 1, this.tau[i + pointer_tau], (i + 1) * a_dim1 + i + pointer_a, lda, 1 + pointer_work); //$NON-NLS-1$
}
if (i < m) {
RS d1 = this.tau[i + pointer_tau].unaryMinus();
RS[] aTemp = unit.createArray(this.a.length - (i * a_dim1 + i + 1 + pointer_a));
System.arraycopy(this.a, i * a_dim1 + i + 1 + pointer_a, aTemp, 0, aTemp.length);
BLAS.dscal(m - i, d1, aTemp, 1);
System.arraycopy(aTemp, 0, this.a, i * a_dim1 + i + 1 + pointer_a, aTemp.length);
}
this.a[i * a_dim1 + i + pointer_a] = unit.create(1).subtract(this.tau[i + pointer_tau]);
/* Set A(1:i-1,i) to zero */
for (int l = 1; l <= i - 1; ++l) {
this.a[i * a_dim1 + l + pointer_a] = unit.create(0);
}
}
return info;
}
}