Dlarfb.java
package org.mklab.sdpj.gpack.lapackwrap;
import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;
import org.mklab.sdpj.gpack.blaswrap.BLAS;
/**
* @author takafumi
* @param <RS> type of real scalar
* @param <RM> type of real matrix
* @param <CS> type of complex scalar
* @param <CM> type of complex Matrix
*/
public class Dlarfb<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {
/** */
private RS[] v;
/** */
private RS[] c;
/** */
private RS[] work;
/** */
private RS[] t;
/** */
private RS[] vTempOffset;
/** */
private RS[] cTempOffset;
/** */
private RS[] workTempOffset;
/** */
private RS[] tTempOffset;
/** */
private int pointer_v;
/** */
private int pointer_c;
/** */
private int pointer_work;
/** */
private int pointer_t;
/** */
private int v_offset;
/** */
private int c_offset;
/** */
private int work_offset;
/** */
private int t_offset;
/* -- LAPACK auxiliary routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
February 29, 1992
Purpose
=======
DLARFB applies a real block reflector H or its transpose H' to a
real m by n matrix C, from either the left or the right.
Arguments
=========
SIDE (input) CHARACTER*1
= 'L': apply H or H' from the Left
= 'R': apply H or H' from the Right
TRANS (input) CHARACTER*1
= 'N': apply H (No transpose)
= 'T': apply H' (Transpose)
DIRECT (input) CHARACTER*1
Indicates how H is formed from a product of elementary
reflectors
= 'F': H = H(1) H(2) . . . H(k) (Forward)
= 'B': H = H(k) . . . H(2) H(1) (Backward)
STOREV (input) CHARACTER*1
Indicates how the vectors which define the elementary
reflectors are stored:
= 'C': Columnwise
= 'R': Rowwise
M (input) INTEGER
The number of rows of the matrix C.
N (input) INTEGER
The number of columns of the matrix C.
K (input) INTEGER
The order of the matrix T (= the number of elementary
reflectors whose product defines the block reflector).
V (input) DOUBLE PRECISION array, dimension
(LDV,K) if STOREV = 'C'
(LDV,M) if STOREV = 'R' and SIDE = 'L'
(LDV,N) if STOREV = 'R' and SIDE = 'R'
The matrix V. See further details.
LDV (input) INTEGER
The leading dimension of the array V.
If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
if STOREV = 'R', LDV >= K.
T (input) DOUBLE PRECISION array, dimension (LDT,K)
The triangular k by k matrix T in the representation of the
block reflector.
LDT (input) INTEGER
The leading dimension of the array T. LDT >= K.
C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
On entry, the m by n matrix C.
On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
LDC (input) INTEGER
The leading dimension of the array C. LDA >= max(1,M).
WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
LDWORK (input) INTEGER
The leading dimension of the array WORK.
If SIDE = 'L', LDWORK >= max(1,N);
if SIDE = 'R', LDWORK >= max(1,M).
=====================================================================
*/
/**
* @param side side
* @param trans trans
* @param direct direct
* @param storev storev
* @param m m
* @param n n
* @param k k
* @param vin vin
* @param ldv ldv
* @param tin tin
* @param ldt ldt
* @param cin overwritten by H*C or H'*C or C*H or C*H'.
* @param ldc ldc
* @param workin working space
* @param ldwork ldwork
* @return result
*/
int dlarfb_(String side, String trans, String direct, String storev, int m, int n, int k, RS[] vin, int ldv, RS[] tin, int ldt, RS[] cin, int ldc, RS[] workin, int ldwork) {
final RS unit = vin[0].createUnit();
this.c = cin;
this.v = vin;
this.work = workin;
this.t = tin;
RS c_b14 = unit.createUnit();
RS c_b25 = unit.createUnit().unaryMinus();
int v_dim1 = ldv;
this.v_offset = 1 + v_dim1 * 1;
int pointer_v = -this.v_offset;
int t_dim1 = ldt;
this.t_offset = 1 + t_dim1 * 1;
int pointer_t = -this.t_offset;
int c_dim1 = ldc;
this.c_offset = 1 + c_dim1 * 1;
int pointer_c = -this.c_offset;
int work_dim1 = ldwork;
this.work_offset = 1 + work_dim1 * 1;
int pointer_work = -this.work_offset;
//変更されないv,tの部分行列を作成
this.createSubvectorVTtempOffset();
/* Function Body */
if (m <= 0 || n <= 0) {
return 0;
}
String transt;
if (BLAS.lsame(trans, "N")) { //$NON-NLS-1$
transt = "T"; //$NON-NLS-1$
} else {
transt = "N"; //$NON-NLS-1$
}
if (BLAS.lsame(storev, "C")) { //$NON-NLS-1$
if (BLAS.lsame(direct, "F")) { //$NON-NLS-1$
/* Let V = ( V1 ) (first K rows)
( V2 )
where V1 is unit lower triangular. */
if (BLAS.lsame(side, "L")) { //$NON-NLS-1$
/* Form H * C or H' * C where C = ( C1 )
( C2 )
W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK)
W := C1' */
for (int j = 1; j <= k; ++j) {
RS[] cTemp = unit.createArray(this.c.length - (c_dim1 + j + pointer_c));
RS[] workTemp = unit.createArray(this.work.length - (j * work_dim1 + 1 + pointer_work));
System.arraycopy(this.c, c_dim1 + j + pointer_c, cTemp, 0, cTemp.length);
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTemp, 0, workTemp.length);
BLAS.dcopy(n, cTemp, ldc, workTemp, 1);
System.arraycopy(cTemp, 0, this.c, c_dim1 + j + pointer_c, cTemp.length);
System.arraycopy(workTemp, 0, this.work, j * work_dim1 + 1 + pointer_work, workTemp.length);
/* L10: */
}
/* W := W * V1 */
// 配列コピー(workTempOffset)
this.arraycopyWorkForWorkTempOffset();
BLAS.dtrmm("Right", "Lower", "No transpose", "Unit", n, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
RS[] v1k1 = unit.createArray(this.v.length - (v_dim1 + k + 1 + pointer_v));
RS[] c1k1 = unit.createArray(this.c.length - (c_dim1 + k + 1 + pointer_c));
System.arraycopy(this.v, v_dim1 + k + 1 + pointer_v, v1k1, 0, v1k1.length);
System.arraycopy(this.c, c_dim1 + k + 1 + pointer_c, c1k1, 0, c1k1.length);
if (m > k) {
// W := W + C2'*V2
BLAS.dgemm("Transpose", "No transpose", n, k, m - k, c_b14, c1k1, ldc, v1k1, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
System.arraycopy(c1k1, 0, this.c, c_dim1 + k + 1 + pointer_c, c1k1.length);
}
// W := W * T' or W * T
BLAS.dtrmm("Right", "Upper", transt, "Non-unit", n, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
// C := C - V * W'
if (m > k) {
BLAS.dgemm("No transpose", "Transpose", m - k, n, k, c_b25, v1k1, ldv, this.workTempOffset, ldwork, c_b14, c1k1, ldc); //$NON-NLS-1$ //$NON-NLS-2$
}
// W := W * V1'
BLAS.dtrmm("Right", "Lower", "Transpose", "Unit", n, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
// C1 := C1 - W'
//workTempOffsetをworkにコピー&解放
this.arraycopyWorkTempOffsetForWork();
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= n; ++i) {
this.c[i * c_dim1 + j + pointer_c] = this.c[i * c_dim1 + j + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L20: */
}
/* L30: */
}
} else if (BLAS.lsame(side, "R")) { //$NON-NLS-1$
/* Form C * H or C * H' where C = ( C1 C2 )
W := C * V = (C1*V1 + C2*V2) (stored in WORK)
W := C1 */
for (int j = 1; j <= k; ++j) {
RS[] workTempj1 = unit.createArray(this.work.length - (j * work_dim1 + 1 + pointer_work));
RS[] cTempj1 = unit.createArray(this.c.length - (j * c_dim1 + 1 + pointer_c));
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTempj1, 0, workTempj1.length);
System.arraycopy(this.c, j * c_dim1 + 1 + pointer_c, cTempj1, 0, cTempj1.length);
BLAS.dcopy(m, cTempj1, 1, workTempj1, 1);
System.arraycopy(workTempj1, 0, this.work, j * work_dim1 + 1 + pointer_work, workTempj1.length);
System.arraycopy(cTempj1, 0, this.c, j * c_dim1 + 1 + pointer_c, cTempj1.length);
/* L40: */
}
/* W := W * V1 */
//***配列のコピー(work-->workTemp)
this.arraycopyWorkForWorkTempOffset();
BLAS.dtrmm("Right", "Lower", "No transpose", "Unit", m, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
if (n > k) {
/* W := W + C2 * V2 */
RS[] vTemp = unit.createArray(this.v.length - (v_dim1 + k + 1 + pointer_v));
RS[] cTemp = unit.createArray(this.c.length - ((k + 1) * c_dim1 + 1 + pointer_c));
System.arraycopy(this.v, v_dim1 + k + 1 + pointer_v, this.v, 0, vTemp.length);
System.arraycopy(this.c, (k + 1) * c_dim1 + 1 + pointer_c, cTemp, 0, cTemp.length);
BLAS.dgemm("No transpose", "No transpose", m, k, n - k, c_b14, cTemp, ldc, vTemp, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
System.arraycopy(vTemp, 0, this.v, this.v.length - vTemp.length, vTemp.length);
System.arraycopy(cTemp, 0, this.c, this.c.length - cTemp.length, cTemp.length);
}
/* W := W * T or W * T' */
BLAS.dtrmm("Right", "Upper", trans, "Non-unit", m, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
/* C := C - W * V' */
if (n > k) {
/* C2 := C2 - W * V2' */
RS[] vTemp = unit.createArray(this.v.length - (v_dim1 + k + 1 + pointer_v));
RS[] cTemp = unit.createArray(this.c.length - ((k + 1) * c_dim1 + 1 + pointer_c));
System.arraycopy(this.v, v_dim1 + k + 1 + pointer_v, this.v, 0, vTemp.length);
System.arraycopy(this.c, (k + 1) * c_dim1 + 1 + pointer_c, cTemp, 0, cTemp.length);
BLAS.dgemm("No transpose", "Transpose", m, n - k, k, c_b25, this.workTempOffset, ldwork, vTemp, ldv, c_b14, cTemp, ldc); //$NON-NLS-1$ //$NON-NLS-2$
System.arraycopy(vTemp, 0, this.v, this.v.length - vTemp.length, vTemp.length);
System.arraycopy(cTemp, 0, this.c, this.c.length - cTemp.length, cTemp.length);
}
/* W := W * V1' */
BLAS.dtrmm("Right", "Lower", "Transpose", "Unit", m, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
/* C1 := C1 - W */
//配列のコピーを元に戻す
this.arraycopyWorkTempOffsetForWork();
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= m; ++i) {
this.c[j * c_dim1 + i + pointer_c] = this.c[j * c_dim1 + i + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L50: */
}
/* L60: */
}
}
} else {
/* Let V = ( V1 )
( V2 ) (last K rows)
where V2 is unit upper triangular. */
if (BLAS.lsame(side, "L")) { //$NON-NLS-1$
/* Form H * C or H' * C where C = ( C1 )
( C2 )
W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK)
W := C2' */
for (int j = 1; j <= k; ++j) {
RS[] cTemp = unit.createArray(n);
RS[] workTemp = unit.createArray(n);
System.arraycopy(this.c, c_dim1 + m - k + j + pointer_c, cTemp, 0, n);
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTemp, 0, n);
BLAS.dcopy(n, cTemp, ldc, workTemp, 1);
System.arraycopy(cTemp, 0, this.c, c_dim1 + m - k + j + pointer_c, n);
System.arraycopy(workTemp, 0, this.work, j * work_dim1 + 1 + pointer_work, n);
/* L70: */
}
//配列をコピー work-->workTempOffset c-->cTempOffset
this.arraycopyWorkForWorkTempOffset();
this.arraycopyCForCTempOffset();
/* W := W * V2 */
RS[] vTemp = unit.createArray(this.v.length - (v_dim1 + m - k + 1 + pointer_v));
System.arraycopy(this.v, v_dim1 + m - k + 1 + pointer_v, vTemp, 0, vTemp.length);
BLAS.dtrmm("Right", "Upper", "No transpose", "Unit", n, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
// System.arraycopy(vTemp, 0, v, v_dim1 + m-k+1+pointer_v, vTemp.length);
if (m > k) {
/* W := W + C1'*V1 */
BLAS.dgemm("Transpose", "No transpose", n, k, m - k, c_b14, this.cTempOffset, ldc, this.vTempOffset, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * T' or W * T */
BLAS.dtrmm("Right", "Lower", transt, "Non-unit", n, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
/* C := C - V * W' */
if (m > k) {
/* C1 := C1 - V1 * W' */
BLAS.dgemm("No transpose", "Transpose", m - k, n, k, c_b25, this.vTempOffset, ldv, this.workTempOffset, ldwork, c_b14, this.cTempOffset, ldc); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * V2' */
BLAS.dtrmm("Right", "Upper", "Transpose", "Unit", n, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
System.arraycopy(vTemp, 0, this.v, v_dim1 + m - k + 1 + pointer_v, vTemp.length);
/* C2 := C2 - W' */
//配列を元に戻す。
this.arraycopyCTempOffsetForC();
this.arraycopyWorkTempOffsetForWork();
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= n; ++i) {
this.c[i * c_dim1 + m - k + j + pointer_c] = this.c[i * c_dim1 + m - k + j + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L80: */
}
/* L90: */
}
} else if (BLAS.lsame(side, "R")) { //$NON-NLS-1$
/* Form C * H or C * H' where C = ( C1 C2 )
W := C * V = (C1*V1 + C2*V2) (stored in WORK)
W := C2 */
for (int j = 1; j <= k; ++j) {
RS[] cTemp = unit.createArray(m);
RS[] workTemp = unit.createArray(m);
System.arraycopy(this.c, (n - k + j) * c_dim1 + 1 + pointer_c, cTemp, 0, m);
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTemp, 0, m);
BLAS.dcopy(m, cTemp, 1, workTemp, 1);
System.arraycopy(cTemp, 0, this.c, (n - k + j) * c_dim1 + 1 + pointer_c, m);
System.arraycopy(workTemp, 0, this.work, j * work_dim1 + 1 + pointer_work, m);
/* L100: */
}
//配列のコピー c,work-->cTempOffset,workTempOffset
this.arraycopyWorkForWorkTempOffset();
this.arraycopyCForCTempOffset();
/* W := W * V2 */
RS[] vTemp = unit.createArray(this.v.length - (v_dim1 + n - k + 1 + pointer_v));
System.arraycopy(this.v, v_dim1 + n - k + 1 + pointer_v, vTemp, 0, vTemp.length);
BLAS.dtrmm("Right", "Upper", "No transpose", "Unit", m, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
if (n > k) {
/* W := W + C1 * V1 */
BLAS.dgemm("No transpose", "No transpose", m, k, n - k, c_b14, this.cTempOffset, ldc, this.vTempOffset, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * T or W * T' */
BLAS.dtrmm("Right", "Lower", trans, "Non-unit", m, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
/* C := C - W * V' */
if (n > k) {
/* C1 := C1 - W * V1' */
BLAS.dgemm("No transpose", "Transpose", m, n - k, k, c_b25, this.workTempOffset, ldwork, this.vTempOffset, ldv, c_b14, this.cTempOffset, ldc); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * V2' */
BLAS.dtrmm("Right", "Upper", "Transpose", "Unit", m, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
//コピーした配列を戻す
this.arraycopyWorkTempOffsetForWork();
this.arraycopyCTempOffsetForC();
System.arraycopy(vTemp, 0, this.v, v_dim1 + n - k + 1 + pointer_v, vTemp.length);
/* C2 := C2 - W */
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= m; ++i) {
this.c[(n - k + j) * c_dim1 + i + pointer_c] = this.c[(n - k + j) * c_dim1 + i + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L110: */
}
/* L120: */
}
}
}
} else if (BLAS.lsame(storev, "R")) { //$NON-NLS-1$
if (BLAS.lsame(direct, "F")) { //$NON-NLS-1$
/* Let V = ( V1 V2 ) (V1: first K columns)
where V1 is unit upper triangular. */
if (BLAS.lsame(side, "L")) { //$NON-NLS-1$
/* Form H * C or H' * C where C = ( C1 )
( C2 )
W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK)
W := C1' */
for (int j = 1; j <= k; ++j) {
RS[] cTemp = unit.createArray(n);
RS[] workTemp = unit.createArray(n);
System.arraycopy(this.c, c_dim1 + j + pointer_c, cTemp, 0, n);
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTemp, 0, n);
BLAS.dcopy(n, cTemp, ldc, workTemp, 1);
System.arraycopy(cTemp, 0, this.c, c_dim1 + j + pointer_c, n);
System.arraycopy(workTemp, 0, this.work, j * work_dim1 + 1 + pointer_work, n);
/* L130: */
}
//配列のコピーwork c --> workTempOffset,cTempOffset
this.arraycopyCForCTempOffset();
this.arraycopyWorkForWorkTempOffset();
RS[] cTemp = unit.createArray(this.c.length - (c_dim1 + k + 1 + pointer_c));
RS[] vTemp = unit.createArray(this.v.length - ((k + 1) * v_dim1 + 1 + pointer_v));
System.arraycopy(this.c, c_dim1 + k + 1 + pointer_c, cTemp, 0, cTemp.length);
System.arraycopy(this.v, (k + 1) * v_dim1 + 1 + pointer_v, vTemp, 0, vTemp.length);
/* W := W * V1' */
BLAS.dtrmm("Right", "Upper", "Transpose", "Unit", n, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
if (m > k) {
/* W := W + C2'*V2' */
BLAS.dgemm("Transpose", "Transpose", n, k, m - k, c_b14, cTemp, ldc, vTemp, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * T' or W * T */
BLAS.dtrmm("Right", "Upper", transt, "Non-unit", n, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
/* C := C - V' * W' */
if (m > k) {
/* C2 := C2 - V2' * W' */
BLAS.dgemm("Transpose", "Transpose", m - k, n, k, c_b25, vTemp, ldv, this.workTempOffset, ldwork, c_b14, cTemp, ldc); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * V1 */
BLAS.dtrmm("Right", "Upper", "No transpose", "Unit", n, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
//コピーした配列を元に戻す
this.arraycopyCTempOffsetForC();
this.arraycopyWorkTempOffsetForWork();
System.arraycopy(cTemp, 0, this.c, c_dim1 + k + 1 + pointer_c, cTemp.length);
System.arraycopy(vTemp, 0, this.v, (k + 1) * v_dim1 + 1 + pointer_v, vTemp.length);
/* C1 := C1 - W' */
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= n; ++i) {
this.c[j * c_dim1 + i + pointer_c] = this.c[j * c_dim1 + i + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L140: */
}
/* L150: */
}
} else if (BLAS.lsame(side, "R")) { //$NON-NLS-1$
/* Form C * H or C * H' where C = ( C1 C2 )
W := C * V' = (C1*V1' + C2*V2') (stored in WORK)
W := C1 */
for (int j = 1; j <= k; ++j) {
RS[] cTemp = unit.createArray(m);
RS[] workTemp = unit.createArray(m);
System.arraycopy(this.c, j * c_dim1 + 1 + pointer_c, cTemp, 0, m);
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTemp, 0, m);
BLAS.dcopy(m, cTemp, 1, workTemp, 1);
System.arraycopy(cTemp, 0, this.c, j * c_dim1 + 1 + pointer_c, m);
System.arraycopy(workTemp, 0, this.work, j * work_dim1 + 1 + pointer_work, m);
/* L160: */
}
//配列のコピー work c --> workTempOffset cTempOffset
this.arraycopyCForCTempOffset();
this.arraycopyWorkForWorkTempOffset();
RS[] cTemp = unit.createArray(this.c.length - ((k + 1) * c_dim1 + 1 + pointer_c));
RS[] vTemp = unit.createArray(this.v.length - ((k + 1) * v_dim1 + 1 + pointer_v));
System.arraycopy(this.c, (k + 1) * c_dim1 + 1 + pointer_c, cTemp, 0, cTemp.length);
System.arraycopy(this.v, (k + 1) * v_dim1 + 1 + pointer_v, vTemp, 0, vTemp.length);
/* W := W * V1' */
BLAS.dtrmm("Right", "Upper", "Transpose", "Unit", m, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
if (n > k) {
/* W := W + C2 * V2' */
BLAS.dgemm("No transpose", "Transpose", m, k, n - k, c_b14, cTemp, ldc, vTemp, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * T or W * T' */
BLAS.dtrmm("Right", "Upper", trans, "Non-unit", m, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
/* C := C - W * V */
if (n > k) {
/* C2 := C2 - W * V2 */
BLAS.dgemm("No transpose", "No transpose", m, n - k, k, c_b25, this.workTempOffset, ldwork, vTemp, ldv, c_b14, cTemp, ldc); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * V1 */
BLAS.dtrmm("Right", "Upper", "No transpose", "Unit", m, k, c_b14, this.vTempOffset, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
//コピーした配列を元に戻す
this.arraycopyCTempOffsetForC();
this.arraycopyWorkTempOffsetForWork();
System.arraycopy(cTemp, 0, this.c, (k + 1) * c_dim1 + 1 + pointer_c, cTemp.length);
/* C1 := C1 - W */
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= m; ++i) {
this.c[j * c_dim1 + i + pointer_c] = this.c[j * c_dim1 + i + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L170: */
}
/* L180: */
}
}
} else {
/* Let V = ( V1 V2 ) (V2: last K columns)
where V2 is unit lower triangular. */
if (BLAS.lsame(side, "L")) { //$NON-NLS-1$
/* Form H * C or H' * C where C = ( C1 )
( C2 )
W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK)
W := C2' */
for (int j = 1; j <= k; ++j) {
RS[] cTemp = unit.createArray(n);
RS[] workTemp = unit.createArray(n);
System.arraycopy(this.c, c_dim1 + m - k + j + pointer_c, cTemp, 0, n);
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTemp, 0, n);
BLAS.dcopy(n, cTemp, ldc, workTemp, 1);
System.arraycopy(cTemp, 0, this.c, c_dim1 + m - k + j + pointer_c, n);
System.arraycopy(workTemp, 0, this.work, j * work_dim1 + 1 + pointer_work, n);
/* L190: */
}
//配列のコピーwork c --> TempOffset
this.arraycopyCForCTempOffset();
this.arraycopyWorkForWorkTempOffset();
RS[] vTemp = unit.createArray(this.v.length - ((m - k + 1) * v_dim1 + 1 + pointer_v));
System.arraycopy(this.v, this.v.length - vTemp.length, vTemp, 0, vTemp.length);
/* W := W * V2' */
BLAS.dtrmm("Right", "Lower", "Transpose", "Unit", n, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
if (m > k) {
/* W := W + C1'*V1' */
BLAS.dgemm("Transpose", "Transpose", n, k, m - k, c_b14, this.cTempOffset, ldc, this.vTempOffset, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * T' or W * T */
BLAS.dtrmm("Right", "Lower", transt, "Non-unit", n, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
/* C := C - V' * W' */
if (m > k) {
/* C1 := C1 - V1' * W' */
BLAS.dgemm("Transpose", "Transpose", m - k, n, k, c_b25, this.vTempOffset, ldv, this.workTempOffset, ldwork, c_b14, this.cTempOffset, ldc); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * V2 */
BLAS.dtrmm("Right", "Lower", "No transpose", "Unit", n, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
//コピーした配列を元に戻す
this.arraycopyCTempOffsetForC();
this.arraycopyWorkTempOffsetForWork();
/* C2 := C2 - W' */
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= n; ++i) {
this.c[i * c_dim1 + m - k + j + pointer_c] = this.c[i * c_dim1 + m - k + j + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L200: */
}
/* L210: */
}
} else if (BLAS.lsame(side, "R")) { //$NON-NLS-1$
/* Form C * H or C * H' where C = ( C1 C2 )
W := C * V' = (C1*V1' + C2*V2') (stored in WORK)
W := C2 */
for (int j = 1; j <= k; ++j) {
RS[] cTemp = unit.createArray(m);
RS[] workTemp = unit.createArray(m);
System.arraycopy(this.c, (n - k + j) * c_dim1 + 1 + pointer_c, cTemp, 0, n);
System.arraycopy(this.work, j * work_dim1 + 1 + pointer_work, workTemp, 0, workTemp.length);
BLAS.dcopy(m, cTemp, 1, workTemp, 1);
System.arraycopy(cTemp, 0, this.c, (n - k + j) * c_dim1 + 1 + pointer_c, n);
System.arraycopy(workTemp, 0, this.work, j * work_dim1 + 1 + pointer_work, workTemp.length);
/* L220: */
}
//配列のコピー
this.arraycopyCForCTempOffset();
this.arraycopyWorkForWorkTempOffset();
RS[] vTemp = unit.createArray(this.v.length - ((n - k + 1) * v_dim1 + 1 + pointer_v));
System.arraycopy(this.v, (n - k + 1) * v_dim1 + 1 + pointer_v, vTemp, 0, vTemp.length);
/* W := W * V2' */
BLAS.dtrmm("Right", "Lower", "Transpose", "Unit", m, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
if (n > k) {
/* W := W + C1 * V1' */
BLAS.dgemm("No transpose", "Transpose", m, k, n - k, c_b14, this.cTempOffset, ldc, this.vTempOffset, ldv, c_b14, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * T or W * T' */
BLAS.dtrmm("Right", "Lower", trans, "Non-unit", m, k, c_b14, this.tTempOffset, ldt, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
/* C := C - W * V */
if (n > k) {
/* C1 := C1 - W * V1 */
BLAS.dgemm("No transpose", "No transpose", m, n - k, k, c_b25, this.workTempOffset, ldwork, this.vTempOffset, ldv, c_b14, this.cTempOffset, ldc); //$NON-NLS-1$ //$NON-NLS-2$
}
/* W := W * V2 */
BLAS.dtrmm("Right", "Lower", "No transpose", "Unit", m, k, c_b14, vTemp, ldv, this.workTempOffset, ldwork); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
//コピーした配列を元に戻す
this.arraycopyCTempOffsetForC();
this.arraycopyWorkTempOffsetForWork();
/* C1 := C1 - W */
for (int j = 1; j <= k; ++j) {
for (int i = 1; i <= m; ++i) {
this.c[(n - k + j) * c_dim1 + i + pointer_c] = this.c[(n - k + j) * c_dim1 + i + pointer_c].subtract(this.work[j * work_dim1 + i + pointer_work]);
/* L230: */
}
/* L240: */
}
}
}
}
return 0;
}
/**
* 使用する部分行列(変更されない部分)を作成します。 変更されるworkやc__は扱っていません。
*/
private void createSubvectorVTtempOffset() {
final RS unit = this.c[0].createUnit();
this.vTempOffset = unit.createArray(this.v.length - (this.v_offset + this.pointer_v));
this.tTempOffset = unit.createArray(this.t.length - (this.t_offset + this.pointer_t));
System.arraycopy(this.v, this.v_offset + this.pointer_v, this.vTempOffset, 0, this.vTempOffset.length);
System.arraycopy(this.t, this.t_offset + this.pointer_t, this.tTempOffset, 0, this.tTempOffset.length);
}
/**
* c__ --> cTempOffset c__の部分行列を生成。(offset+pointer)
*/
private void arraycopyCForCTempOffset() {
final RS unit = this.c[0].createUnit();
this.cTempOffset = unit.createArray(this.c.length - (this.c_offset + this.pointer_c));
System.arraycopy(this.c, this.c_offset + this.pointer_c, this.cTempOffset, 0, this.cTempOffset.length);
}
/**
* work --> workTempOffset
*/
private void arraycopyWorkForWorkTempOffset() {
final RS unit = this.c[0].createUnit();
this.workTempOffset = unit.createArray(this.work.length - (this.work_offset + this.pointer_work));
System.arraycopy(this.work, this.work_offset + this.pointer_work, this.workTempOffset, 0, this.workTempOffset.length);
}
/**
* workTempOffset --> work
*/
private void arraycopyWorkTempOffsetForWork() {
System.arraycopy(this.workTempOffset, 0, this.work, this.work_offset + this.pointer_work, this.workTempOffset.length);
this.workTempOffset = null;
}
/**
* cTempOffset --> c__
*/
private void arraycopyCTempOffsetForC() {
System.arraycopy(this.cTempOffset, 0, this.c, this.c_offset + this.pointer_c, this.cTempOffset.length);
this.cTempOffset = null;
}
/**
* @param side side
* @param trans trans
* @param direct direct
* @param storev storev
* @param m m
* @param n n
* @param k k
* @param vin vin
* @param ldv ldv
* @param tin tin
* @param ldt ldt
* @param cin cin
* @param ldc ldc
* @param workin workin
* @param ldwork ldwork
* @return result
*/
public int execute(String side, String trans, String direct, String storev, int m, int n, int k, RS[] vin, int ldv, RS[] tin, int ldt, RS[] cin, int ldc, RS[] workin, int ldwork) {
return this.dlarfb_(side, trans, direct, storev, m, n, k, vin, ldv, tin, ldt, cin, ldc, workin, ldwork);
}
}