Dtrmm.java
package org.mklab.sdpj.gpack.blaswrap;
import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;
/**
* @author takafumi
* @param <RS> 実スカラーの型
* @param <RM> 実行列の型
* @param <CS> 複素スカラーの型
* @param <CM> 複素行列の型
*/
public class Dtrmm<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {
/* Purpose
=======
DTRMM performs one of the matrix-matrix operations
B := alpha*op( A )*B, or B := alpha*B*op( A ),
where alpha is a scalar, B is an m by n matrix, A is a unit, or
non-unit, upper or lower triangular matrix and op( A ) is one of
op( A ) = A or op( A ) = A'.
Parameters
==========
SIDE - CHARACTER*1.
On entry, SIDE specifies whether op( A ) multiplies B from
the left or right as follows:
SIDE = 'L' or 'l' B := alpha*op( A )*B.
SIDE = 'R' or 'r' B := alpha*B*op( A ).
Unchanged on exit.
UPLO - CHARACTER*1.
On entry, UPLO specifies whether the matrix A is an upper or
lower triangular matrix as follows:
UPLO = 'U' or 'u' A is an upper triangular matrix.
UPLO = 'L' or 'l' A is a lower triangular matrix.
Unchanged on exit.
TRANSA - CHARACTER*1.
On entry, TRANSA specifies the form of op( A ) to be used in
the matrix multiplication as follows:
TRANSA = 'N' or 'n' op( A ) = A.
TRANSA = 'T' or 't' op( A ) = A'.
TRANSA = 'C' or 'c' op( A ) = A'.
Unchanged on exit.
DIAG - CHARACTER*1.
On entry, DIAG specifies whether or not A is unit triangular
as follows:
DIAG = 'U' or 'u' A is assumed to be unit triangular.
DIAG = 'N' or 'n' A is not assumed to be unit
triangular.
Unchanged on exit.
M - INTEGER.
On entry, M specifies the number of rows of B. M must be at
least zero.
Unchanged on exit.
N - INTEGER.
On entry, N specifies the number of columns of B. N must be
at least zero.
Unchanged on exit.
ALPHA - DOUBLE PRECISION.
On entry, ALPHA specifies the scalar alpha. When alpha is
zero then A is not referenced and B need not be set before
entry.
Unchanged on exit.
A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
Before entry with UPLO = 'U' or 'u', the leading k by k
upper triangular part of the array A must contain the upper
triangular matrix and the strictly lower triangular part of
A is not referenced.
Before entry with UPLO = 'L' or 'l', the leading k by k
lower triangular part of the array A must contain the lower
triangular matrix and the strictly upper triangular part of
A is not referenced.
Note that when DIAG = 'U' or 'u', the diagonal elements of
A are not referenced either, but are assumed to be unity.
Unchanged on exit.
LDA - INTEGER.
On entry, LDA specifies the first dimension of A as declared
in the calling (sub) program. When SIDE = 'L' or 'l' then
LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
then LDA must be at least max( 1, n ).
Unchanged on exit.
B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
Before entry, the leading m by n part of the array B must
contain the matrix B, and on exit is overwritten by the
transformed matrix.
LDB - INTEGER.
On entry, LDB specifies the first dimension of B as declared
in the calling (sub) program. LDB must be at least
max( 1, m ).
Unchanged on exit.
Level 3 Blas routine.
-- Written on 8-February-1989.
Jack Dongarra, Argonne National Laboratory.
Iain Duff, AERE Harwell.
Jeremy Du Croz, Numerical Algorithms Group Ltd.
Sven Hammarling, Numerical Algorithms Group Ltd.
Test the input parameters.
Parameter adjustments */
/**
* @param side side
* @param uplo uplo
* @param transa transa
* @param diag diag
* @param m m
* @param n n
* @param alpha alpha
* @param a a
* @param lda lda
* @param b b
* @param ldb ldb
* @return result
*/
public int execute(String side, String uplo, String transa, String diag, int m, int n, RS alpha, RS[] a, int lda, RS[] b, int ldb) {
final RS unit = a[0].createUnit();
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = -a_offset;
int b_dim1 = ldb;
int b_offset = 1 + b_dim1 * 1;
int pointer_b = -b_offset;
boolean lside = BLAS.lsame(side, "L"); //$NON-NLS-1$
int nrowa;
if (lside) {
nrowa = m;
} else {
nrowa = n;
}
boolean nounit = BLAS.lsame(diag, "N"); //$NON-NLS-1$
boolean upper = BLAS.lsame(uplo, "U"); //$NON-NLS-1$
int info = 0;
if (!lside && !BLAS.lsame(side, "R")) { //$NON-NLS-1$
info = 1;
} else if (!upper && !BLAS.lsame(uplo, "L")) { //$NON-NLS-1$
info = 2;
} else if (!BLAS.lsame(transa, "N") && !BLAS.lsame(transa, "T") && !BLAS.lsame(transa, "C")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
info = 3;
} else if (!BLAS.lsame(diag, "U") && !BLAS.lsame(diag, "N")) { //$NON-NLS-1$ //$NON-NLS-2$
info = 4;
} else if (m < 0) {
info = 5;
} else if (n < 0) {
info = 6;
} else if (lda < Math.max(1, nrowa)) {
info = 9;
} else if (ldb < Math.max(1, m)) {
info = 11;
}
if (info != 0) {
BLAS.xerbla("DTRMM ", info); //$NON-NLS-1$
return 0;
}
// Quick return if possible.
if (n == 0) {
return 0;
}
// And when alpha.eq.zero.
if (alpha.isZero()) {
for (int j = 1; j <= n; ++j) {
for (int i = 1; i <= m; ++i) {
b[j * b_dim1 + i + pointer_b] = unit.createZero();
}
}
return 0;
}
if (lside) {
if (BLAS.lsame(transa, "N")) { //$NON-NLS-1$
// Form B := alphaAB.
if (upper) {
for (int j = 1; j <= n; ++j) {
for (int k = 1; k <= m; ++k) {
if (!b[j * b_dim1 + k + pointer_b].isZero()) {
RS temp = alpha.multiply(b[j * b_dim1 + k + pointer_b]);
for (int i = 1; i <= k - 1; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = b[p].add(temp.multiply(a[k * a_dim1 + i + pointer_a]));
}
if (nounit) {
temp = temp.multiply(a[k * a_dim1 + k + pointer_a]);
}
b[j * b_dim1 + k + pointer_b] = temp;
}
}
}
} else {
for (int j = 1; j <= n; ++j) {
for (int k = m; k >= 1; --k) {
int p = j * b_dim1 + k + pointer_b;
if (!b[p].isZero()) {
RS temp = alpha.multiply(b[p]);
b[p] = temp;
if (nounit) {
b[p] = b[p].multiply(a[k * a_dim1 + k + pointer_a]);
}
for (int i = k + 1; i <= m; ++i) {
int p2 = j * b_dim1 + i + pointer_b;
b[p2] = b[p2].add(temp.multiply(a[k * a_dim1 + i + pointer_a]));
}
}
}
}
}
} else {
// Form B := alphaA'B.
if (upper) {
for (int j = 1; j <= n; ++j) {
for (int i = m; i >= 1; --i) {
RS temp = b[j * b_dim1 + i + pointer_b];
if (nounit) {
temp = temp.multiply(a[i * a_dim1 + i + pointer_a]);
}
for (int k = 1; k <= i - 1; ++k) {
temp = temp.add(a[i * a_dim1 + k + pointer_a].multiply(b[j * b_dim1 + k + pointer_b]));
}
b[j * b_dim1 + i + pointer_b] = alpha.multiply(temp);
}
}
} else {
for (int j = 1; j <= n; ++j) {
for (int i = 1; i <= m; ++i) {
RS temp = b[j * b_dim1 + i + pointer_b];
if (nounit) {
temp = temp.multiply(a[i * a_dim1 + i + pointer_a]);
}
for (int k = i + 1; k <= m; ++k) {
temp = temp.add(a[i * a_dim1 + k + pointer_a].multiply(b[j * b_dim1 + k + pointer_b]));
}
b[j * b_dim1 + i + pointer_b] = alpha.multiply(temp);
}
}
}
}
} else {
if (BLAS.lsame(transa, "N")) { //$NON-NLS-1$
// Form B := alphaBA.
if (upper) {
for (int j = n; j >= 1; --j) {
RS temp = alpha;
if (nounit) {
temp = temp.multiply(a[j * a_dim1 + j + pointer_a]);
}
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
for (int k = 1; k <= j - 1; ++k) {
if (!a[j * a_dim1 + k + pointer_a].isZero()) {
temp = alpha.multiply(a[j * a_dim1 + k + pointer_a]);
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = b[p].add(temp.multiply(b[k * b_dim1 + i + pointer_b]));
}
}
}
}
} else {
for (int j = 1; j <= n; ++j) {
RS temp = alpha;
if (nounit) {
temp = temp.multiply(a[j * a_dim1 + j + pointer_a]);
}
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
for (int k = j + 1; k <= n; ++k) {
if (!a[j * a_dim1 + k + pointer_a].isZero()) {
temp = alpha.multiply(a[j * a_dim1 + k + pointer_a]);
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = b[p].add(temp.multiply(b[k * b_dim1 + i + pointer_b]));
}
}
}
}
}
} else {
// Form B := alphaBA'.
if (upper) {
for (int k = 1; k <= n; ++k) {
for (int j = 1; j <= k - 1; ++j) {
if (!a[k * a_dim1 + j + pointer_a].isZero()) {
RS temp = alpha.multiply(a[k * a_dim1 + j + pointer_a]);
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = b[p].add(temp.multiply(b[k * b_dim1 + i + pointer_b]));
}
}
}
RS temp = alpha;
if (nounit) {
temp = temp.multiply(a[k * a_dim1 + k + pointer_a]);
}
if (!temp.equals(unit.create(1))) {
for (int i = 1; i <= m; ++i) {
int p = k * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
}
}
} else {
for (int k = n; k >= 1; --k) {
for (int j = k + 1; j <= n; ++j) {
if (!a[k * a_dim1 + j + pointer_a].isZero()) {
RS temp = alpha.multiply(a[k * a_dim1 + j + pointer_a]);
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = b[p].add(temp.multiply(b[k * b_dim1 + i + pointer_b]));
}
}
}
RS temp = alpha;
if (nounit) {
temp = temp.multiply(a[k * a_dim1 + k + pointer_a]);
}
if (!temp.equals(unit.createUnit())) {
for (int i = 1; i <= m; ++i) {
int p = k * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
}
}
}
}
}
return 0;
}
}