Dtrsm.java
package org.mklab.sdpj.gpack.blaswrap;
import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;
/**
* @author koga
* @version $Revision$, 2009/04/24
* @param <RS> 実スカラーの型
* @param <RM> 実行列の型
* @param <CS> 複素スカラーの型
* @param <CM> 複素行列の型
*/
public class Dtrsm<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {
/* Purpose
=======
DTRSM solves one of the matrix equations
op( A )*X = alpha*B, or X*op( A ) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is a unit, or
non-unit, upper or lower triangular matrix and op( A ) is one of
op( A ) = A or op( A ) = A'.
The matrix X is overwritten on B.
Parameters
==========
SIDE - CHARACTER*1.
On entry, SIDE specifies whether op( A ) appears on the left
or right of X as follows:
SIDE = 'L' or 'l' op( A )*X = alpha*B.
SIDE = 'R' or 'r' X*op( A ) = alpha*B.
Unchanged on exit.
UPLO - CHARACTER*1.
On entry, UPLO specifies whether the matrix A is an upper or
lower triangular matrix as follows:
UPLO = 'U' or 'u' A is an upper triangular matrix.
UPLO = 'L' or 'l' A is a lower triangular matrix.
Unchanged on exit.
TRANSA - CHARACTER*1.
On entry, TRANSA specifies the form of op( A ) to be used in
the matrix multiplication as follows:
TRANSA = 'N' or 'n' op( A ) = A.
TRANSA = 'T' or 't' op( A ) = A'.
TRANSA = 'C' or 'c' op( A ) = A'.
Unchanged on exit.
DIAG - CHARACTER*1.
On entry, DIAG specifies whether or not A is unit triangular
as follows:
DIAG = 'U' or 'u' A is assumed to be unit triangular.
DIAG = 'N' or 'n' A is not assumed to be unit
triangular.
Unchanged on exit.
M - INTEGER.
On entry, M specifies the number of rows of B. M must be at
least zero.
Unchanged on exit.
N - INTEGER.
On entry, N specifies the number of columns of B. N must be
at least zero.
Unchanged on exit.
ALPHA - DOUBLE PRECISION.
On entry, ALPHA specifies the scalar alpha. When alpha is
zero then A is not referenced and B need not be set before
entry.
Unchanged on exit.
A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
Before entry with UPLO = 'U' or 'u', the leading k by k
upper triangular part of the array A must contain the upper
triangular matrix and the strictly lower triangular part of
A is not referenced.
Before entry with UPLO = 'L' or 'l', the leading k by k
lower triangular part of the array A must contain the lower
triangular matrix and the strictly upper triangular part of
A is not referenced.
Note that when DIAG = 'U' or 'u', the diagonal elements of
A are not referenced either, but are assumed to be unity.
Unchanged on exit.
LDA - INTEGER.
On entry, LDA specifies the first dimension of A as declared
in the calling (sub) program. When SIDE = 'L' or 'l' then
LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
then LDA must be at least max( 1, n ).
Unchanged on exit.
B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
Before entry, the leading m by n part of the array B must
contain the right-hand side matrix B, and on exit is
overwritten by the solution matrix X.
LDB - INTEGER.
On entry, LDB specifies the first dimension of B as declared
in the calling (sub) program. LDB must be at least
max( 1, m ).
Unchanged on exit.
Level 3 Blas routine.
-- Written on 8-February-1989.
Jack Dongarra, Argonne National Laboratory.
Iain Duff, AERE Harwell.
Jeremy Du Croz, Numerical Algorithms Group Ltd.
Sven Hammarling, Numerical Algorithms Group Ltd.
Test the input parameters.
Parameter adjustments */
/**
* @param side side
* @param uplo uplo
* @param transa transa
* @param diag diag
* @param m m
* @param n n
* @param alpha alpha
* @param a a
* @param lda lda
* @param b b
* @param ldb ldb
* @return result
*/
public int execute(String side, String uplo, String transa, String diag, int m, int n, RS alpha, RS[] a, int lda, RS[] b, int ldb) {
final RS unit = a[0].createUnit();
int a_dim1 = lda;
int a_offset = 1 + a_dim1 * 1;
int pointer_a = -a_offset;
int b_dim1 = ldb;
int b_offset = 1 + b_dim1 * 1;
int pointer_b = -b_offset;
boolean lside = BLAS.lsame(side, "L"); //$NON-NLS-1$
int nrowa;
if (lside) {
nrowa = m;
} else {
nrowa = n;
}
boolean nounit = BLAS.lsame(diag, "N"); //$NON-NLS-1$
boolean upper = BLAS.lsame(uplo, "U"); //$NON-NLS-1$
int info = 0;
if (!lside && !BLAS.lsame(side, "R")) { //$NON-NLS-1$
info = 1;
} else if (!upper && !BLAS.lsame(uplo, "L")) { //$NON-NLS-1$
info = 2;
} else if (!BLAS.lsame(transa, "N") && !BLAS.lsame(transa, "T") && !BLAS.lsame(transa, "C")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
info = 3;
} else if (!BLAS.lsame(diag, "U") && !BLAS.lsame(diag, "N")) { //$NON-NLS-1$ //$NON-NLS-2$
info = 4;
} else if (m < 0) {
info = 5;
} else if (n < 0) {
info = 6;
} else if (lda < Math.max(1, nrowa)) {
info = 9;
} else if (ldb < Math.max(1, m)) {
info = 11;
}
if (info != 0) {
BLAS.xerbla("DTRSM ", info); //$NON-NLS-1$
return 0;
}
// Quick return if possible.
if (n == 0) {
return 0;
}
// And when alpha.eq.zero.
if (alpha.isZero(alpha.getMachineEpsilon())) {
for (int j = 1; j <= n; ++j) {
for (int i = 1; i <= m; ++i) {
// b_ref(i, j) = 0.;
b[j * b_dim1 + i + pointer_b] = unit.createZero();
}
}
return 0;
}
if (lside) {
if (BLAS.lsame(transa, "N")) { //$NON-NLS-1$
// Form B := alphainv( A )B.
if (upper) {
for (int j = 1; j <= n; ++j) {
if (!alpha.equals(unit.createUnit())) {
for (int i = 1; i <= m; ++i) {
// b_ref(i, j) = alpha * b_ref(i, j);
int p = j * b_dim1 + i + pointer_b;
b[p] = alpha.multiply(b[p]);
}
}
for (int k = m; k >= 1; --k) {
//if (!b[j * b_dim1 + k + pointer_b].equals(unit.create(0), Tools.EPS)) {
int bp = j * b_dim1 + k + pointer_b;
if (!b[bp].equals(unit.createZero(), unit.getMachineEpsilon())) {
if (nounit) {
// b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
b[bp] = b[bp].divide(a[k * a_dim1 + k + pointer_a]);
}
for (int i = 1; i <= k - 1; ++i) {
// b_ref(i, j) = b_ref(i, j) - b_ref(k, j) * a_ref(i, k);
int p = j * b_dim1 + i + pointer_b;
b[p] = b[p].subtract(b[bp].multiply(a[k * a_dim1 + i + pointer_a]));
}
}
}
}
} else {
for (int j = 1; j <= n; ++j) {
if (!alpha.equals(unit.createUnit(), unit.getMachineEpsilon())) {
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = alpha.multiply(b[p]);
}
}
for (int k = 1; k <= m; ++k) {
int bp = j * b_dim1 + k + pointer_b;
if (!b[bp].equals(unit.createZero(), unit.getMachineEpsilon())) {
if (nounit) {
b[bp] = b[bp].divide(a[k * a_dim1 + k + pointer_a]);
// b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
}
for (int i = k + 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = b[p].subtract(b[bp].multiply(a[k * a_dim1 + i + pointer_a]));
// b_ref(i, j) = b_ref(i, j) - b_ref(k, j) * a_ref(i, k);
}
}
}
}
}
} else {
// Form B := alphainv( A' )B.
if (upper) {
for (int j = 1; j <= n; ++j) {
for (int i = 1; i <= m; ++i) {
RS temp = alpha.multiply(b[j * b_dim1 + i + pointer_b]);
for (int k = 1; k <= i - 1; ++k) {
// temp -= a_ref(k, i) * b_ref(k, j);
temp = temp.subtract(a[i * a_dim1 + k + pointer_a].multiply(b[j * b_dim1 + k + pointer_b]));
}
if (nounit) {
temp = temp.divide(a[i * a_dim1 + i + pointer_a]);
// temp /= a_ref(i, i);
}
b[j * b_dim1 + i + pointer_b] = temp;
// b_ref(i, j) = temp;
}
}
} else {
for (int j = 1; j <= n; ++j) {
for (int i = m; i >= 1; --i) {
RS temp = alpha.multiply(b[(j) * b_dim1 + i + pointer_b]);
for (int k = i + 1; k <= m; ++k) {
// temp -= a_ref(k, i) * b_ref(k, j);
temp = temp.subtract(a[i * a_dim1 + k + pointer_a].multiply(b[j * b_dim1 + k + pointer_b]));
}
if (nounit) {
temp = temp.divide(a[i * a_dim1 + i + pointer_a]);
// temp /= a_ref(i, i);
}
// b_ref(i, j) = temp;
b[j * b_dim1 + i + pointer_b] = temp;
}
}
}
}
} else {
if (BLAS.lsame(transa, "N")) { //$NON-NLS-1$
// Form B := alphaBinv( A ).
if (upper) {
for (int j = 1; j <= n; ++j) {
if (!alpha.equals(unit.create(1), unit.getMachineEpsilon())) {
for (int i = 1; i <= m; ++i) {
int p = j * b_dim1 + i + pointer_b;
b[p] = alpha.multiply(b[p]);
// b_ref(i, j) = alpha * b_ref(i, j);
}
}
for (int k = 1; k <= j - 1; ++k) {
int ap = j * a_dim1 + k + pointer_a;
if (!a[ap].equals(unit.create(0), unit.getMachineEpsilon())) {
for (int i = 1; i <= m; ++i) {
// b_ref(i, j) = b_ref(i, j) - a_ref(k, j) * b_ref(i,k);
int bp = j * b_dim1 + i + pointer_b;
b[bp] = b[bp].subtract(a[ap].multiply(b[k * b_dim1 + i + pointer_b]));
}
}
}
if (nounit) {
RS temp = unit.create(1).divide(a[j * a_dim1 + j + pointer_a]);
for (int i = 1; i <= m; ++i) {
// b_ref(i, j) = temp * b_ref(i, j);
int p = j * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
}
}
} else {
for (int j = n; j >= 1; --j) {
if (!alpha.equals(unit.create(1), unit.getMachineEpsilon())) {
for (int i = 1; i <= m; ++i) {
// b_ref(i, j) = *alpha * b_ref(i, j);
int p = j * b_dim1 + i + pointer_b;
b[p] = alpha.multiply(b[p]);
}
}
for (int k = j + 1; k <= n; ++k) {
int ap = j * a_dim1 + k + pointer_a;
if (!a[ap].equals(unit.create(0), unit.getMachineEpsilon())) {
for (int i = 1; i <= m; ++i) {
int bp = j * b_dim1 + i + pointer_b;
b[bp] = b[bp].subtract(a[ap].multiply(b[k * b_dim1 + i + pointer_b]));
// b_ref(i, j) = b_ref(i, j) - a_ref(k, j) * b_ref(i, k);
}
}
}
if (nounit) {
RS temp = unit.create(1).divide(a[j * a_dim1 + j + pointer_a]);
// temp = 1. / a_ref(j, j);
for (int i = 1; i <= m; ++i) {
// b_ref(i, j) = temp * b_ref(i, j);
int p = j * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
}
}
}
} else {
// Form B := alphaBinv( A' ).
if (upper) {
for (int k = n; k >= 1; --k) {
if (nounit) {
// temp = 1. / a_ref(k, k);
RS temp = unit.create(1).divide(a[k * a_dim1 + k + pointer_a]);
for (int i = 1; i <= m; ++i) {
// b_ref(i, k) = temp * b_ref(i, k);
int p = k * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
}
for (int j = 1; j <= k - 1; ++j) {
int ap = k * a_dim1 + j + pointer_a;
if (!a[ap].equals(unit.create(0), unit.getMachineEpsilon())) {
RS temp = a[ap];
// temp = a_ref(j, k);
for (int i = 1; i <= m; ++i) {
int bp = j * b_dim1 + i + pointer_b;
b[bp] = b[bp].subtract(temp.multiply(b[k * b_dim1 + i + pointer_b]));
// b_ref(i, j) = b_ref(i, j) - temp * b_ref(i, k);
}
}
}
if (!alpha.equals(unit.create(1), unit.getMachineEpsilon())) {
for (int i = 1; i <= m; ++i) {
// b_ref(i, k) = *alpha * b_ref(i, k);
int p = k * b_dim1 + i + pointer_b;
b[p] = alpha.multiply(b[p]);
}
}
}
} else {
for (int k = 1; k <= n; ++k) {
if (nounit) {
RS temp = unit.createUnit().divide(a[k * a_dim1 + k + pointer_a]);
// temp = 1. / a_ref(k, k);
for (int i = 1; i <= m; ++i) {
// b_ref(i, k) = temp * b_ref(i, k);
int p = k * b_dim1 + i + pointer_b;
b[p] = temp.multiply(b[p]);
}
}
for (int j = k + 1; j <= n; ++j) {
int ap = k * a_dim1 + j + pointer_a;
if (!a[ap].equals(unit.createZero(), unit.getMachineEpsilon())) {
RS temp = a[ap];
for (int i = 1; i <= m; ++i) {
int bp = j * b_dim1 + i + pointer_b;
b[bp] = b[bp].subtract(temp.multiply(b[k * b_dim1 + i + pointer_b]));
// b_ref(i, j) = b_ref(i, j) - temp * b_ref(i, k);
}
}
}
if (!alpha.equals(unit.createUnit(), unit.getMachineEpsilon())) {
for (int i = 1; i <= m; ++i) {
// b_ref(i, k) = *alpha * b_ref(i, k);
int p = k * b_dim1 + i + pointer_b;
b[p] = alpha.multiply(b[p]);
}
}
}
}
}
}
return 0;
}
}