Dtrsm.java

package org.mklab.sdpj.gpack.blaswrap;

import org.mklab.nfc.matrix.ComplexNumericalMatrix;
import org.mklab.nfc.matrix.RealNumericalMatrix;
import org.mklab.nfc.scalar.ComplexNumericalScalar;
import org.mklab.nfc.scalar.RealNumericalScalar;


/**
 * @author koga
 * @version $Revision$, 2009/04/24
 * @param <RS> 実スカラーの型
 * @param <RM> 実行列の型
 * @param <CS> 複素スカラーの型
 * @param <CM> 複素行列の型
 */
public class Dtrsm<RS extends RealNumericalScalar<RS, RM, CS, CM>, RM extends RealNumericalMatrix<RS, RM, CS, CM>, CS extends ComplexNumericalScalar<RS, RM, CS, CM>, CM extends ComplexNumericalMatrix<RS, RM, CS, CM>> {

  /*  Purpose   
  =======   
  DTRSM  solves one of the matrix equations   
     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,   
  where alpha is a scalar, X and B are m by n matrices, A is a unit, or   
  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of   
     op( A ) = A   or   op( A ) = A'.   
  The matrix X is overwritten on B.   
  Parameters   
  ==========   
  SIDE   - CHARACTER*1.   
           On entry, SIDE specifies whether op( A ) appears on the left   
           or right of X as follows:   
              SIDE = 'L' or 'l'   op( A )*X = alpha*B.   
              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.   
           Unchanged on exit.   
  UPLO   - CHARACTER*1.   
           On entry, UPLO specifies whether the matrix A is an upper or   
           lower triangular matrix as follows:   
              UPLO = 'U' or 'u'   A is an upper triangular matrix.   
              UPLO = 'L' or 'l'   A is a lower triangular matrix.   
           Unchanged on exit.   
  TRANSA - CHARACTER*1.   
           On entry, TRANSA specifies the form of op( A ) to be used in   
           the matrix multiplication as follows:   
              TRANSA = 'N' or 'n'   op( A ) = A.   
              TRANSA = 'T' or 't'   op( A ) = A'.   
              TRANSA = 'C' or 'c'   op( A ) = A'.   
           Unchanged on exit.   
  DIAG   - CHARACTER*1.   
           On entry, DIAG specifies whether or not A is unit triangular   
           as follows:   
              DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
              DIAG = 'N' or 'n'   A is not assumed to be unit   
                                  triangular.   
           Unchanged on exit.   
  M      - INTEGER.   
           On entry, M specifies the number of rows of B. M must be at   
           least zero.   
           Unchanged on exit.   
  N      - INTEGER.   
           On entry, N specifies the number of columns of B.  N must be   
           at least zero.   
           Unchanged on exit.   
  ALPHA  - DOUBLE PRECISION.   
           On entry,  ALPHA specifies the scalar  alpha. When  alpha is   
           zero then  A is not referenced and  B need not be set before   
           entry.   
           Unchanged on exit.   
  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m   
           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.   
           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k   
           upper triangular part of the array  A must contain the upper   
           triangular matrix  and the strictly lower triangular part of   
           A is not referenced.   
           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k   
           lower triangular part of the array  A must contain the lower   
           triangular matrix  and the strictly upper triangular part of   
           A is not referenced.   
           Note that when  DIAG = 'U' or 'u',  the diagonal elements of   
           A  are not referenced either,  but are assumed to be  unity.   
           Unchanged on exit.   
  LDA    - INTEGER.   
           On entry, LDA specifies the first dimension of A as declared   
           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then   
           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'   
           then LDA must be at least max( 1, n ).   
           Unchanged on exit.   
  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).   
           Before entry,  the leading  m by n part of the array  B must   
           contain  the  right-hand  side  matrix  B,  and  on exit  is   
           overwritten by the solution matrix  X.   
  LDB    - INTEGER.   
           On entry, LDB specifies the first dimension of B as declared   
           in  the  calling  (sub)  program.   LDB  must  be  at  least   
           max( 1, m ).   
           Unchanged on exit.   
  Level 3 Blas routine.   
  -- Written on 8-February-1989.   
     Jack Dongarra, Argonne National Laboratory.   
     Iain Duff, AERE Harwell.   
     Jeremy Du Croz, Numerical Algorithms Group Ltd.   
     Sven Hammarling, Numerical Algorithms Group Ltd.   
     Test the input parameters.   
     Parameter adjustments */

  /**
   * @param side side
   * @param uplo uplo
   * @param transa transa
   * @param diag diag
   * @param m m
   * @param n n
   * @param alpha alpha
   * @param a a
   * @param lda lda
   * @param b b
   * @param ldb ldb
   * @return result
   */
  public int execute(String side, String uplo, String transa, String diag, int m, int n, RS alpha, RS[] a, int lda, RS[] b, int ldb) {
    final RS unit = a[0].createUnit();

    int a_dim1 = lda;
    int a_offset = 1 + a_dim1 * 1;
    int pointer_a = -a_offset;

    int b_dim1 = ldb;
    int b_offset = 1 + b_dim1 * 1;
    int pointer_b = -b_offset;

    boolean lside = BLAS.lsame(side, "L"); //$NON-NLS-1$

    int nrowa;
    if (lside) {
      nrowa = m;
    } else {
      nrowa = n;
    }
    boolean nounit = BLAS.lsame(diag, "N"); //$NON-NLS-1$
    boolean upper = BLAS.lsame(uplo, "U"); //$NON-NLS-1$

    int info = 0;
    if (!lside && !BLAS.lsame(side, "R")) { //$NON-NLS-1$
      info = 1;
    } else if (!upper && !BLAS.lsame(uplo, "L")) { //$NON-NLS-1$
      info = 2;
    } else if (!BLAS.lsame(transa, "N") && !BLAS.lsame(transa, "T") && !BLAS.lsame(transa, "C")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
      info = 3;
    } else if (!BLAS.lsame(diag, "U") && !BLAS.lsame(diag, "N")) { //$NON-NLS-1$ //$NON-NLS-2$
      info = 4;
    } else if (m < 0) {
      info = 5;
    } else if (n < 0) {
      info = 6;
    } else if (lda < Math.max(1, nrowa)) {
      info = 9;
    } else if (ldb < Math.max(1, m)) {
      info = 11;
    }
    if (info != 0) {
      BLAS.xerbla("DTRSM ", info); //$NON-NLS-1$
      return 0;
    }
    // Quick return if possible.
    if (n == 0) {
      return 0;
    }
    // And when alpha.eq.zero.
    if (alpha.isZero(alpha.getMachineEpsilon())) {
      for (int j = 1; j <= n; ++j) {
        for (int i = 1; i <= m; ++i) {
          // b_ref(i, j) = 0.;
          b[j * b_dim1 + i + pointer_b] = unit.createZero();
        }
      }
      return 0;
    }

    if (lside) {
      if (BLAS.lsame(transa, "N")) { //$NON-NLS-1$
        // Form B := alphainv( A )B.
        if (upper) {
          for (int j = 1; j <= n; ++j) {
            if (!alpha.equals(unit.createUnit())) {
              for (int i = 1; i <= m; ++i) {
                // b_ref(i, j) = alpha * b_ref(i, j);
                int p = j * b_dim1 + i + pointer_b;
                b[p] = alpha.multiply(b[p]);
              }
            }
            for (int k = m; k >= 1; --k) {
              //if (!b[j * b_dim1 + k + pointer_b].equals(unit.create(0), Tools.EPS)) {
              int bp = j * b_dim1 + k + pointer_b;
              if (!b[bp].equals(unit.createZero(), unit.getMachineEpsilon())) {
                if (nounit) {
                  // b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
                  b[bp] = b[bp].divide(a[k * a_dim1 + k + pointer_a]);
                }
                for (int i = 1; i <= k - 1; ++i) {
                  // b_ref(i, j) = b_ref(i, j) - b_ref(k, j) * a_ref(i, k);
                  int p = j * b_dim1 + i + pointer_b;
                  b[p] = b[p].subtract(b[bp].multiply(a[k * a_dim1 + i + pointer_a]));
                }
              }
            }
          }
        } else {
          for (int j = 1; j <= n; ++j) {
            if (!alpha.equals(unit.createUnit(), unit.getMachineEpsilon())) {
              for (int i = 1; i <= m; ++i) {
                int p = j * b_dim1 + i + pointer_b;
                b[p] = alpha.multiply(b[p]);
              }
            }
            for (int k = 1; k <= m; ++k) {
              int bp = j * b_dim1 + k + pointer_b;
              if (!b[bp].equals(unit.createZero(), unit.getMachineEpsilon())) {
                if (nounit) {
                  b[bp] = b[bp].divide(a[k * a_dim1 + k + pointer_a]);
                  // b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
                }
                for (int i = k + 1; i <= m; ++i) {
                  int p = j * b_dim1 + i + pointer_b;
                  b[p] = b[p].subtract(b[bp].multiply(a[k * a_dim1 + i + pointer_a]));
                  // b_ref(i, j) = b_ref(i, j) - b_ref(k, j) * a_ref(i, k);
                }
              }
            }
          }
        }
      } else {
        // Form B := alphainv( A' )B.
        if (upper) {
          for (int j = 1; j <= n; ++j) {
            for (int i = 1; i <= m; ++i) {
              RS temp = alpha.multiply(b[j * b_dim1 + i + pointer_b]);
              for (int k = 1; k <= i - 1; ++k) {
                // temp -= a_ref(k, i) * b_ref(k, j);
                temp = temp.subtract(a[i * a_dim1 + k + pointer_a].multiply(b[j * b_dim1 + k + pointer_b]));
              }
              if (nounit) {
                temp = temp.divide(a[i * a_dim1 + i + pointer_a]);
                // temp /= a_ref(i, i);
              }
              b[j * b_dim1 + i + pointer_b] = temp;
              // b_ref(i, j) = temp;
            }
          }
        } else {
          for (int j = 1; j <= n; ++j) {
            for (int i = m; i >= 1; --i) {
              RS temp = alpha.multiply(b[(j) * b_dim1 + i + pointer_b]);
              for (int k = i + 1; k <= m; ++k) {
                // temp -= a_ref(k, i) * b_ref(k, j);
                temp = temp.subtract(a[i * a_dim1 + k + pointer_a].multiply(b[j * b_dim1 + k + pointer_b]));
              }
              if (nounit) {
                temp = temp.divide(a[i * a_dim1 + i + pointer_a]);
                // temp /= a_ref(i, i);
              }
              // b_ref(i, j) = temp;
              b[j * b_dim1 + i + pointer_b] = temp;
            }
          }
        }
      }
    } else {
      if (BLAS.lsame(transa, "N")) { //$NON-NLS-1$
        // Form B := alphaBinv( A ).
        if (upper) {
          for (int j = 1; j <= n; ++j) {
            if (!alpha.equals(unit.create(1), unit.getMachineEpsilon())) {
              for (int i = 1; i <= m; ++i) {
                int p = j * b_dim1 + i + pointer_b;
                b[p] = alpha.multiply(b[p]);
                // b_ref(i, j) = alpha * b_ref(i, j);
              }
            }
            for (int k = 1; k <= j - 1; ++k) {
              int ap = j * a_dim1 + k + pointer_a;
              if (!a[ap].equals(unit.create(0), unit.getMachineEpsilon())) {
                for (int i = 1; i <= m; ++i) {
                  // b_ref(i, j) = b_ref(i, j) - a_ref(k, j) * b_ref(i,k);
                  int bp = j * b_dim1 + i + pointer_b;
                  b[bp] = b[bp].subtract(a[ap].multiply(b[k * b_dim1 + i + pointer_b]));
                }
              }
            }
            if (nounit) {
              RS temp = unit.create(1).divide(a[j * a_dim1 + j + pointer_a]);

              for (int i = 1; i <= m; ++i) {
                // b_ref(i, j) = temp * b_ref(i, j);
                int p = j * b_dim1 + i + pointer_b;
                b[p] = temp.multiply(b[p]);
              }
            }
          }
        } else {
          for (int j = n; j >= 1; --j) {
            if (!alpha.equals(unit.create(1), unit.getMachineEpsilon())) {
              for (int i = 1; i <= m; ++i) {
                // b_ref(i, j) = *alpha * b_ref(i, j);
                int p = j * b_dim1 + i + pointer_b;
                b[p] = alpha.multiply(b[p]);
              }
            }
            for (int k = j + 1; k <= n; ++k) {
              int ap = j * a_dim1 + k + pointer_a;
              if (!a[ap].equals(unit.create(0), unit.getMachineEpsilon())) {
                for (int i = 1; i <= m; ++i) {
                  int bp = j * b_dim1 + i + pointer_b;
                  b[bp] = b[bp].subtract(a[ap].multiply(b[k * b_dim1 + i + pointer_b]));
                  // b_ref(i, j) = b_ref(i, j) - a_ref(k, j) * b_ref(i, k);
                }
              }
            }
            if (nounit) {
              RS temp = unit.create(1).divide(a[j * a_dim1 + j + pointer_a]);
              // temp = 1. / a_ref(j, j);
              for (int i = 1; i <= m; ++i) {
                // b_ref(i, j) = temp * b_ref(i, j);
                int p = j * b_dim1 + i + pointer_b;
                b[p] = temp.multiply(b[p]);
              }
            }
          }
        }
      } else {
        // Form B := alphaBinv( A' ).
        if (upper) {
          for (int k = n; k >= 1; --k) {
            if (nounit) {
              // temp = 1. / a_ref(k, k);
              RS temp = unit.create(1).divide(a[k * a_dim1 + k + pointer_a]);
              for (int i = 1; i <= m; ++i) {
                // b_ref(i, k) = temp * b_ref(i, k);
                int p = k * b_dim1 + i + pointer_b;
                b[p] = temp.multiply(b[p]);
              }
            }
            for (int j = 1; j <= k - 1; ++j) {
              int ap = k * a_dim1 + j + pointer_a;
              if (!a[ap].equals(unit.create(0), unit.getMachineEpsilon())) {
                RS temp = a[ap];
                // temp = a_ref(j, k);
                for (int i = 1; i <= m; ++i) {
                  int bp = j * b_dim1 + i + pointer_b;
                  b[bp] = b[bp].subtract(temp.multiply(b[k * b_dim1 + i + pointer_b]));
                  // b_ref(i, j) = b_ref(i, j) - temp * b_ref(i, k);
                }
              }
            }
            if (!alpha.equals(unit.create(1), unit.getMachineEpsilon())) {
              for (int i = 1; i <= m; ++i) {
                // b_ref(i, k) = *alpha * b_ref(i, k);
                int p = k * b_dim1 + i + pointer_b;
                b[p] = alpha.multiply(b[p]);
              }
            }
          }
        } else {
          for (int k = 1; k <= n; ++k) {
            if (nounit) {
              RS temp = unit.createUnit().divide(a[k * a_dim1 + k + pointer_a]);
              // temp = 1. / a_ref(k, k);
              for (int i = 1; i <= m; ++i) {
                // b_ref(i, k) = temp * b_ref(i, k);
                int p = k * b_dim1 + i + pointer_b;
                b[p] = temp.multiply(b[p]);
              }
            }
            for (int j = k + 1; j <= n; ++j) {
              int ap = k * a_dim1 + j + pointer_a;
              if (!a[ap].equals(unit.createZero(), unit.getMachineEpsilon())) {
                RS temp = a[ap];
                for (int i = 1; i <= m; ++i) {
                  int bp = j * b_dim1 + i + pointer_b;
                  b[bp] = b[bp].subtract(temp.multiply(b[k * b_dim1 + i + pointer_b]));
                  // b_ref(i, j) = b_ref(i, j) - temp * b_ref(i, k);
                }
              }
            }
            if (!alpha.equals(unit.createUnit(), unit.getMachineEpsilon())) {
              for (int i = 1; i <= m; ++i) {
                // b_ref(i, k) = *alpha * b_ref(i, k);
                int p = k * b_dim1 + i + pointer_b;
                b[p] = alpha.multiply(b[p]);
              }
            }
          }
        }
      }
    }

    return 0;
  }
}