OOFEM  2.4
OOFEM.org - Object Oriented Finite Element Solver
superlusolver.C
Go to the documentation of this file.
1 /*
2  *
3  * ##### ##### ###### ###### ### ###
4  * ## ## ## ## ## ## ## ### ##
5  * ## ## ## ## #### #### ## # ##
6  * ## ## ## ## ## ## ## ##
7  * ## ## ## ## ## ## ## ##
8  * ##### ##### ## ###### ## ##
9  *
10  *
11  * OOFEM : Object Oriented Finite Element Code
12  *
13  * Copyright (C) 1993 - 2013 Borek Patzak
14  *
15  *
16  *
17  * Czech Technical University, Faculty of Civil Engineering,
18  * Department of Structural Mechanics, 166 29 Prague, Czech Republic
19  *
20  * This library is free software; you can redistribute it and/or
21  * modify it under the terms of the GNU Lesser General Public
22  * License as published by the Free Software Foundation; either
23  * version 2.1 of the License, or (at your option) any later version.
24  *
25  * This program is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28  * Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public
31  * License along with this library; if not, write to the Free Software
32  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33  */
34 
35 #include "sparsemtrx.h"
36 #include "floatarray.h"
37 #include "compcol.h"
38 #include "linsystsolvertype.h"
39 #include "classfactory.h"
40 
41 #include "SUPERLU_MT/include/slu_mt_ddefs.h"
42 #include "superlusolver.h"
43 #include <stdlib.h>
44 #include <math.h>
45 #include "verbose.h"
46 //#include "globals.h"
47 
48 #ifdef TIME_REPORT
49  #include "timer.h"
50 #endif
51 
52 namespace oofem {
54 
55 
57 { }
58 
59 
61 
62 
65 {
66  return IRRT_OK;
67 }
68 
71 {
72  //1. Step: Transform SparseMtrx *A to SuperMatrix
73  //2. Step: Transfrom FloatArray *b to SuperVector
74  //3. Step: Transfrom FLoatArray *x to SuperVector
75  //3. Step: call superLU Solve with tranformed input parameters
76  //4. Step: Transfrom result back to FloatArray *x
77  //5. Step: return SUCCESS!
78 
79  CompCol *CC = dynamic_cast< CompCol * >( & Lhs );
80  if ( CC ) {
81  SuperMatrix A, L, U;
82  SuperMatrix B, X;
83  int_t nprocs;
84  fact_t fact;
85  trans_t trans;
86  yes_no_t refact, usepr;
87  equed_t equed;
88  double *a;
89  int_t *asub, *xa;
90  int_t *perm_c;
91  int_t *perm_r;
92  void *work;
93  superlumt_options_t superlumt_options;
94  int_t info, lwork, nrhs, /*ldx,*/ panel_size, relax;
95  int_t m, n, nnz, permc_spec;
96  double *rhsb, *rhsx /*, *xact*/;
97  double *R, *C;
98  double *ferr, *berr;
99  double /*u,*/ drop_tol, rpg, rcond;
100  superlu_memusage_t superlu_memusage;
101  void parse_command_line();
102 
103  /* Default parameters to control factorization. */
104  nprocs = omp_get_max_threads(); //omp_get_num_threads() does not work as we are not in parallel region!!;
105  printf("Use number of LU threads: %u\n", nprocs);
106  fact = EQUILIBRATE;
107  trans = NOTRANS;
108  equed = NOEQUIL;
109  refact = NO;
110  panel_size = sp_ienv(1);
111  relax = sp_ienv(2);
112  //u = 1.0;
113  usepr = NO;
114  drop_tol = 0.0;
115  lwork = 0;
116  nrhs = 1;
117 
118  m = CC->giveNumberOfRows();
119  n = CC->giveNumberOfColumns();
120  nnz = CC->giveNumberOfNonzeros();
121  a = CC->giveValues().givePointer();
122  asub = CC->giveRowIndex().givePointer();
123  xa = CC->giveColPtr().givePointer();
124 
125  /* Command line options to modify default behavior. */
126  //parse_command_line(argc, argv, &nprocs, &lwork, &panel_size, &relax,
127  // &u, &fact, &trans, &refact, &equed);
128 
129  if ( lwork > 0 ) {
130  work = SUPERLU_MALLOC(lwork);
131  printf("Use work space of size LWORK = " IFMT " bytes\n", lwork);
132  if ( !work ) {
133  SUPERLU_ABORT("cannot allocate work[]");
134  }
135  }
136 
137  //printf("Use work space of size LWORK = " IFMT " bytes\n", lwork);
138 
139 #if ( PRNTlevel == 1 )
140  cpp_defs();
141  printf( "int_t %d bytes\n", sizeof( int_t ) );
142 #endif
143 
144 #ifdef TIME_REPORT
145  Timer timer;
146  timer.startTimer();
147 #endif
148 
149 
150 
151  dCreate_CompCol_Matrix(& A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
152 
153 
154  if ( !( rhsb = doubleMalloc(m * nrhs) ) ) {
155  SUPERLU_ABORT("Malloc fails for rhsb[].");
156  }
157  if ( !( rhsx = doubleMalloc(m * nrhs) ) ) {
158  SUPERLU_ABORT("Malloc fails for rhsx[].");
159  }
160  dCreate_Dense_Matrix(& B, m, nrhs, b.givePointer(), m, SLU_DN, SLU_D, SLU_GE);
161  dCreate_Dense_Matrix(& X, m, nrhs, rhsx, m, SLU_DN, SLU_D, SLU_GE);
162  //dPrint_Dense_Matrix(&B);
163  //dPrint_Dense_Matrix(&X);
164 
165  if ( !( perm_r = intMalloc(m) ) ) {
166  SUPERLU_ABORT("Malloc fails for perm_r[].");
167  }
168  if ( !( perm_c = intMalloc(n) ) ) {
169  SUPERLU_ABORT("Malloc fails for perm_c[].");
170  }
171  if ( !( R = ( double * ) SUPERLU_MALLOC( A.nrow * sizeof( double ) ) ) ) {
172  SUPERLU_ABORT("SUPERLU_MALLOC fails for R[].");
173  }
174  if ( !( C = ( double * ) SUPERLU_MALLOC( A.ncol * sizeof( double ) ) ) ) {
175  SUPERLU_ABORT("SUPERLU_MALLOC fails for C[].");
176  }
177  if ( !( ferr = ( double * ) SUPERLU_MALLOC( nrhs * sizeof( double ) ) ) ) {
178  SUPERLU_ABORT("SUPERLU_MALLOC fails for ferr[].");
179  }
180  if ( !( berr = ( double * ) SUPERLU_MALLOC( nrhs * sizeof( double ) ) ) ) {
181  SUPERLU_ABORT("SUPERLU_MALLOC fails for berr[].");
182  }
183 
184  /*
185  * Get column permutation vector perm_c[], according to permc_spec:
186  * permc_spec = 0: natural ordering
187  * permc_spec = 1: minimum degree ordering on structure of A'*A
188  * permc_spec = 2: minimum degree ordering on structure of A'+A
189  * permc_spec = 3: approximate minimum degree for unsymmetric matrices
190  */
191 
192  permc_spec = 2;
193  get_perm_c(permc_spec, & A, perm_c);
194 
195  superlumt_options.SymmetricMode = YES;
196  superlumt_options.diag_pivot_thresh = 0.0;
197 
198  superlumt_options.nprocs = nprocs;
199  superlumt_options.fact = fact;
200  superlumt_options.trans = trans;
201  superlumt_options.refact = refact;
202  superlumt_options.panel_size = panel_size;
203  superlumt_options.relax = relax;
204  superlumt_options.usepr = usepr;
205  superlumt_options.drop_tol = drop_tol;
206  superlumt_options.PrintStat = NO;
207  superlumt_options.perm_c = perm_c;
208  superlumt_options.perm_r = perm_r;
209  //superlumt_options.work = work;
210  superlumt_options.lwork = lwork;
211  if ( !( superlumt_options.etree = intMalloc(n) ) ) {
212  SUPERLU_ABORT("Malloc fails for etree[].");
213  }
214  if ( !( superlumt_options.colcnt_h = intMalloc(n) ) ) {
215  SUPERLU_ABORT("Malloc fails for colcnt_h[].");
216  }
217  if ( !( superlumt_options.part_super_h = intMalloc(n) ) ) {
218  SUPERLU_ABORT("Malloc fails for colcnt_h[].");
219  }
220 
221  printf("sym_mode %d\tdiag_pivot_thresh %.4e\n",
222  superlumt_options.SymmetricMode,
223  superlumt_options.diag_pivot_thresh);
224 
225  /*
226  * Solve the system and compute the condition number
227  * and error bounds using pdgssvx.
228  */
229  pdgssvx(nprocs, & superlumt_options, & A, perm_c, perm_r, & equed, R, C, & L, & U, & B, & X, & rpg, & rcond, ferr, berr, & superlu_memusage, & info);
230 
231  //dPrint_Dense_Matrix(&B);
232  //dPrint_Dense_Matrix(&X);
233 
234 #if 0
235  printf("psgssvx(): info " IFMT "\n", info);
236 
237  if ( info == 0 || info == n + 1 ) {
238  SCPformat *Lstore;
239  NCPformat *Ustore;
240 
241  printf("Recip. pivot growth = %e\n", rpg);
242  printf("Recip. condition number = %e\n", rcond);
243  printf("%8s%16s%16s\n", "rhs", "FERR", "BERR");
244  for ( int i = 0; i < nrhs; ++i ) {
245  printf(IFMT "%16e%16e\n", i + 1, ferr [ i ], berr [ i ]);
246  }
247 
248  Lstore = ( SCPformat * ) L.Store;
249  Ustore = ( NCPformat * ) U.Store;
250  printf("No of nonzeros in factor L = " IFMT "\n", Lstore->nnz);
251  printf("No of nonzeros in factor U = " IFMT "\n", Ustore->nnz);
252  printf("No of nonzeros in L+U = " IFMT "\n", Lstore->nnz + Ustore->nnz - n);
253  printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions " IFMT "\n",
254  superlu_memusage.for_lu / 1e6, superlu_memusage.total_needed / 1e6,
255  superlu_memusage.expansions);
256 
257  fflush(stdout);
258  } else if ( info > 0 && lwork == -1 ) {
259  printf("** Estimated memory: " IFMT " bytes\n", info - n);
260  }
261 #else
262  if ( info > 0 && lwork == -1 ) {
263  printf("** Estimated memory: " IFMT " bytes\n", info - n);
264  }
265 #endif
266 
267  //dPrint_Dense_Matrix(&B);
268  // copy B into x (assuming only one rhs)
269  x.resize( b.giveSize() );
270  this->convertRhs(& X, x);
271  SUPERLU_FREE(rhsb);
272  SUPERLU_FREE(rhsx);
273  //SUPERLU_FREE (xact);
274  SUPERLU_FREE(perm_r);
275  SUPERLU_FREE(perm_c);
276  SUPERLU_FREE(R);
277  SUPERLU_FREE(C);
278  SUPERLU_FREE(ferr);
279  SUPERLU_FREE(berr);
280  Destroy_SuperMatrix_Store(& A);
281  Destroy_SuperMatrix_Store(& B);
282  Destroy_SuperMatrix_Store(& X);
283  // SUPERLU_FREE (superlumt_options.etree);
284  // SUPERLU_FREE (superlumt_options.colcnt_h);
286  if ( lwork == 0 ) {
287  Destroy_SuperNode_SCP(& L);
288  Destroy_CompCol_NCP(& U);
289  } else if ( lwork > 0 ) {
290  SUPERLU_FREE(work);
291  }
292 
293 #ifdef TIME_REPORT
294  timer.stopTimer();
295  OOFEM_LOG_INFO( "SuperLU_MT info: user time consumed by solution: %.2fs\n", timer.getUtime() );
296  OOFEM_LOG_INFO( "SuperLU_MT info: real time consumed by solution: %.2fs\n", timer.getWtime() );
297 #endif
298  } else {
299  OOFEM_ERROR("Incompatible sparse storage encountered");
300  }
301 
302 
303 
304 
305 
306  //dPrint_Dense_Matrix(&B);
307  /*
308  * Lstore = (SCPformat *) L.Store;
309  * Ustore = (NCPformat *) U.Store;
310  * printf("No of nonzeros in factor L = " IFMT "\n", Lstore->nnz);
311  * printf("No of nonzeros in factor U = " IFMT "\n", Ustore->nnz);
312  *
313  * fflush(stdout);
314  * dPrint_CompCol_Matrix(&L);
315  * dPrint_CompCol_Matrix(&U);
316  */
317  //solved = 1;
318  return NM_Success;
319 }
320 
321 void
323 {
324  x.zero();
325 
326  if ( ( X->Stype == SLU_DN ) && ( X->Dtype == SLU_D ) ) {
327  // process only first column //int r;
328 
329  DNformat *data = ( ( DNformat * ) ( X->Store ) );
330 
331 #pragma omp parallel
332  {
333 #pragma omp parallel for
334  for ( int r = 0; r <= data->lda - 1; r++ ) {
335  //r = data->nzval[i];
336  x.at(r + 1) = ( ( double * ) data->nzval ) [ r ];
337  }
338  }
339  } else {
340  OOFEM_ERROR("convertRhs: unsupported matrix storage type or data type");
341  }
342 }
343 
344 //dPrint - following code use to print SuperLU matrix
346 {
347  NCformat *Astore;
348  register int_t i;
349  double *dp;
350 
351  printf("\nCompCol matrix: ");
352  printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype, A->Dtype, A->Mtype);
353  Astore = ( NCformat * ) A->Store;
354  dp = ( double * ) Astore->nzval;
355  printf("nrow " IFMT ", ncol " IFMT ", nnz " IFMT "\n", A->nrow, A->ncol, Astore->nnz);
356  printf("\nnzval: ");
357  for ( i = 0; i < Astore->nnz; ++i ) {
358  printf("%f ", dp [ i ]);
359  }
360  printf("\nrowind: ");
361  for ( i = 0; i < Astore->nnz; ++i ) {
362  printf(IFMT, Astore->rowind [ i ]);
363  }
364  printf("\ncolptr: ");
365  for ( i = 0; i <= A->ncol; ++i ) {
366  printf(IFMT, Astore->colptr [ i ]);
367  }
368  printf("\nend CompCol matrix.\n");
369 
370  return 0;
371 }
372 
374 {
375  DNformat *Astore;
376  register int_t i;
377  double *dp;
378 
379  printf("\nDense matrix: ");
380  printf("Stype %d , Dtype %d , Mtype %d\n", A->Stype, A->Dtype, A->Mtype);
381  Astore = ( DNformat * ) A->Store;
382  dp = ( double * ) Astore->nzval;
383  printf("nrow " IFMT ", ncol " IFMT ", lda " IFMT "\n",
384  A->nrow, A->ncol, Astore->lda);
385  printf("\nnzval: ");
386  for ( i = 0; i < A->nrow; ++i ) {
387  printf("%f ", dp [ i ]);
388  }
389  printf("\nend Dense matrix.\n");
390 
391  return 0;
392 }
393 
394 int_t
395 dCheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg,
396  int_t *colend, int_t *perm)
397 {
398  register int_t i, j, nzd, nd = 0;
399 
400  for ( j = 0; j < n; ++j ) {
401  nzd = 0;
402  for ( i = colbeg [ j ]; i < colend [ j ]; ++i ) {
403  if ( perm [ rowind [ i ] ] == j ) {
404  nzd = 1;
405  ++nd;
406  break;
407  }
408  }
409  if ( nzd == 0 ) {
410  printf("Zero diagonal at column " IFMT "\n", j);
411  }
412  }
413 
414  printf(".. dCheckZeroDiagonal() -- # diagonals " IFMT "\n", nd);
415 
416  return 0;
417 }
418 } // end namespace oofem
IntArray & giveRowIndex()
Definition: compcol.h:132
IntArray & giveColPtr()
Definition: compcol.h:133
#define NM_Success
Numerical method exited with success.
Definition: nmstatus.h:47
Class and object Domain.
Definition: domain.h:115
Base class for all matrices stored in sparse format.
Definition: sparsemtrx.h:60
double & at(int i)
Coefficient access function.
Definition: floatarray.h:131
This base class is an abstraction for all numerical methods solving sparse linear system of equations...
const int * givePointer() const
Breaks encapsulation.
Definition: intarray.h:336
int_t dPrint_Dense_Matrix(SuperMatrix *A)
unsigned long NM_Status
Mask defining NumMetod Status; which can be asked after finishing computation by Numerical Method...
Definition: nmstatus.h:44
FloatArray & giveValues()
Definition: compcol.h:131
virtual NM_Status solve(SparseMtrx &A, FloatArray &b, FloatArray &x)
Solves the given linear system iteratively by method described by SuperLU SolverType.
Definition: superlusolver.C:70
int giveNumberOfRows() const
Returns number of rows of receiver.
Definition: sparsemtrx.h:114
#define OOFEM_LOG_INFO(...)
Definition: logger.h:127
virtual IRResultType initializeFrom(InputRecord *ir)
Definition: superlusolver.C:64
#define OOFEM_ERROR(...)
Definition: error.h:61
Initializes the variable VERBOSE, in order to get a few intermediate messages on screen: beginning an...
Class representing vector of real numbers.
Definition: floatarray.h:82
Class implementig interface to SuperLU_MT solver.
Definition: superlusolver.h:57
IRResultType
Type defining the return values of InputRecord reading operations.
Definition: irresulttype.h:47
Class representing the general Input Record.
Definition: inputrecord.h:101
void zero()
Zeroes all coefficients of receiver.
Definition: floatarray.C:658
REGISTER_SparseLinSolver(IMLSolver, ST_IML)
Definition: imlsolver.C:56
Class implementing single timer, providing wall clock and user time capabilities. ...
Definition: timer.h:46
const double * givePointer() const
Gives the pointer to the raw data, breaking encapsulation.
Definition: floatarray.h:469
virtual ~SuperLUSolver()
Destructor.
Definition: superlusolver.C:60
int giveNumberOfColumns() const
Returns number of columns of receiver.
Definition: sparsemtrx.h:116
int_t dCheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg, int_t *colend, int_t *perm)
Abstract base class representing the "problem" under consideration.
Definition: engngm.h:181
double getWtime()
Returns total elapsed wall clock time in seconds.
Definition: timer.C:111
int giveSize() const
Returns the size of receiver.
Definition: floatarray.h:218
the oofem namespace is to define a context or scope in which all oofem names are defined.
void convertRhs(SuperMatrix *A, FloatArray &x)
const int giveNumberOfNonzeros()
Definition: compcol.h:136
void startTimer()
Definition: timer.C:69
double getUtime()
Returns total user time elapsed in seconds.
Definition: timer.C:105
void stopTimer()
Definition: timer.C:77
int_t dPrint_CompCol_Matrix(SuperMatrix *A)
void resize(int s)
Resizes receiver towards requested size.
Definition: floatarray.C:631

This page is part of the OOFEM documentation. Copyright (c) 2011 Borek Patzak
Project e-mail: info@oofem.org
Generated at Tue Jan 2 2018 20:07:31 for OOFEM by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2011