OOFEM 3.0
Loading...
Searching...
No Matches
superlusolver.C
Go to the documentation of this file.
1/*
2 *
3 * ##### ##### ###### ###### ### ###
4 * ## ## ## ## ## ## ## ### ##
5 * ## ## ## ## #### #### ## # ##
6 * ## ## ## ## ## ## ## ##
7 * ## ## ## ## ## ## ## ##
8 * ##### ##### ## ###### ## ##
9 *
10 *
11 * OOFEM : Object Oriented Finite Element Code
12 *
13 * Copyright (C) 1993 - 2025 Borek Patzak
14 *
15 *
16 *
17 * Czech Technical University, Faculty of Civil Engineering,
18 * Department of Structural Mechanics, 166 29 Prague, Czech Republic
19 *
20 * This library is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU Lesser General Public
22 * License as published by the Free Software Foundation; either
23 * version 2.1 of the License, or (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Lesser General Public License for more details.
29 *
30 * You should have received a copy of the GNU Lesser General Public
31 * License along with this library; if not, write to the Free Software
32 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 */
34
35#include "sparsemtrx.h"
36#include "floatarray.h"
37#include "compcol.h"
38#include "linsystsolvertype.h"
39#include "classfactory.h"
40
41#include "SUPERLU_MT/include/slu_mt_ddefs.h"
42#include "superlusolver.h"
43#include <stdlib.h>
44#include <math.h>
45#include "verbose.h"
46//#include "globals.h"
47
48#ifdef TIME_REPORT
49 #include "timer.h"
50#endif
51
52namespace oofem {
54
55
56SuperLUSolver :: SuperLUSolver(Domain *d, EngngModel *m) : SparseLinearSystemNM(d, m)
57{}
58
59
60SuperLUSolver :: ~SuperLUSolver() {
61 if (this->AAllocated) {
62 Destroy_SuperMatrix_Store(& this->A);
63 Destroy_SuperNode_SCP(& this->L);
64 Destroy_CompCol_NCP(& this->U);
65 }
66 if (this->permAllocated) {
67 SUPERLU_FREE(this->perm_r);
68 SUPERLU_FREE(this->perm_c);
69 }
70}
71
72
73void
74SuperLUSolver :: initializeFrom(InputRecord &ir)
75{
76 /*
77 * Get column permutation vector perm_c[], according to permc_spec:
78 * permc_spec = 0: natural ordering
79 * permc_spec = 1: minimum degree ordering on structure of A'*A
80 * permc_spec = 2: minimum degree ordering on structure of A'+A
81 * permc_spec = 3: approximate minimum degree for unsymmetric matrices
82 */
83
84 this->permc_spec = 0; // default
86}
87
89SuperLUSolver :: solve(SparseMtrx &Lhs, FloatArray &b, FloatArray &x)
90{
91 //1. Step: Transform SparseMtrx *A to SuperMatrix
92 //2. Step: Transfrom FloatArray *b to SuperVector
93 //3. Step: Transfrom FLoatArray *x to SuperVector
94 //3. Step: call superLU Solve with tranformed input parameters
95 //4. Step: Transfrom result back to FloatArray *x
96 //5. Step: return SUCCESS!
97#ifdef TIME_REPORT
98 Timer timer;
99 timer.startTimer();
100#endif
101
102
103 CompCol *CC = dynamic_cast< CompCol * >( & Lhs );
104
105 if ( CC ) {
106 SuperMatrix B, X;
107 int_t nprocs;
108 fact_t fact;
109 trans_t trans;
110 yes_no_t refact, usepr;
111 equed_t equed;
112 double *a;
113 int_t *asub, *xa;
114 void *work;
115 superlumt_options_t superlumt_options;
116 int_t info, lwork, nrhs, /*ldx,*/ panel_size, relax;
117 int_t m, n, nnz;
118 double *rhsb, *rhsx /*, *xact*/;
119 double *R, *C;
120 double *ferr, *berr;
121 double /*u,*/ drop_tol, rpg, rcond;
122 superlu_memusage_t superlu_memusage;
123 void parse_command_line();
124
125
126 /* Default parameters to control factorization. */
127 nprocs = omp_get_max_threads(); //omp_get_num_threads() does not work as we are not in parallel region!!;
128 printf("Use number of LU threads: %u\n", nprocs);
129 fact = DOFACT; //EQUILIBRATE;
130 trans = NOTRANS;
131 equed = NOEQUIL;
132 refact = NO;
133 panel_size = sp_ienv(1);
134 relax = sp_ienv(2);
135 //u = 1.0;
136 usepr = NO;
137 drop_tol = 0.0;
138 lwork = 0;
139 nrhs = 1;
140
141 m = CC->giveNumberOfRows();
142 n = CC->giveNumberOfColumns();
143 nnz = CC->giveNumberOfNonzeros();
144 a = CC->giveValues().givePointer();
145 asub = CC->giveRowIndex().givePointer();
146 xa = CC->giveColPtr().givePointer();
147
148 if (0) {
149 dCreate_CompCol_Matrix(& this->A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
150 if ( !( this->perm_r = intMalloc(m) ) ) {
151 SUPERLU_ABORT("Malloc fails for perm_r[].");
152 }
153 if ( !( this->perm_c = intMalloc(n) ) ) {
154 SUPERLU_ABORT("Malloc fails for perm_c[].");
155 }
156 dCreate_Dense_Matrix(& B, m, nrhs, b.givePointer(), m, SLU_DN, SLU_D, SLU_GE);
157
158 get_perm_c(this->permc_spec, &this->A, this->perm_c);
159 //dPrint_Dense_Matrix(&B);
160 pdgssv(nprocs, &this->A, this->perm_c, this->perm_r, &this->L, &this->U, &B, &info);
161 //dPrint_Dense_Matrix(&B);
162 x.resize( m );
163 this->convertRhs(& B, x);
164 Destroy_SuperMatrix_Store(& this->A);
165 Destroy_SuperMatrix_Store(& B);
166 Destroy_SuperNode_SCP(& this->L);
167 Destroy_CompCol_NCP(& this->U);
168 SUPERLU_FREE(this->perm_r);
169 SUPERLU_FREE(this->perm_c);
170 } else {
171
172 /* Command line options to modify default behavior. */
173 //parse_command_line(argc, argv, &nprocs, &lwork, &panel_size, &relax,
174 // &u, &fact, &trans, &refact, &equed);
175
176 if ( lwork > 0 ) {
177 work = SUPERLU_MALLOC(lwork);
178 OOFEM_LOG_DEBUG("Use work space of size LWORK = " IFMT " bytes\n", lwork);
179 if ( !work ) {
180 SUPERLU_ABORT("cannot allocate work[]");
181 }
182 }
183
184 //printf("Use work space of size LWORK = " IFMT " bytes\n", lwork);
185
186#if ( PRNTlevel == 1 )
187 cpp_defs();
188 printf( "int_t %d bytes\n", sizeof( int_t ) );
189#endif
190
191 if (CC->giveVersion() == this->lhsVersion) {
192 // reuse existing factorization
193 fact = FACTORED;
194 OOFEM_LOG_DEBUG ("SuperLU_MT:LHS already factored\n");
195 } else {
196 fact = DOFACT; //EQUILIBRATE;
197 // solve for new (updated) lhs
198 if (this->AAllocated) Destroy_SuperMatrix_Store(& this->A);
199 dCreate_CompCol_Matrix(& this->A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
200 this->AAllocated = true;
201 this->lhsVersion = CC->giveVersion();
202 OOFEM_LOG_DEBUG ("SuperLU_MT:Factoring.....\n");
203
204 if (!this->permAllocated) {
205 if ( !( this->perm_r = intMalloc(m) ) ) {
206 SUPERLU_ABORT("Malloc fails for perm_r[].");
207 }
208 if ( !( this->perm_c = intMalloc(n) ) ) {
209 SUPERLU_ABORT("Malloc fails for perm_c[].");
210 }
211 this->permAllocated = true;
212 }
213 }
214
215 if ( !( rhsb = doubleMalloc(m * nrhs) ) ) {
216 SUPERLU_ABORT("Malloc fails for rhsb[].");
217 }
218 if ( !( rhsx = doubleMalloc(m * nrhs) ) ) {
219 SUPERLU_ABORT("Malloc fails for rhsx[].");
220 }
221 dCreate_Dense_Matrix(& B, m, nrhs, b.givePointer(), m, SLU_DN, SLU_D, SLU_GE);
222 dCreate_Dense_Matrix(& X, m, nrhs, rhsx, m, SLU_DN, SLU_D, SLU_GE);
223 //dPrint_Dense_Matrix(&B);
224 //dPrint_Dense_Matrix(&X);
225
226 if ( !( R = ( double * ) SUPERLU_MALLOC( this->A.nrow * sizeof( double ) ) ) ) {
227 SUPERLU_ABORT("SUPERLU_MALLOC fails for R[].");
228 }
229 if ( !( C = ( double * ) SUPERLU_MALLOC( this->A.ncol * sizeof( double ) ) ) ) {
230 SUPERLU_ABORT("SUPERLU_MALLOC fails for C[].");
231 }
232 if ( !( ferr = ( double * ) SUPERLU_MALLOC( nrhs * sizeof( double ) ) ) ) {
233 SUPERLU_ABORT("SUPERLU_MALLOC fails for ferr[].");
234 }
235 if ( !( berr = ( double * ) SUPERLU_MALLOC( nrhs * sizeof( double ) ) ) ) {
236 SUPERLU_ABORT("SUPERLU_MALLOC fails for berr[].");
237 }
238
239 get_perm_c(this->permc_spec, & this->A, this->perm_c);
240
241 superlumt_options.SymmetricMode = YES;
242 superlumt_options.diag_pivot_thresh = 0.0;
243
244 superlumt_options.nprocs = nprocs;
245 superlumt_options.fact = fact;
246 superlumt_options.trans = trans;
247 superlumt_options.refact = refact;
248 superlumt_options.panel_size = panel_size;
249 superlumt_options.relax = relax;
250 superlumt_options.usepr = usepr;
251 superlumt_options.drop_tol = drop_tol;
252 superlumt_options.PrintStat = NO;
253 superlumt_options.perm_c = perm_c;
254 superlumt_options.perm_r = perm_r;
255 //superlumt_options.work = work;
256 superlumt_options.lwork = lwork;
257 if ( !( superlumt_options.etree = intMalloc(n) ) ) {
258 SUPERLU_ABORT("Malloc fails for etree[].");
259 }
260 if ( !( superlumt_options.colcnt_h = intMalloc(n) ) ) {
261 SUPERLU_ABORT("Malloc fails for colcnt_h[].");
262 }
263 if ( !( superlumt_options.part_super_h = intMalloc(n) ) ) {
264 SUPERLU_ABORT("Malloc fails for colcnt_h[].");
265 }
266
267 OOFEM_LOG_DEBUG ("sym_mode %d\tdiag_pivot_thresh %.4e\n",
268 superlumt_options.SymmetricMode,
269 superlumt_options.diag_pivot_thresh);
270
271 /*
272 * Solve the system and compute the condition number
273 * and error bounds using pdgssvx.
274 */
275 pdgssvx(nprocs, & superlumt_options, & this->A, this->perm_c, this->perm_r, & equed, R, C, & this->L, & this->U, & B, & X, & rpg, & rcond, ferr, berr, & superlu_memusage, & info);
276 x.resize( b.giveSize() );
277 this->convertRhs(& X, x);
278
279#if 0
280 printf("psgssvx(): info " IFMT "\n", info);
281
282 if ( info == 0 || info == n + 1 ) {
283 SCPformat *Lstore;
284 NCPformat *Ustore;
285
286 printf("Recip. pivot growth = %e\n", rpg);
287 printf("Recip. condition number = %e\n", rcond);
288 printf("%8s%16s%16s\n", "rhs", "FERR", "BERR");
289 for ( int i = 0; i < nrhs; ++i ) {
290 printf(IFMT "%16e%16e\n", i + 1, ferr [ i ], berr [ i ]);
291 }
292
293 Lstore = ( SCPformat * ) L.Store;
294 Ustore = ( NCPformat * ) U.Store;
295 printf("No of nonzeros in factor L = " IFMT "\n", Lstore->nnz);
296 printf("No of nonzeros in factor U = " IFMT "\n", Ustore->nnz);
297 printf("No of nonzeros in L+U = " IFMT "\n", Lstore->nnz + Ustore->nnz - n);
298 printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions " IFMT "\n",
299 superlu_memusage.for_lu / 1e6, superlu_memusage.total_needed / 1e6,
300 superlu_memusage.expansions);
301
302 fflush(stdout);
303 } else if ( info > 0 && lwork == -1 ) {
304 printf("** Estimated memory: " IFMT " bytes\n", info - n);
305 }
306#else
307 if ( info > 0 && lwork == -1 ) {
308 printf("** Estimated memory: " IFMT " bytes\n", info - n);
309 }
310#endif
311
312 SUPERLU_FREE(rhsb);
313 SUPERLU_FREE(rhsx);
314 //SUPERLU_FREE (xact);
315 SUPERLU_FREE(R);
316 SUPERLU_FREE(C);
317 SUPERLU_FREE(ferr);
318 SUPERLU_FREE(berr);
319 //Destroy_SuperMatrix_Store(& this->A);
320 Destroy_SuperMatrix_Store(& B);
321 Destroy_SuperMatrix_Store(& X);
322 // SUPERLU_FREE (superlumt_options.etree);
323 // SUPERLU_FREE (superlumt_options.colcnt_h);
325 if ( lwork == 0 ) {
326 //Destroy_SuperNode_SCP(& this->L);
327 //Destroy_CompCol_NCP(& this->U);
328 } else if ( lwork > 0 ) {
329 SUPERLU_FREE(work);
330 }
331 }
332#ifdef TIME_REPORT
333 timer.stopTimer();
334 OOFEM_LOG_INFO( "SuperLU_MT info: user time consumed by solution: %.2fs\n", timer.getUtime() );
335 OOFEM_LOG_INFO( "SuperLU_MT info: real time consumed by solution: %.2fs\n", timer.getWtime() );
336#endif
337
338 } else {
339 OOFEM_ERROR("Incompatible sparse storage encountered");
340 }
341
342 //dPrint_Dense_Matrix(&B);
343 /*
344 * Lstore = (SCPformat *) L.Store;
345 * Ustore = (NCPformat *) U.Store;
346 * printf("No of nonzeros in factor L = " IFMT "\n", Lstore->nnz);
347 * printf("No of nonzeros in factor U = " IFMT "\n", Ustore->nnz);
348 *
349 * fflush(stdout);
350 * dPrint_CompCol_Matrix(&L);
351 * dPrint_CompCol_Matrix(&U);
352 */
353 //solved = 1;
354 return CR_CONVERGED;
355}
356
357void
358SuperLUSolver :: convertRhs(SuperMatrix *X, FloatArray &x)
359{
360 x.zero();
361
362 if ( ( X->Stype == SLU_DN ) && ( X->Dtype == SLU_D ) ) {
363 // process only first column //int r;
364
365 DNformat *data = ( ( DNformat * ) ( X->Store ) );
366
367#pragma omp parallel
368 {
369#pragma omp parallel for
370 for ( int r = 0; r <= data->lda - 1; r++ ) {
371 //r = data->nzval[i];
372 x.at(r + 1) = ( ( double * ) data->nzval ) [ r ];
373 }
374 }
375 } else {
376 OOFEM_ERROR("convertRhs: unsupported matrix storage type or data type");
377 }
378}
379
380//dPrint - following code use to print SuperLU matrix
381int_t SuperLUSolver :: dPrint_CompCol_Matrix(SuperMatrix *A)
382{
383 NCformat *Astore;
384 double *dp;
385
386 printf("\nCompCol matrix: ");
387 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype, A->Dtype, A->Mtype);
388 Astore = ( NCformat * ) A->Store;
389 dp = ( double * ) Astore->nzval;
390 printf("nrow " IFMT ", ncol " IFMT ", nnz " IFMT "\n", A->nrow, A->ncol, Astore->nnz);
391 printf("\nnzval: ");
392 for ( int_t i = 0; i < Astore->nnz; ++i ) {
393 printf("%f ", dp [ i ]);
394 }
395 printf("\nrowind: ");
396 for ( int_t i = 0; i < Astore->nnz; ++i ) {
397 printf(IFMT, Astore->rowind [ i ]);
398 }
399 printf("\ncolptr: ");
400 for ( int_t i = 0; i <= A->ncol; ++i ) {
401 printf(IFMT, Astore->colptr [ i ]);
402 }
403 printf("\nend CompCol matrix.\n");
404
405 return 0;
406}
407
408int_t SuperLUSolver :: dPrint_Dense_Matrix(SuperMatrix *A)
409{
410 DNformat *Astore;
411 double *dp;
412
413 printf("\nDense matrix: ");
414 printf("Stype %d , Dtype %d , Mtype %d\n", A->Stype, A->Dtype, A->Mtype);
415 Astore = ( DNformat * ) A->Store;
416 dp = ( double * ) Astore->nzval;
417 printf("nrow " IFMT ", ncol " IFMT ", lda " IFMT "\n",
418 A->nrow, A->ncol, Astore->lda);
419 printf("\nnzval: ");
420 for ( int_t i = 0; i < A->nrow; ++i ) {
421 printf("%f ", dp [ i ]);
422 }
423 printf("\nend Dense matrix.\n");
424
425 return 0;
426}
427
428int_t
429dCheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg,
430 int_t *colend, int_t *perm)
431{
432 int_t nd = 0;
433
434 for ( int_t j = 0; j < n; ++j ) {
435 int_t nzd = 0;
436 for ( int_t i = colbeg [ j ]; i < colend [ j ]; ++i ) {
437 if ( perm [ rowind [ i ] ] == j ) {
438 nzd = 1;
439 ++nd;
440 break;
441 }
442 }
443 if ( nzd == 0 ) {
444 printf("Zero diagonal at column " IFMT "\n", j);
445 }
446 }
447
448 printf(".. dCheckZeroDiagonal() -- # diagonals " IFMT "\n", nd);
449
450 return 0;
451}
452} // end namespace oofem
#define REGISTER_SparseLinSolver(class, type)
FloatArray & giveValues()
Definition compcol.h:126
IntArray & giveColPtr()
Definition compcol.h:128
const int giveNumberOfNonzeros()
Definition compcol.h:131
IntArray & giveRowIndex()
Definition compcol.h:127
void resize(Index s)
Definition floatarray.C:94
double & at(Index i)
Definition floatarray.h:202
Index giveSize() const
Returns the size of receiver.
Definition floatarray.h:261
void zero()
Zeroes all coefficients of receiver.
Definition floatarray.C:683
const double * givePointer() const
Definition floatarray.h:506
const int * givePointer() const
Definition intarray.h:345
SparseMtrxVersionType giveVersion()
Return receiver version.
Definition sparsemtrx.h:94
int giveNumberOfRows() const
Returns number of rows of receiver.
Definition sparsemtrx.h:114
int giveNumberOfColumns() const
Returns number of columns of receiver.
Definition sparsemtrx.h:116
SparseMtrx::SparseMtrxVersionType lhsVersion
void convertRhs(SuperMatrix *A, FloatArray &x)
double getUtime()
Returns total user time elapsed in seconds.
Definition timer.C:105
void startTimer()
Definition timer.C:69
void stopTimer()
Definition timer.C:77
double getWtime()
Returns total elapsed wall clock time in seconds.
Definition timer.C:111
#define OOFEM_ERROR(...)
Definition error.h:79
#define IR_GIVE_OPTIONAL_FIELD(__ir, __value, __id)
Definition inputrecord.h:75
#define OOFEM_LOG_INFO(...)
Definition logger.h:143
#define OOFEM_LOG_DEBUG(...)
Definition logger.h:144
int_t dCheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg, int_t *colend, int_t *perm)
double rcond(const FloatMatrixF< N, N > &mat, int p=1)
#define _IFT_SuperLUSolver_Permcspec

This page is part of the OOFEM-3.0 documentation. Copyright Copyright (C) 1994-2025 Borek Patzak Bořek Patzák
Project e-mail: oofem@fsv.cvut.cz
Generated at for OOFEM by doxygen 1.15.0 written by Dimitri van Heesch, © 1997-2011