Actual source code: ex5s.c

  1: /*$Id: ex5s.c,v 1.29 2001/08/07 21:31:17 bsmith Exp $*/

  3: static char help[] = "2d Bratu problem in shared memory parallel with SNES.\n\
  4: We solve the  Bratu (SFI - solid fuel ignition) problem in a 2D rectangular\n\
  5: domain, uses SHARED MEMORY to evaluate the user function.\n\
  6: The command line options include:\n\
  7:   -par <parameter>, where <parameter> indicates the problem's nonlinearity\n\
  8:      problem SFI:  <parameter> = Bratu parameter (0 <= par <= 6.81)\n\
  9:   -mx <xg>, where <xg> = number of grid points in the x-direction\n\
 10:   -my <yg>, where <yg> = number of grid points in the y-direction\n\
 11:   -use_fortran_function: use Fortran coded function, rather than C\n";

 13: /*
 14:              This code compiles ONLY on SGI systems
 15:             ========================================
 16: */
 17: /*T
 18:    Concepts: SNES^parallel Bratu example
 19:    Concepts: shared memory
 20:    Processors: n
 21: T*/

 23: /*

 25:      Programming model: Combination of 
 26:         1) MPI message passing for PETSc routines
 27:         2) automatic loop parallism (using shared memory) for user
 28:            provided function.

 30:        While the user function is being evaluated all MPI processes except process
 31:      0 blocks. Process zero spawns nt threads to evaluate the user function. Once 
 32:      the user function is complete, the worker threads are suspended and all the MPI processes
 33:      continue.

 35:      Other useful options:

 37:        -snes_mf : use matrix free operator and no preconditioner
 38:        -snes_mf_operator : use matrix free operator but compute Jacobian via
 39:                            finite differences to form preconditioner

 41:        Environmental variable:

 43:          setenv MPC_NUM_THREADS nt <- set number of threads processor 0 should 
 44:                                       use to evaluate user provided function

 46:        Note: The number of MPI processes (set with the mpirun option -np) can 
 47:        be set completely independently from the number of threads process 0 
 48:        uses to evaluate the function (though usually one would make them the same).
 49: */
 50: 
 51: /* ------------------------------------------------------------------------

 53:     Solid Fuel Ignition (SFI) problem.  This problem is modeled by
 54:     the partial differential equation
 55:   
 56:             -Laplacian u - lambda*exp(u) = 0,  0 < x,y < 1,
 57:   
 58:     with boundary conditions
 59:    
 60:              u = 0  for  x = 0, x = 1, y = 0, y = 1.
 61:   
 62:     A finite difference approximation with the usual 5-point stencil
 63:     is used to discretize the boundary value problem to obtain a nonlinear 
 64:     system of equations.

 66:     The uniprocessor version of this code is snes/examples/tutorials/ex4.c
 67:     A parallel distributed memory version is snes/examples/tutorials/ex5.c and ex5f.F

 69:   ------------------------------------------------------------------------- */

 71: /* 
 72:    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
 73:    file automatically includes:
 74:      petsc.h       - base PETSc routines   petscvec.h - vectors
 75:      petscsys.h    - system routines       petscmat.h - matrices
 76:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
 77:      petscviewer.h - viewers               petscpc.h  - preconditioners
 78:      petscksp.h   - linear solvers
 79: */
 80:  #include petscsnes.h

 82: /* 
 83:    User-defined application context - contains data needed by the 
 84:    application-provided call-back routines   FormFunction().
 85: */
 86: typedef struct {
 87:    PetscReal   param;          /* test problem parameter */
 88:    int         mx,my;          /* discretization in x, y directions */
 89:    int         rank;           /* processor rank */
 90: } AppCtx;

 92: /* 
 93:    User-defined routines
 94: */
 95: extern int FormFunction(SNES,Vec,Vec,void*),FormInitialGuess(AppCtx*,Vec);
 96: extern int FormFunctionFortran(SNES,Vec,Vec,void*);

100: /* 
101:     The main program is written in C while the user provided function
102:  is given in both Fortran and C. The main program could also be written 
103:  in Fortran; the ONE PROBLEM is that VecGetArray() cannot be called from 
104:  Fortran on the SGI machines; thus the routine FormFunctionFortran() must
105:  be written in C.
106: */
107: int main(int argc,char **argv)
108: {
109:   SNES           snes;                /* nonlinear solver */
110:   Vec            x,r;                 /* solution, residual vectors */
111:   AppCtx         user;                /* user-defined work context */
112:   int            its;                 /* iterations for convergence */
113:   int            N,ierr,rstart,rend,*colors,i,ii,ri,rj;
114:   int            (*fnc)(SNES,Vec,Vec,void*);
115:   PetscReal      bratu_lambda_max = 6.81,bratu_lambda_min = 0.;
116:   MatFDColoring  fdcoloring;
117:   ISColoring     iscoloring;
118:   Mat            J;
119:   PetscScalar    zero = 0.0;
120:   PetscTruth     flg;

122:   PetscInitialize(&argc,&argv,(char *)0,help);
123:   MPI_Comm_rank(PETSC_COMM_WORLD,&user.rank);

125:   /*
126:      Initialize problem parameters
127:   */
128:   user.mx = 4; user.my = 4; user.param = 6.0;
129:   PetscOptionsGetInt(PETSC_NULL,"-mx",&user.mx,PETSC_NULL);
130:   PetscOptionsGetInt(PETSC_NULL,"-my",&user.my,PETSC_NULL);
131:   PetscOptionsGetReal(PETSC_NULL,"-par",&user.param,PETSC_NULL);
132:   if (user.param >= bratu_lambda_max || user.param <= bratu_lambda_min) {
133:     SETERRQ(1,"Lambda is out of range");
134:   }
135:   N = user.mx*user.my;

137:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
138:      Create nonlinear solver context
139:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

141:   SNESCreate(PETSC_COMM_WORLD,&snes);

143:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
144:      Create vector data structures; set function evaluation routine
145:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

147:   /*
148:       The routine VecCreateShared() creates a parallel vector with each processor
149:     assigned its own segment, BUT, in addition, the first processor has access to the 
150:     entire array. This is to allow the users function to be based on loop level
151:     parallelism rather than MPI.
152:   */
153:   VecCreateShared(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);
154:   VecDuplicate(x,&r);

156:   PetscOptionsHasName(PETSC_NULL,"-use_fortran_function",&flg);
157:   if (flg) {
158:     fnc = FormFunctionFortran;
159:   } else {
160:     fnc = FormFunction;
161:   }

163:   /* 
164:      Set function evaluation routine and vector
165:   */
166:   SNESSetFunction(snes,r,fnc,&user);

168:   /*
169:        Currently when using VecCreateShared() and using loop level parallelism
170:     to automatically parallelise the user function it makes no sense for the 
171:     Jacobian to be computed via loop level parallelism, because all the threads
172:     would be simultaneously calling MatSetValues() causing a bottle-neck.

174:     Thus this example uses the PETSc Jacobian calculations via finite differencing
175:     to approximate the Jacobian
176:   */

178:   /*

180:   */
181:   VecGetOwnershipRange(r,&rstart,&rend);
182:   PetscMalloc((rend-rstart)*sizeof(int),&colors);
183:   for (i=rstart; i<rend; i++) {
184:     colors[i - rstart] = 3*((i/user.mx) % 3) + (i % 3);
185:   }
186:   ISColoringCreate(PETSC_COMM_WORLD,rend-rstart,colors,&iscoloring);
187:   PetscFree(colors);

189:   /*
190:      Create and set the nonzero pattern for the Jacobian: This is not done 
191:      particularly efficiently. One should process the boundary nodes seperately and 
192:      then use a simple loop for the interior nodes.
193:        Note that for this code we use the "natural" number of the nodes on the 
194:      grid (since that is what is good for the user provided function). In the 
195:      DA examples we must use the DA numbering where each processor is assigned a
196:      chunk of data.
197:   */
198:   MatCreateMPIAIJ(PETSC_COMM_WORLD,rend-rstart,rend-rstart,N,
199:                          N,5,0,0,0,&J);
200:   for (i=rstart; i<rend; i++) {
201:     rj = i % user.mx;         /* column in grid */
202:     ri = i / user.mx;         /* row in grid */
203:     if (ri != 0) {     /* first row does not have neighbor below */
204:       ii   = i - user.mx;
205:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
206:     }
207:     if (ri != user.my - 1) { /* last row does not have neighbors above */
208:       ii   = i + user.mx;
209:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
210:     }
211:     if (rj != 0) {     /* first column does not have neighbor to left */
212:       ii   = i - 1;
213:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
214:     }
215:     if (rj != user.mx - 1) {     /* last column does not have neighbor to right */
216:       ii   = i + 1;
217:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
218:     }
219:     MatSetValues(J,1,&i,1,&i,&zero,INSERT_VALUES);
220:   }
221:   MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
222:   MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);

224:   /*
225:        Create the data structure that SNESDefaultComputeJacobianColor() uses
226:        to compute the actual Jacobians via finite differences.
227:   */
228:   MatFDColoringCreate(J,iscoloring,&fdcoloring);
229:   MatFDColoringSetFunction(fdcoloring,(int (*)(void))fnc,&user);
230:   MatFDColoringSetFromOptions(fdcoloring);
231:   /*
232:         Tell SNES to use the routine SNESDefaultComputeJacobianColor()
233:       to compute Jacobians.
234:   */
235:   SNESSetJacobian(snes,J,J,SNESDefaultComputeJacobianColor,fdcoloring);
236:   ISColoringDestroy(iscoloring);


239:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
240:      Customize nonlinear solver; set runtime options
241:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

243:   /*
244:      Set runtime options (e.g., -snes_monitor -snes_rtol <rtol> -ksp_type <type>)
245:   */
246:   SNESSetFromOptions(snes);

248:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
249:      Evaluate initial guess; then solve nonlinear system
250:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
251:   /*
252:      Note: The user should initialize the vector, x, with the initial guess
253:      for the nonlinear solver prior to calling SNESSolve().  In particular,
254:      to employ an initial guess of zero, the user should explicitly set
255:      this vector to zero by calling VecSet().
256:   */
257:   FormInitialGuess(&user,x);
258:   SNESSolve(snes,x);
259:   SNESGetIterationNumber(snes,&its);
260:   PetscPrintf(PETSC_COMM_WORLD,"Number of Newton iterations = %d\n",its);

262:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
263:      Free work space.  All PETSc objects should be destroyed when they
264:      are no longer needed.
265:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
266:   VecDestroy(x);
267:   VecDestroy(r);
268:   SNESDestroy(snes);
269:   PetscFinalize();

271:   return 0;
272: }
273: /* ------------------------------------------------------------------- */

277: /* 
278:    FormInitialGuess - Forms initial approximation.

280:    Input Parameters:
281:    user - user-defined application context
282:    X - vector

284:    Output Parameter:
285:    X - vector
286:  */
287: int FormInitialGuess(AppCtx *user,Vec X)
288: {
289:   int          i,j,row,mx,my,ierr;
290:   PetscReal    one = 1.0,lambda,temp1,temp,hx,hy,hxdhy,hydhx,sc;
291:   PetscScalar  *x;

293:   /*
294:       Process 0 has to wait for all other processes to get here 
295:    before proceeding to write in the shared vector
296:   */
297:   PetscBarrier((PetscObject)X);
298:   if (user->rank) {
299:      /*
300:         All the non-busy processors have to wait here for process 0 to finish
301:         evaluating the function; otherwise they will start using the vector values
302:         before they have been computed
303:      */
304:      PetscBarrier((PetscObject)X);
305:      return 0;
306:   }

308:   mx = user->mx;            my = user->my;            lambda = user->param;
309:   hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
310:   sc = hx*hy*lambda;        hxdhy = hx/hy;            hydhx = hy/hx;
311:   temp1 = lambda/(lambda + one);

313:   /*
314:      Get a pointer to vector data.
315:        - For default PETSc vectors, VecGetArray() returns a pointer to
316:          the data array.  Otherwise, the routine is implementation dependent.
317:        - You MUST call VecRestoreArray() when you no longer need access to
318:          the array.
319:   */
320:   VecGetArray(X,&x);

322:   /*
323:      Compute initial guess over the locally owned part of the grid
324:   */
325: #pragma arl(4)
326: #pragma distinct (*x,*f)
327: #pragma no side effects (sqrt)
328:   for (j=0; j<my; j++) {
329:     temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
330:     for (i=0; i<mx; i++) {
331:       row = i + j*mx;
332:       if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
333:         x[row] = 0.0;
334:         continue;
335:       }
336:       x[row] = temp1*sqrt(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
337:     }
338:   }

340:   /*
341:      Restore vector
342:   */
343:   VecRestoreArray(X,&x);

345:   PetscBarrier((PetscObject)X);
346:   return 0;
347: }
348: /* ------------------------------------------------------------------- */
351: /* 
352:    FormFunction - Evaluates nonlinear function, F(x).

354:    Input Parameters:
355: .  snes - the SNES context
356: .  X - input vector
357: .  ptr - optional user-defined context, as set by SNESSetFunction()

359:    Output Parameter:
360: .  F - function vector
361:  */
362: int FormFunction(SNES snes,Vec X,Vec F,void *ptr)
363: {
364:   AppCtx       *user = (AppCtx*)ptr;
365:   int          ierr,i,j,row,mx,my;
366:   PetscReal    two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
367:   PetscScalar  u,uxx,uyy,*x,*f;

369:   /*
370:       Process 0 has to wait for all other processes to get here 
371:    before proceeding to write in the shared vector
372:   */
373:   PetscBarrier((PetscObject)X);

375:   if (user->rank) {
376:      /*
377:         All the non-busy processors have to wait here for process 0 to finish
378:         evaluating the function; otherwise they will start using the vector values
379:         before they have been computed
380:      */
381:      PetscBarrier((PetscObject)X);
382:      return 0;
383:   }

385:   mx = user->mx;            my = user->my;            lambda = user->param;
386:   hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
387:   sc = hx*hy*lambda;        hxdhy = hx/hy;            hydhx = hy/hx;

389:   /*
390:      Get pointers to vector data
391:   */
392:   VecGetArray(X,&x);
393:   VecGetArray(F,&f);

395:   /*
396:       The next line tells the SGI compiler that x and f contain no overlapping 
397:     regions and thus it can use addition optimizations.
398:   */
399: #pragma arl(4)
400: #pragma distinct (*x,*f)
401: #pragma no side effects (exp)

403:   /*
404:      Compute function over the entire  grid
405:   */
406:   for (j=0; j<my; j++) {
407:     for (i=0; i<mx; i++) {
408:       row = i + j*mx;
409:       if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
410:         f[row] = x[row];
411:         continue;
412:       }
413:       u = x[row];
414:       uxx = (two*u - x[row-1] - x[row+1])*hydhx;
415:       uyy = (two*u - x[row-mx] - x[row+mx])*hxdhy;
416:       f[row] = uxx + uyy - sc*exp(u);
417:     }
418:   }

420:   /*
421:      Restore vectors
422:   */
423:   VecRestoreArray(X,&x);
424:   VecRestoreArray(F,&f);

426:   PetscLogFlops(11*(mx-2)*(my-2))
427:   PetscBarrier((PetscObject)X);
428:   return 0;
429: }

431: #if defined(PETSC_HAVE_FORTRAN_CAPS)
432: #define applicationfunctionfortran_ APPLICATIONFUNCTIONFORTRAN
433: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
434: #define applicationfunctionfortran_ applicationfunctionfortran
435: #endif

437: /* ------------------------------------------------------------------- */
440: /* 
441:    FormFunctionFortran - Evaluates nonlinear function, F(x) in Fortran.

443: */
444: int FormFunctionFortran(SNES snes,Vec X,Vec F,void *ptr)
445: {
446:   AppCtx  *user = (AppCtx*)ptr;
447:   int     ierr;
448:   PetscScalar  *x,*f;

450:   /*
451:       Process 0 has to wait for all other processes to get here 
452:    before proceeding to write in the shared vector
453:   */
454:   PetscBarrier((PetscObject)snes);
455:   if (!user->rank) {
456:     VecGetArray(X,&x);
457:     VecGetArray(F,&f);
458:     applicationfunctionfortran_(&user->param,&user->mx,&user->my,x,f,&ierr);
459:     VecRestoreArray(X,&x);
460:     VecRestoreArray(F,&f);
461:     PetscLogFlops(11*(user->mx-2)*(user->my-2))
462:   }
463:   /*
464:       All the non-busy processors have to wait here for process 0 to finish
465:       evaluating the function; otherwise they will start using the vector values
466:       before they have been computed
467:   */
468:   PetscBarrier((PetscObject)snes);
469:   return 0;
470: }