1 | /*************************************** 2 | $Header: /cvsroot/petscgraphics/illumulti.c,v 1.25 2003/08/04 13:58:45 hazelsct Exp $ 3 | 4 | This file contains the functions 5 | +latex+{\tt IlluMultiSave()}, {\tt IlluMultiLoad()} and {\tt IlluMultiRead()} 6 | +html+ <tt>IlluMultiSave()</tt>, <tt>IlluMultiLoad()</tt> and 7 | +html+ <tt>IlluMultiRead()</tt> 8 | designed to handle distributed storage and retrieval of data on local drives 9 | of machines in a Beowulf cluster. This should allow rapid loading of 10 | timestep data for "playback" from what is essentially a giant RAID-0 array of 11 | distributed disks, at enormously higher speeds than via NFS from a hard drive 12 | or RAID array on the head node. The danger of course is that if one node's 13 | disk goes down, you don't have a valid data set any more, but that's the 14 | nature of RAID-0, right? 15 | +latex+\par 16 | +html+ <p> 17 | The filenames saved are: 18 | +latex+\begin{itemize} \item {\tt $<$basename$>$.cpu\#\#\#\#.meta}, where 19 | +latex+{\tt \#\#\#\#} 20 | +html+ <ul><li><tt><basename>.cpu####.meta</tt>, where <tt>####</tt> 21 | is replaced by the CPU number (more than four digits if there are more than 22 | 9999 CPUs :-), which has the metadata for the whole thing in XML format 23 | (written by 24 | +latex+{\tt GNOME libxml}), 25 | +html+ <tt>GNOME libxml</tt>), 26 | as described in the notes on the 27 | +latex+{\tt IlluMultiStoreXML()} function. 28 | +html+ <tt>IlluMultiStoreXML()</tt> function. 29 | +latex+\item {\tt $<$basename$>$.cpu\#\#\#\#.data} 30 | +html+ </li><li><tt><basename>.cpu####.data</tt> 31 | which is simply a stream of the raw data, optionally compressed by 32 | +latex+{\tt gzip}. \end{itemize} 33 | +html+ <tt>gzip</tt>.</li></ul> 34 | If one were saving timesteps, one might include a timestep number in the 35 | basename, and also timestep and simulation time in the metadata. The 36 | metadata can also hold simulation parameters, etc. 37 | +latex+\par 38 | +html+ <p> 39 | This supports 1-D, 2-D and 3-D distributed arrays. As an extra feature, you 40 | can load a multi-CPU distributed array scattered over lots of files into a 41 | single CPU, to facilitate certain modes of data visualization. 42 | ***************************************/ 43 | 44 | 45 | #include "illuminator.h" /* Also includes petscda.h */ 46 | #include <glib.h> /* for guint*, GUINT*_SWAP_LE_BE */ 47 | #include <petscblaslapack.h> /* for BLcopy_() */ 48 | #include <tree.h> /* To build the XML tree to store */ 49 | #include <parser.h> /* XML parsing */ 50 | #include <stdio.h> /* FILE, etc. */ 51 | #include <string.h> /* strncpy(), etc. */ 52 | #include <stdlib.h> /* malloc() and free() */ 53 | 54 | /* Build with -DDEBUG for debugging output */ 55 | #undef DPRINTF 56 | #ifdef DEBUG 57 | #define DPRINTF(fmt, args...) ierr = PetscPrintf (PETSC_COMM_WORLD, "%s: " fmt, __FUNCT__, args); CHKERRQ (ierr) 58 | #else 59 | #define DPRINTF(fmt, args...) 60 | #endif 61 | 62 | 63 | #undef __FUNCT__ 64 | #define __FUNCT__ "IlluMultiParseXML" 65 | 66 | /*++++++++++++++++++++++++++++++++++++++ 67 | This function reads in the XML metadata document and returns the various 68 | parameter values in the addresses pointed to by the arguments. It is called 69 | by IlluMultiLoad() and IlluMultiRead(). 70 | 71 | int IlluMultiParseXML It returns zero or an error code. 72 | 73 | char *basename Base file name. 74 | 75 | int rank CPU number to read data for. 76 | 77 | int *compressed Data compression: if zero then no compression (fastest), 1-9 78 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 79 | guint32s representing relative values between min and max for each field, 80 | compressed according to this value minus 16. Likewise for 32-47 and 81 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 82 | information and can't be used for accurate checkpointing, but they should 83 | retain enough data for visualization (except perhaps for the guint8s, which 84 | are possibly acceptable for vectors but likely not contours). 85 | 86 | int *wrongendian Tells whether the data are stored in the opposite endian 87 | format from this platform, and thus must be switched when the data are 88 | streamed in. 89 | 90 | int *dim Dimensionality of the space. 91 | 92 | int *px Number of processors in the 93 | +latex+$x$-direction. 94 | +html+ <i>x</i>-direction. 95 | 96 | int *py Number of processors in the 97 | +latex+$y$-direction. 98 | +html+ <i>y</i>-direction. 99 | 100 | int *pz Number of processors in the 101 | +latex+$z$-direction. 102 | +html+ <i>z</i>-direction. 103 | 104 | int *nx Number of grid points over the entire array in the 105 | +latex+$x$-direction. 106 | +html+ <i>x</i>-direction. 107 | 108 | int *ny Number of grid points over the entire array in the 109 | +latex+$y$-direction. 110 | +html+ <i>y</i>-direction. 111 | 112 | int *nz Number of grid points over the entire array in the 113 | +latex+$z$-direction. 114 | +html+ <i>z</i>-direction. 115 | 116 | PetscScalar *wx Physical overall width in the 117 | +latex+$x$-direction, {\tt PETSC\_NULL} if not needed. 118 | +html+ <i>x</i>-direction, <tt>PETSC_NULL</tt> if not needed. 119 | 120 | PetscScalar *wy Physical overall width in the 121 | +latex+$y$-direction, {\tt PETSC\_NULL} if not needed. 122 | +html+ <i>y</i>-direction, <tt>PETSC_NULL</tt> if not needed. 123 | 124 | PetscScalar *wz Physical overall width in the 125 | +latex+$z$-direction, {\tt PETSC\_NULL} if not needed. 126 | +html+ <i>z</i>-direction, <tt>PETSC_NULL</tt> if not needed. 127 | 128 | int *xm Number of grid points over the local part of the array in the 129 | +latex+$x$-direction. 130 | +html+ <i>x</i>-direction. 131 | 132 | int *ym Number of grid points over the local part of the array in the 133 | +latex+$y$-direction. 134 | +html+ <i>y</i>-direction. 135 | 136 | int *zm Number of grid points over the local part of the array in the 137 | +latex+$z$-direction. 138 | +html+ <i>z</i>-direction. 139 | 140 | int *dof Degrees of freedom at each node, a.k.a. number of field variables. 141 | 142 | int *sw Stencil width. 143 | 144 | DAStencilType *st Stencil type, given by the 145 | +latex+{\tt PETSc enum} values. 146 | +html+ <tt>PETSc enum</tt> values. 147 | 148 | DAPeriodicType *wrap Periodic type, given by the 149 | +latex+{\tt PETSc enum} values. 150 | +html+ <tt>PETSc enum</tt> values. 151 | 152 | char ***fieldnames Names of the field variables. 153 | 154 | field_plot_type **fieldtypes Data (plot) types for field variables, 155 | +latex+{\tt PETSC\_NULL} if not needed. 156 | +html+ <tt>PETSC_NULL</tt> if not needed. 157 | 158 | PetscScalar **fieldmin Minimum value of each field variable. 159 | 160 | PetscScalar **fieldmax Maximum value of each field variable. 161 | 162 | int *usermetacount Number of user metadata parameters. 163 | 164 | char ***usermetanames User metadata parameter names. 165 | 166 | char ***usermetadata User metadata parameter strings. 167 | ++++++++++++++++++++++++++++++++++++++*/ 168 | 169 | static int IlluMultiParseXML 170 | (char *basename, int rank, int *compressed, int *wrongendian, 171 | int *dim, int *px,int *py,int *pz, int *nx,int *ny,int *nz, 172 | PetscScalar *wx,PetscScalar *wy,PetscScalar *wz, int *xm,int *ym,int *zm, 173 | int *dof, int *sw, DAStencilType *st, DAPeriodicType *wrap, 174 | char ***fieldnames, field_plot_type **fieldtypes, PetscScalar **fieldmin, 175 | PetscScalar **fieldmax, 176 | int *usermetacount, char ***usermetanames, char ***usermetadata) 177 | { 178 | xmlDocPtr thedoc; 179 | xmlNodePtr thenode; 180 | char filename [1000]; 181 | xmlChar *buffer, *version; 182 | int field=0, ierr; 183 | 184 | if (snprintf (filename, 999, "%s.cpu%.4d.meta", basename, rank) > 999) 185 | return 1; 186 | 187 | DPRINTF ("Parsing XML file %s\n", filename); 188 | thedoc = xmlParseFile (filename); 189 | if (thedoc == NULL) 190 | IllErrorHandler (PETSC_ERR_FILE_OPEN, "xmlParseFile returned NULL\n"); 191 | 192 | version = xmlGetProp (thedoc -> root, "version"); 193 | if (strncmp (version, "0.1", 3) && strncmp (version, "0.2", 3)) 194 | { 195 | free (version); 196 | IllErrorHandler (PETSC_ERR_FILE_UNEXPECTED, 197 | "Version is neither 0.1 nor 0.2, can't parse\n"); 198 | } 199 | 200 | *wrongendian = 0; 201 | buffer = xmlGetProp (thedoc -> root, "endian"); 202 | #ifdef WORDS_BIGENDIAN 203 | if (strncasecmp (buffer, "big", 3)) 204 | *wrongendian = 1; 205 | #else 206 | if (!strncasecmp (buffer, "big", 3)) 207 | *wrongendian = 1; 208 | #endif 209 | free (buffer); 210 | 211 | buffer = xmlGetProp (thedoc -> root, "compression"); 212 | /* Have to handle all of the various compressed string values */ 213 | /* TODO: make this deal with "float" and "complex" */ 214 | if (buffer[0] == 'l' || buffer[0] == 'L') 215 | { 216 | if (buffer[4] == 'n' || buffer[4] == 'N') 217 | *compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_NONE; 218 | else if (buffer[4] >= '1' && buffer[4] <= '9') 219 | { 220 | sscanf (buffer+4, "%d", compressed); 221 | *compressed |= COMPRESS_INT_LONG; 222 | } 223 | else if (buffer[4] == 'b' || buffer[4] == 'B') 224 | *compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_BEST; 225 | else 226 | return 1; 227 | } 228 | else if (buffer[0] == 's' || buffer[0] == 'S') 229 | { 230 | if (buffer[5] == 'n' || buffer[5] == 'N') 231 | *compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_NONE; 232 | else if (buffer[5] >= '1' && buffer[5] <= '9') 233 | { 234 | sscanf (buffer+5, "%d", compressed); 235 | *compressed |= COMPRESS_INT_SHORT; 236 | } 237 | else if (buffer[5] == 'b' || buffer[5] == 'B') 238 | *compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_BEST; 239 | else 240 | return 1; 241 | } 242 | else if (buffer[0] == 'c' || buffer[0] == 'C') 243 | { 244 | if (buffer[4] == 'n' || buffer[4] == 'N') 245 | *compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_NONE; 246 | else if (buffer[4] >= '1' && buffer[4] <= '9') 247 | { 248 | sscanf (buffer+4, "%d", compressed); 249 | *compressed |= COMPRESS_INT_CHAR; 250 | } 251 | else if (buffer[4] == 'b' || buffer[4] == 'B') 252 | *compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_BEST; 253 | else 254 | return 1; 255 | } 256 | else 257 | { 258 | if (buffer[0] == 'n' || buffer[0] == 'N') 259 | *compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_NONE; 260 | else if (buffer[0] >= '1' && buffer[0] <= '9') 261 | { 262 | sscanf (buffer, "%d", compressed); 263 | *compressed |= COMPRESS_INT_NONE; 264 | } 265 | else if (buffer[0] == 'b' || buffer[0] == 'B') 266 | *compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_BEST; 267 | else 268 | return 1; 269 | } 270 | free (buffer); 271 | DPRINTF ("Root node: version %s, %s endian, compression %d|%d\n", 272 | version, *wrongendian ? "switched" : "native", 273 | *compressed & COMPRESS_INT_MASK, *compressed & COMPRESS_GZIP_MASK); 274 | 275 | *fieldnames = PETSC_NULL; 276 | if (fieldtypes != PETSC_NULL) 277 | *fieldtypes = PETSC_NULL; 278 | *fieldmin = *fieldmax = PETSC_NULL; 279 | *usermetacount = 0; 280 | *usermetanames = *usermetadata = PETSC_NULL; 281 | 282 | /* The main parsing loop; because xmlStringGetNodeList() doesn't work. */ 283 | for (thenode = thedoc->root->xmlChildrenNode; 284 | thenode; 285 | thenode = thenode->next) 286 | { 287 | DPRINTF ("Looping through nodes, this is %s, fieldnames 0x%lx\n", 288 | thenode->name, *fieldnames); 289 | if (!strncasecmp (thenode->name, "GlobalCPUs", 10)) 290 | { 291 | buffer = xmlGetProp (thenode, "dimensions"); 292 | sscanf (buffer, "%d", dim); 293 | free (buffer); 294 | 295 | buffer = xmlGetProp (thenode, "xwidth"); 296 | sscanf (buffer, "%d", px); 297 | free (buffer); 298 | 299 | if (*dim > 1) 300 | { 301 | buffer = xmlGetProp (thenode, "ywidth"); 302 | sscanf (buffer, "%d", py); 303 | free (buffer); 304 | 305 | if (*dim > 2) 306 | { 307 | buffer = xmlGetProp (thenode, "zwidth"); 308 | sscanf (buffer, "%d", pz); 309 | free (buffer); 310 | } 311 | else 312 | *pz = 1; 313 | } 314 | else 315 | *py = 1; 316 | } 317 | 318 | else if (!strncasecmp (thenode->name, "GlobalSize", 10)) 319 | { 320 | buffer = xmlGetProp (thenode, "xwidth"); 321 | sscanf (buffer, "%d", nx); 322 | free (buffer); 323 | /*+ For GlobalSize, since there's no *size attribute (for an 0.1 324 | version document), assume 1. +*/ 325 | buffer = xmlGetProp (thenode, "xsize"); 326 | if (wx != PETSC_NULL) 327 | { 328 | if (buffer) 329 | #ifdef PETSC_USE_SINGLE 330 | sscanf (buffer, "%f", wx); 331 | #else 332 | sscanf (buffer, "%lf", wx); 333 | #endif 334 | else 335 | *wx = 1.; 336 | } 337 | if (buffer) 338 | free (buffer); 339 | 340 | if (*dim > 1) 341 | { 342 | buffer = xmlGetProp (thenode, "ywidth"); 343 | sscanf (buffer, "%d", ny); 344 | free (buffer); 345 | buffer = xmlGetProp (thenode, "ysize"); 346 | if (wy != PETSC_NULL) 347 | { 348 | if (buffer) 349 | #ifdef PETSC_USE_SINGLE 350 | sscanf (buffer, "%f", wy); 351 | #else 352 | sscanf (buffer, "%lf", wy); 353 | #endif 354 | else 355 | *wy = 1.; 356 | } 357 | if (buffer) 358 | free (buffer); 359 | 360 | if (*dim > 2) 361 | { 362 | buffer = xmlGetProp (thenode, "zwidth"); 363 | sscanf (buffer, "%d", nz); 364 | free (buffer); 365 | buffer = xmlGetProp (thenode, "zsize"); 366 | if (wz != PETSC_NULL) 367 | { 368 | if (buffer) 369 | #ifdef PETSC_USE_SINGLE 370 | sscanf (buffer, "%f", wz); 371 | #else 372 | sscanf (buffer, "%lf", wz); 373 | #endif 374 | else 375 | *wz = 1.; 376 | } 377 | if (buffer) 378 | free (buffer); 379 | } 380 | else 381 | *nz = 1; 382 | } 383 | else 384 | *ny = *nz = 1; 385 | 386 | buffer = xmlGetProp (thenode, "fields"); 387 | sscanf (buffer, "%d", dof); 388 | free (buffer); 389 | 390 | if (*dof > field) 391 | { 392 | if (!(*fieldnames = (char **) realloc 393 | (*fieldnames, *dof * sizeof (char *)))) 394 | return 1; 395 | if (fieldtypes != PETSC_NULL) 396 | if (!(*fieldtypes = (field_plot_type *) realloc 397 | (*fieldtypes, *dof * sizeof(field_plot_type)))) 398 | return 1; 399 | if (!(*fieldmin = (PetscScalar *) realloc 400 | (*fieldmin, *dof * sizeof(PetscScalar)))) 401 | return 1; 402 | if (!(*fieldmax = (PetscScalar *) realloc 403 | (*fieldmax, *dof * sizeof(PetscScalar)))) 404 | return 1; 405 | } 406 | } 407 | 408 | else if (!strncasecmp (thenode->name, "LocalSize", 9)) 409 | { 410 | buffer = xmlGetProp (thenode, "xwidth"); 411 | sscanf (buffer, "%d", xm); 412 | free (buffer); 413 | 414 | if (*dim > 1) 415 | { 416 | buffer = xmlGetProp (thenode, "ywidth"); 417 | sscanf (buffer, "%d", ym); 418 | free (buffer); 419 | 420 | if (*dim > 2) 421 | { 422 | buffer = xmlGetProp (thenode, "zwidth"); 423 | sscanf (buffer, "%d", zm); 424 | free (buffer); 425 | } 426 | else 427 | *zm = 1; 428 | } 429 | else 430 | *ym = 1; 431 | } 432 | 433 | else if (!strncasecmp (thenode->name, "Stencil", 7)) 434 | { 435 | buffer = xmlGetProp (thenode, "width"); 436 | sscanf (buffer, "%d", sw); 437 | free (buffer); 438 | 439 | buffer = xmlGetProp (thenode, "type"); 440 | sscanf (buffer, "%d", st); 441 | free (buffer); 442 | 443 | buffer = xmlGetProp (thenode, "periodic"); 444 | sscanf (buffer, "%d", wrap); 445 | free (buffer); 446 | } 447 | 448 | else if (!strncasecmp (thenode->name, "Field", 5)) 449 | { 450 | if (!*fieldnames || !*fieldmax || !*fieldmin) 451 | { 452 | printf ("Warning: reading a Field, but the number of them is unknown!\n"); 453 | } 454 | 455 | if (field >= *dof) 456 | { 457 | printf ("Warning: more <Field> tags than declared degrees of freedom.\nThis may be because tags are out of order.\n"); 458 | *fieldnames = (char **) realloc 459 | (*fieldnames, (field+1) * sizeof (char *)); 460 | if (fieldtypes != PETSC_NULL) 461 | *fieldtypes = (field_plot_type *) realloc 462 | (*fieldtypes, (field+1) * sizeof (field_plot_type)); 463 | *fieldmin = (PetscScalar *) realloc 464 | (*fieldmin, (field+1) * sizeof (PetscScalar)); 465 | *fieldmax = (PetscScalar *) realloc 466 | (*fieldmax, (field+1) * sizeof (PetscScalar)); 467 | } 468 | 469 | (*fieldnames) [field] = xmlGetProp (thenode, "name"); 470 | /*+ If the type attribute is missing from the Field node (as it is in 471 | version 0.1 documents), assume 472 | +latex+{\tt FIELD\_SCALAR\_COLORS}. 473 | +html+ <tt>FIELD_SCALAR_COLORS</tt>. +*/ 474 | buffer = xmlGetProp (thenode, "type"); 475 | if (fieldtypes) 476 | { 477 | if (buffer) 478 | sscanf (buffer, "0x%x", *fieldtypes + field); 479 | else 480 | (*fieldtypes) [field] = FIELD_SCALAR_COLORS; 481 | } 482 | if (buffer) 483 | free(buffer); 484 | buffer = xmlGetProp (thenode, "min"); 485 | #ifdef PETSC_USE_SINGLE 486 | sscanf (buffer, "%f", *fieldmin + field); 487 | #else 488 | sscanf (buffer, "%lf", *fieldmin + field); 489 | #endif 490 | free (buffer); 491 | buffer = xmlGetProp (thenode, "max"); 492 | #ifdef PETSC_USE_SINGLE 493 | sscanf (buffer, "%f", *fieldmax + field); 494 | #else 495 | sscanf (buffer, "%lf", *fieldmax + field); 496 | #endif 497 | free (buffer); 498 | field++; 499 | } 500 | 501 | else if (!strncasecmp (thenode->name, "User", 4)) 502 | { 503 | (*usermetacount)++; 504 | (*usermetanames) = (char **) realloc 505 | (*usermetanames, (*usermetacount) * sizeof (char *)); 506 | (*usermetadata) = (char **) realloc 507 | (*usermetadata, (*usermetacount) * sizeof (char *)); 508 | (*usermetanames) [(*usermetacount)-1] = xmlGetProp (thenode, "name"); 509 | (*usermetadata) [(*usermetacount)-1] = xmlGetProp (thenode, "value"); 510 | } 511 | } 512 | 513 | /* This is another way to do things, which would be a whole lot easier, if it 514 | worked... (See Debian's libxml bug list.) 515 | thenode = xmlStringGetNodeList (thedoc, "GlobalCPUs"); 516 | printf ("thenode = 0x%lx\n", thenode); 517 | if (!thenode) 518 | return 1; 519 | thenode = xmlStringGetNodeList (thedoc, "GlobalSize"); 520 | if (!thenode) 521 | return 1; 522 | thenode = xmlStringGetNodeList (thedoc, "LocalSize"); 523 | if (!thenode) 524 | return 1; 525 | thenode = xmlStringGetNodeList (thedoc, "Stencil"); 526 | if (!thenode) 527 | return 1; 528 | 529 | i = 0; 530 | thenode = xmlStringGetNodeList (thedoc, "Field"); 531 | if (!thenode) 532 | return 1; 533 | while (thenode) 534 | { 535 | i++; 536 | thenode = thenode -> next; 537 | } 538 | 539 | thenode = xmlStringGetNodeList (thedoc, "User"); 540 | while (thenode) 541 | { 542 | thenode = thenode -> next; 543 | } */ 544 | 545 | free (version); 546 | xmlFreeDoc (thedoc); 547 | 548 | return 0; 549 | } 550 | 551 | 552 | #undef __FUNCT__ 553 | #define __FUNCT__ "IlluMultiParseData" 554 | 555 | /*++++++++++++++++++++++++++++++++++++++ 556 | This function reads in the data stored by IlluMultiStoreData(), complete with 557 | int/gzip compression. 558 | 559 | int IlluMultiParseData It returns zero or an error code. 560 | 561 | PetscScalar *globalarray Array into which to load the (local) data. 562 | 563 | char *basename Base file name. 564 | 565 | int rank CPU number to read data for. 566 | 567 | int compressed Data compression: if zero then no compression (fastest), 1-9 568 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 569 | guint32s representing relative values between min and max for each field, 570 | compressed according to this value minus 16. Likewise for 32-47 and 571 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 572 | information and can't be used for accurate checkpointing, but they should 573 | retain enough data for visualization (except perhaps for the guint8s, which 574 | are possibly acceptable for vectors but likely not contours). 575 | 576 | int gridpoints Number of gridpoints to read data for. 577 | 578 | int dof Degrees of freedom at each node, a.k.a. number of field variables. 579 | 580 | int wrongendian Tells whether the data are stored in the opposite endian 581 | format from this platform, and thus must be switched when the data are 582 | streamed in. 583 | 584 | PetscScalar *fieldmin Minimum value of each field variable. 585 | 586 | PetscScalar *fieldmax Maximum value of each field variable. 587 | ++++++++++++++++++++++++++++++++++++++*/ 588 | 589 | static int IlluMultiParseData 590 | (PetscScalar *globalarray, char *basename, int rank, int compressed, 591 | int gridpoints, int dof, int wrongendian, PetscScalar *fieldmin, 592 | PetscScalar *fieldmax) 593 | { 594 | int i; 595 | char filename [1000]; 596 | FILE *infile; 597 | 598 | if (compressed & COMPRESS_GZIP_MASK) 599 | { 600 | if (snprintf (filename, 999, "gunzip -c < %s.cpu%.4d.data", 601 | basename, rank) > 999) 602 | return 1; 603 | if (!(infile = popen (filename, "r"))) 604 | return 1; 605 | } 606 | else 607 | { 608 | if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999) 609 | return 1; 610 | if (!(infile = fopen (filename, "r"))) 611 | return 1; 612 | } 613 | 614 | /* Read and store the data */ 615 | /* TODO: make this deal with float and complex */ 616 | switch (compressed & COMPRESS_INT_MASK) 617 | { 618 | /* Yes, this way of doing it is a bit redundant, but it seems better than 619 | the multitude of subconditionals which would be required if I did it 620 | the other way. */ 621 | case COMPRESS_INT_LONG: 622 | { 623 | guint32 *storage; 624 | if (!(storage = (guint32 *) malloc 625 | (gridpoints * dof * sizeof (guint32)))) 626 | return 1; 627 | fread (storage, sizeof (guint32), gridpoints * dof, infile); 628 | 629 | if (wrongendian) 630 | { 631 | for (i=0; i<gridpoints*dof; i++) 632 | storage[i] = GUINT32_SWAP_LE_BE_CONSTANT (storage[i]); 633 | } 634 | for (i=0; i<gridpoints*dof; i++) 635 | globalarray[i] = fieldmin [i%dof] + 636 | ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 4294967295.; 637 | free (storage); 638 | break; 639 | } 640 | case COMPRESS_INT_SHORT: 641 | { 642 | guint16 *storage; 643 | if (!(storage = (guint16 *) malloc 644 | (gridpoints * dof * sizeof (guint16)))) 645 | return 1; 646 | fread (storage, sizeof (guint16), gridpoints * dof, infile); 647 | 648 | if (wrongendian) 649 | { 650 | for (i=0; i<gridpoints*dof; i++) 651 | storage[i] = GUINT16_SWAP_LE_BE_CONSTANT (storage[i]); 652 | } 653 | for (i=0; i<gridpoints*dof; i++) 654 | globalarray[i] = fieldmin [i%dof] + 655 | ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 65535.; 656 | free (storage); 657 | break; 658 | } 659 | case COMPRESS_INT_CHAR: 660 | { 661 | guint8 *storage; 662 | if (!(storage = (guint8 *) malloc 663 | (gridpoints * dof * sizeof (guint8)))) 664 | return 1; 665 | fread (storage, sizeof (guint8), gridpoints * dof, infile); 666 | 667 | for (i=0; i<gridpoints*dof; i++) 668 | globalarray[i] = fieldmin [i%dof] + 669 | ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 255.; 670 | free (storage); 671 | break; 672 | } 673 | default: 674 | { 675 | /* TODO: check for different size data, complex */ 676 | fread (globalarray, sizeof (PetscScalar), gridpoints * dof, infile); 677 | 678 | if (wrongendian) 679 | { 680 | if (sizeof (PetscScalar) == 8) 681 | { 682 | for (i=0; i<gridpoints*dof; i++) 683 | globalarray[i]= GUINT64_SWAP_LE_BE_CONSTANT (globalarray[i]); 684 | } 685 | else if (sizeof (PetscScalar) == 4) 686 | { 687 | for (i=0; i<gridpoints*dof; i++) 688 | globalarray[i]= GUINT32_SWAP_LE_BE_CONSTANT (globalarray[i]); 689 | } 690 | else return 1; 691 | } 692 | 693 | } 694 | } 695 | 696 | if (compressed & COMPRESS_GZIP_MASK) 697 | pclose (infile); 698 | else 699 | fclose (infile); 700 | 701 | return 0; 702 | } 703 | 704 | 705 | #undef __FUNCT__ 706 | #define __FUNCT__ "IlluMultiStoreXML" 707 | 708 | /*++++++++++++++++++++++++++++++++++++++ 709 | This function opens, stores and closes the XML metadata file for IlluMulti 710 | format storage. It is called by IlluMultiSave(). 711 | 712 | int IlluMultiStoreXML It returns zero or an error code. 713 | 714 | char *basename Base file name. 715 | 716 | int rank CPU number to store data for. 717 | 718 | int compressed Data compression: if zero then no compression (fastest), 1-9 719 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 720 | guint32s representing relative values between min and max for each field, 721 | compressed according to this value minus 16. Likewise for 32-47 and 722 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 723 | information and can't be used for accurate checkpointing, but they should 724 | retain enough data for visualization (except perhaps for the guint8s, which 725 | are possibly acceptable for vectors but certainly not contours). 726 | 727 | int dim Dimensionality of the space. 728 | 729 | int px Number of processors in the 730 | +latex+$x$-direction. 731 | +html+ <i>x</i>-direction. 732 | 733 | int py Number of processors in the 734 | +latex+$y$-direction. 735 | +html+ <i>y</i>-direction. 736 | 737 | int pz Number of processors in the 738 | +latex+$z$-direction. 739 | +html+ <i>z</i>-direction. 740 | 741 | int nx Number of grid points over the entire array in the 742 | +latex+$x$-direction. 743 | +html+ <i>x</i>-direction. 744 | 745 | int ny Number of grid points over the entire array in the 746 | +latex+$y$-direction. 747 | +html+ <i>y</i>-direction. 748 | 749 | int nz Number of grid points over the entire array in the 750 | +latex+$z$-direction. 751 | +html+ <i>z</i>-direction. 752 | 753 | PetscScalar wx Physical overall width in the 754 | +latex+$x$-direction. 755 | +html+ <i>x</i>-direction. 756 | 757 | PetscScalar wy Physical overall width in the 758 | +latex+$y$-direction. 759 | +html+ <i>y</i>-direction. 760 | 761 | PetscScalar wz Physical overall width in the 762 | +latex+$z$-direction. 763 | +html+ <i>z</i>-direction. 764 | 765 | int xm Number of grid points over the local part of the array in the 766 | +latex+$x$-direction. 767 | +html+ <i>x</i>-direction. 768 | 769 | int ym Number of grid points over the local part of the array in the 770 | +latex+$y$-direction. 771 | +html+ <i>y</i>-direction. 772 | 773 | int zm Number of grid points over the local part of the array in the 774 | +latex+$z$-direction. 775 | +html+ <i>z</i>-direction. 776 | 777 | int dof Degrees of freedom at each node, a.k.a. number of field variables. 778 | 779 | int sw Stencil width. 780 | 781 | int st Stencil type, given by the 782 | +latex+{\tt PETSc enum} values. 783 | +html+ <tt>PETSc enum</tt> values. 784 | 785 | int wrap Periodic type, given by the 786 | +latex+{\tt PETSc enum} values. 787 | +html+ <tt>PETSc enum</tt> values. 788 | 789 | char **fieldnames Names of the field variables. 790 | 791 | field_plot_type *fieldtypes Data (plot) types for field variables. 792 | 793 | PetscReal *fieldmin Minimum value of each field variable. 794 | 795 | PetscReal *fieldmax Maximum value of each field variable. 796 | 797 | int usermetacount Number of user metadata parameters. 798 | 799 | char **usermetanames User metadata parameter names. 800 | 801 | char **usermetadata User metadata parameter strings. 802 | ++++++++++++++++++++++++++++++++++++++*/ 803 | 804 | static int IlluMultiStoreXML 805 | (char *basename, int rank, int compressed, 806 | int dim, int px,int py,int pz, int nx,int ny,int nz, 807 | PetscScalar wx,PetscScalar wy,PetscScalar wz, int xm,int ym,int zm, 808 | int dof, int sw, int st, int wrap, char **fieldnames, 809 | field_plot_type *fieldtypes, PetscReal *fieldmin, PetscReal *fieldmax, 810 | int usermetacount, char **usermetanames, char **usermetadata) 811 | { 812 | xmlDocPtr thedoc; 813 | xmlNodePtr thenode; 814 | FILE *outfile; 815 | char filename [1000], buffer [1000]; 816 | int i; 817 | 818 | /*+The XML tags in the .meta file consist of: 819 | +latex+\begin{center} \begin{tabular}{|l|l|} \hline 820 | +html+ <center><table BORDER> 821 | +latex+{\tt IlluMulti} & Primary tag, with attributes {\tt version}, \\ 822 | +latex+& {\tt endian} ({\tt big} or {\tt little}) and 823 | +latex+{\tt compression} \\ & (none, 1-9, best, float*, long*, short* or 824 | +latex+char*\footnote{longnone, long1-long9 or longbest compresses each 825 | +latex+double to a 32-bit unsigned int, with 0 representing the minimum for 826 | +latex+that field and 2$^{32}-1$ representing the maximum; likewise 827 | +latex+shortnone, short1-short9, shortbest uses 16-bit unsigned ints, and 828 | +latex+char* uses 8-bit unsigned ints. float is there to indicate that 829 | +latex+PetscScalar is 4 bytes at save time, loading should adjust 830 | +latex+accordingly; that's not fully supported just yet. At some point 831 | +latex+I'll have to figure out how to represent complex.}). \\ \hline 832 | +html+ <tr><td><tt>IlluMulti</tt></td><td>Primary tag, with attributes 833 | +html+ <tt>version</tt>,<br><tt>endian</tt> (<tt>big</tt> or 834 | +html+ <tt>little</tt>) and <tt>compression</tt><br>(none, 1-9, best; 835 | +html+ long*, short* or char*)</td></tr> 836 | +*/ 837 | 838 | thedoc = xmlNewDoc ("1.0"); 839 | thedoc->root = xmlNewDocNode (thedoc, NULL, "IlluMulti", NULL); 840 | xmlSetProp (thedoc->root, "version", "0.2"); 841 | #ifdef WORDS_BIGENDIAN 842 | xmlSetProp (thedoc->root, "endian", "big"); 843 | #else 844 | xmlSetProp (thedoc->root, "endian", "little"); 845 | #endif 846 | if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_LONG) 847 | strcpy (buffer, "long"); 848 | else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_SHORT) 849 | strcpy (buffer, "short"); 850 | else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_CHAR) 851 | strcpy (buffer, "char"); 852 | /* Note: this doesn't really support complex types yet... */ 853 | else if (sizeof (PetscScalar) == 4) 854 | strcpy (buffer, "float"); 855 | else 856 | *buffer = '\0'; 857 | if ((compressed & COMPRESS_GZIP_MASK) == 0) 858 | strcat (buffer, "none"); 859 | else if ((compressed & COMPRESS_GZIP_MASK) >= 1 && 860 | (compressed & COMPRESS_GZIP_MASK) <= 9) 861 | sprintf (buffer + strlen (buffer), "%d", compressed & COMPRESS_GZIP_MASK); 862 | else 863 | strcat (buffer, "best"); 864 | xmlSetProp (thedoc->root, "compression", buffer); 865 | 866 | /*+ 867 | +latex+{\tt GlobalCPUs} & Number of CPUs in each direction, with \\ 868 | +latex+& attributes {\tt dimensions}, {\tt xwidth}, {\tt ywidth} and 869 | +latex+{\tt zwidth} \\ \hline 870 | +html+ <tr><td><tt>GlobalCPUs</tt></td><td>Number of CPUs in each 871 | +html+ direction, with<br>attributes <tt>dimensions</tt>, <tt>xwidth</tt>, 872 | +html+ <tt>ywidth</tt> and <tt>zwidth</tt></td></tr> 873 | +*/ 874 | 875 | thenode = xmlNewChild (thedoc->root, NULL, "GlobalCPUs", NULL); 876 | snprintf (buffer, 999, "%d", dim); 877 | xmlSetProp (thenode, "dimensions", buffer); 878 | snprintf (buffer, 999, "%d", px); 879 | xmlSetProp (thenode, "xwidth", buffer); 880 | if (dim > 1) 881 | { 882 | snprintf (buffer, 999, "%d", py); 883 | xmlSetProp (thenode, "ywidth", buffer); 884 | if (dim > 2) 885 | { 886 | snprintf (buffer, 999, "%d", pz); 887 | xmlSetProp (thenode, "zwidth", buffer); 888 | } 889 | } 890 | 891 | /*+ 892 | +latex+{\tt GlobalSize} & Size of the entire distributed array, with \\ 893 | +latex+& attributes {\tt xwidth}, {\tt ywidth}, 894 | +latex+{\tt zwidth}, {\tt xsize}**, {\tt ysize}**, {\tt zsize}** and 895 | +latex+{\tt fields} \\ \hline 896 | +html+ <tr><td><tt>GlobalSize</tt></td><td>Size of the entire distributed 897 | +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt>, 898 | +html+ <tt>zwidth</tt>, <tt>xsize</tt>**, <tt>ysize</tt>**, 899 | +html+ <tt>zsize</tt>** and <tt>fields</tt></td></tr> 900 | +*/ 901 | 902 | thenode = xmlNewChild (thedoc->root, NULL, "GlobalSize", NULL); 903 | snprintf (buffer, 999, "%d", nx); 904 | xmlSetProp (thenode, "xwidth", buffer); 905 | snprintf (buffer, 999, "%g", wx); 906 | xmlSetProp (thenode, "xsize", buffer); 907 | if (dim > 1) 908 | { 909 | snprintf (buffer, 999, "%d", ny); 910 | xmlSetProp (thenode, "ywidth", buffer); 911 | snprintf (buffer, 999, "%g", wy); 912 | xmlSetProp (thenode, "ysize", buffer); 913 | if (dim > 2) 914 | { 915 | snprintf (buffer, 999, "%d", nz); 916 | xmlSetProp (thenode, "zwidth", buffer); 917 | snprintf (buffer, 999, "%g", wz); 918 | xmlSetProp (thenode, "zsize", buffer); 919 | } 920 | } 921 | snprintf (buffer, 999, "%d", dof); 922 | xmlSetProp (thenode, "fields", buffer); 923 | 924 | /*+ 925 | +latex+{\tt LocalSize} & Size of the entire local part of the array, \\ 926 | +latex+& with attributes {\tt xwidth}, {\tt ywidth} and {\tt zwidth} \\ 927 | +latex+\hline 928 | +html+ <tr><td><tt>LocalSize</tt></td><td>Size of the local part of the 929 | +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt> and 930 | +html+ <tt>zwidth</tt></td></tr> 931 | +*/ 932 | 933 | thenode = xmlNewChild (thedoc->root, NULL, "LocalSize", NULL); 934 | snprintf (buffer, 999, "%d", xm); 935 | xmlSetProp (thenode, "xwidth", buffer); 936 | if (dim > 1) 937 | { 938 | snprintf (buffer, 999, "%d", ym); 939 | xmlSetProp (thenode, "ywidth", buffer); 940 | if (dim > 2) 941 | { 942 | snprintf (buffer, 999, "%d", zm); 943 | xmlSetProp (thenode, "zwidth", buffer); 944 | } 945 | } 946 | 947 | /*+ 948 | +latex+{\tt Stencil} & Stencil and periodic data, with attributes \\ 949 | +latex+& {\tt width}, {\tt type} and {\tt periodic} (using {\tt PETSc 950 | +latex+enum} values) \\ \hline 951 | +html+ <tr><td><tt>Stencil</tt></td><td>Stencil and periodic data, with 952 | +html+ attributes<br><tt>width</tt>, <tt>type</tt> and <tt>periodic</tt> 953 | +html+ (using <tt>PETSc enum</tt> values)</td></tr> 954 | +*/ 955 | 956 | thenode = xmlNewChild (thedoc->root, NULL, "Stencil", NULL); 957 | snprintf (buffer, 999, "%d", sw); 958 | xmlSetProp (thenode, "width", buffer); 959 | snprintf (buffer, 999, "%d", (int) st); 960 | xmlSetProp (thenode, "type", buffer); 961 | snprintf (buffer, 999, "%d", (int) wrap); 962 | xmlSetProp (thenode, "periodic", buffer); 963 | 964 | /*+ 965 | +latex+{\tt Field} & Data on each field, attributes {\tt name}, 966 | +latex+{\tt type}**, {\tt min} and {\tt max} \\ \hline 967 | +html+ <tr><td><tt>Field</tt></td><td>Data on each field, attributes 968 | +html+ <tt>name</tt>, <tt>type</tt>*, <tt>min</tt> and 969 | +html+ <tt>max</tt></td></tr> 970 | +*/ 971 | 972 | for (i=0; i<dof; i++) 973 | { 974 | thenode = xmlNewChild (thedoc->root, NULL, "Field", NULL); 975 | xmlSetProp (thenode, "name", fieldnames [i]); 976 | snprintf (buffer, 999, "0x%.2x", fieldtypes [i]); 977 | xmlSetProp (thenode, "type", buffer); 978 | snprintf (buffer, 999, "%g", fieldmin [i]); 979 | xmlSetProp (thenode, "min", buffer); 980 | snprintf (buffer, 999, "%g", fieldmax [i]); 981 | xmlSetProp (thenode, "max", buffer); 982 | } 983 | 984 | /*+ 985 | +latex+{\tt User} & User parameters, attributes {\tt name} and {\tt value} 986 | +latex+\\ \hline \end{tabular} \end{center} 987 | +html+ <tr><td><tt>User</tt></td><td>User parameters, attributes 988 | +html+ <tt>name</tt> and <tt>value</tt></td></tr></table></center> 989 | *Lossy compression to smaller data types. 990 | +latex+\par 991 | +html+ <br> 992 | **Represents new attribute for IlluMulti 0.2 file format. 993 | +*/ 994 | 995 | for (i=0; i<usermetacount; i++) 996 | { 997 | thenode = xmlNewChild (thedoc->root, NULL, "User", NULL); 998 | xmlSetProp (thenode, "name", usermetanames [i]); 999 | xmlSetProp (thenode, "value", usermetadata [i]); 1000 | } 1001 | 1002 | if (snprintf (filename, 999, "%s.cpu%.4d.meta", basename, rank) > 999) 1003 | return 1; 1004 | if (!(outfile = fopen (filename, "w"))) 1005 | return 1; 1006 | xmlDocDump (outfile, thedoc); 1007 | xmlFreeDoc (thedoc); 1008 | fclose(outfile); 1009 | 1010 | return 0; 1011 | } 1012 | 1013 | 1014 | #undef __FUNCT__ 1015 | #define __FUNCT__ "IlluMultiStoreData" 1016 | 1017 | /*++++++++++++++++++++++++++++++++++++++ 1018 | This function stores the data file. 1019 | 1020 | int IlluMultiStoreData It returns zero or an error code. 1021 | 1022 | PetscScalar *globalarray Array from which to save the (local) data. 1023 | 1024 | char *basename Base file name. 1025 | 1026 | int rank CPU number to read data for. 1027 | 1028 | int compressed Data compression: if zero then no compression (fastest), 1-9 1029 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 1030 | guint32s representing relative values between min and max for each field, 1031 | compressed according to this value minus 16. Likewise for 32-47 and 1032 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 1033 | information and can't be used for accurate checkpointing, but they should 1034 | retain enough data for visualization (except perhaps for the guint8s, which 1035 | are possibly acceptable for vectors but likely not contours). 1036 | 1037 | int gridpoints Number of gridpoints to store data for. 1038 | 1039 | int dof Degrees of freedom at each node, a.k.a. number of field variables. 1040 | 1041 | PetscScalar *fieldmin Minimum value of each field variable. 1042 | 1043 | PetscScalar *fieldmax Maximum value of each field variable. 1044 | ++++++++++++++++++++++++++++++++++++++*/ 1045 | 1046 | static int IlluMultiStoreData 1047 | (PetscScalar *globalarray, char *basename, int rank, int compressed, 1048 | int gridpoints, int dof, PetscScalar *fieldmin, PetscScalar *fieldmax) 1049 | { 1050 | int i; 1051 | char filename [1000]; 1052 | FILE *outfile; 1053 | 1054 | if (compressed & COMPRESS_GZIP_MASK) 1055 | { 1056 | if ((compressed & COMPRESS_GZIP_MASK) >= 1 && 1057 | (compressed & COMPRESS_GZIP_MASK) <= 9) 1058 | { 1059 | if (snprintf (filename, 999, "gzip -c -%d > %s.cpu%.4d.data", 1060 | compressed & COMPRESS_GZIP_MASK, basename, rank) > 999) 1061 | return 1; 1062 | } 1063 | else 1064 | { 1065 | if (snprintf (filename,999, "gzip -c --best > %s.cpu%.4d.data", 1066 | basename,rank) > 999) 1067 | return 1; 1068 | } 1069 | if (!(outfile = popen (filename, "w"))) 1070 | return 1; 1071 | } 1072 | else 1073 | { 1074 | if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999) 1075 | return 1; 1076 | if (!(outfile = fopen (filename, "w"))) 1077 | return 1; 1078 | } 1079 | 1080 | switch (compressed & COMPRESS_INT_MASK) 1081 | { 1082 | case COMPRESS_INT_LONG: 1083 | { 1084 | guint32 *storage; 1085 | if (!(storage = (guint32 *) malloc 1086 | (gridpoints * dof * sizeof (guint32)))) 1087 | return 1; 1088 | for (i=0; i<gridpoints*dof; i++) 1089 | storage [i] = (guint32) 1090 | (4294967295. * (globalarray [i] - fieldmin [i%dof]) / 1091 | (fieldmax [i%dof] - fieldmin [i%dof])); 1092 | fwrite (storage, sizeof (guint32), gridpoints * dof, outfile); 1093 | free (storage); 1094 | break; 1095 | } 1096 | case COMPRESS_INT_SHORT: 1097 | { 1098 | guint16 *storage; 1099 | if (!(storage = (guint16 *) malloc 1100 | (gridpoints * dof * sizeof (guint16)))) 1101 | return 1; 1102 | for (i=0; i<gridpoints*dof; i++) 1103 | storage [i] = (guint16) 1104 | (65535. * (globalarray [i] - fieldmin [i%dof]) / 1105 | (fieldmax [i%dof] - fieldmin [i%dof])); 1106 | fwrite (storage, sizeof (guint16), gridpoints * dof, outfile); 1107 | free (storage); 1108 | break; 1109 | } 1110 | case COMPRESS_INT_CHAR: 1111 | { 1112 | guint8 *storage; 1113 | if (!(storage = (guint8 *) malloc 1114 | (gridpoints * dof * sizeof (guint8)))) 1115 | return 1; 1116 | for (i=0; i<gridpoints*dof; i++) 1117 | storage [i] = (guint8) 1118 | (255. * (globalarray [i] - fieldmin [i%dof]) / 1119 | (fieldmax [i%dof] - fieldmin [i%dof])); 1120 | fwrite (storage, sizeof (guint8), gridpoints * dof, outfile); 1121 | free (storage); 1122 | break; 1123 | } 1124 | default: 1125 | fwrite (globalarray, sizeof (PetscScalar), gridpoints * dof, outfile); 1126 | } 1127 | 1128 | if (compressed & COMPRESS_GZIP_MASK) 1129 | pclose (outfile); 1130 | else 1131 | fclose (outfile); 1132 | } 1133 | 1134 | 1135 | #undef __FUNCT__ 1136 | #define __FUNCT__ "checkagree" 1137 | 1138 | /*++++++++++++++++++++++++++++++++++++++ 1139 | Ancillary routine for 1140 | +latex+{\tt IlluMultiRead()}: 1141 | +html+ <tt>IlluMultiRead()</tt>: 1142 | checks agreement of parameters and reports disagreement if necessary. 1143 | 1144 | int checkagree Returns 0 if they agree, 1 otherwise. 1145 | 1146 | int da Integer parameter from the existing DA. 1147 | 1148 | int file Integer parameter read from the file. 1149 | 1150 | char *parameter Parameter name for reporting. 1151 | ++++++++++++++++++++++++++++++++++++++*/ 1152 | 1153 | static inline int checkagree (int da, int file, char *parameter) 1154 | { 1155 | int ierr; 1156 | 1157 | if (da == file) 1158 | return 0; 1159 | 1160 | ierr = PetscPrintf (PETSC_COMM_WORLD, 1161 | "Disagreement in %s: da has %d, file %d\n", 1162 | parameter, da, file); CHKERRQ (ierr); 1163 | return 1; 1164 | } 1165 | 1166 | 1167 | #undef __FUNCT__ 1168 | #define __FUNCT__ "IlluMultiRead" 1169 | 1170 | /*++++++++++++++++++++++++++++++++++++++ 1171 | This reads the data into an existing distributed array and vector, checking 1172 | that the sizes are right etc. 1173 | 1174 | int IlluMultiRead It returns zero or an error code. 1175 | 1176 | DA theda Distributed array object controlling the data to read. 1177 | 1178 | Vec X Vector into which to read the data. 1179 | 1180 | char *basename Base file name. 1181 | 1182 | int *usermetacount Pointer to an int where we put the number of user metadata 1183 | parameters loaded. 1184 | 1185 | char ***usermetanames Pointer to a char ** where the loaded parameter names 1186 | are stored. This is 1187 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1188 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1189 | is needed to free up its data. 1190 | 1191 | char ***usermetadata Pointer to a char ** where the loaded parameter strings 1192 | are stored. This is 1193 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1194 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1195 | is needed to free up its data. 1196 | ++++++++++++++++++++++++++++++++++++++*/ 1197 | 1198 | int IlluMultiRead (DA theda, Vec X, char *basename, int *usermetacount, 1199 | char ***usermetanames, char ***usermetadata) 1200 | { 1201 | int dim, px,py,pz, nx,ny,nz, xm,ym,zm, dof, sw, size, rank, ierr; 1202 | DAPeriodicType wrap, fwrap; 1203 | DAStencilType st, fst; 1204 | int fdim, fpx,fpy,fpz, fnx,fny,fnz, fxm,fym,fzm, fdof, fsw, fsize = 1; 1205 | int compressed, wrongendian; 1206 | char filename[1000], **fieldnames; 1207 | FILE *infile; 1208 | PetscScalar *globalarray, *fieldmin, *fieldmax; 1209 | 1210 | /*+ First it gets the properties of the distributed array for comparison with 1211 | the metadata. +*/ 1212 | ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap, 1213 | &st); CHKERRQ (ierr); 1214 | ierr = DAGetCorners (theda, PETSC_NULL,PETSC_NULL,PETSC_NULL, &xm,&ym,&zm); 1215 | CHKERRQ (ierr); 1216 | 1217 | /*+ Next it parses the XML metadata file into the document tree, and reads 1218 | its content into the appropriate structures, comparing parameters with 1219 | those of the existing distributed array structure. +*/ 1220 | ierr = MPI_Comm_size (PETSC_COMM_WORLD, &size); CHKERRQ (ierr); 1221 | ierr = MPI_Comm_rank (PETSC_COMM_WORLD, &rank); CHKERRQ (ierr); 1222 | 1223 | DPRINTF ("Parsing XML metadata file from basename %s\n", basename); 1224 | if (ierr = IlluMultiParseXML 1225 | (basename, rank, &compressed, &wrongendian, &fdim, &fpx,&fpy,&fpz, 1226 | &fnx,&fny,&fnz, PETSC_NULL,PETSC_NULL,PETSC_NULL, &fxm,&fym,&fzm, &fdof, 1227 | &fsw, &fst, &fwrap, &fieldnames, PETSC_NULL, &fieldmin, &fieldmax, 1228 | usermetacount, usermetanames, usermetadata)) 1229 | return ierr; 1230 | 1231 | /* The size>1 checks are because we support loading multiple CPUs' data into 1232 | a 1-CPU distributed array, as long as the global dimensions are right. */ 1233 | DPRINTF ("Checking size agreement between DA and data to read\n",0); 1234 | fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1); 1235 | if (ierr = checkagree (dim, fdim, "dimensions")) return ierr; 1236 | if (size > 1 || fsize == 1) { 1237 | if (ierr = checkagree (px, fpx, "cpu xwidth")) return ierr; 1238 | if (dim > 1) { 1239 | if (ierr = checkagree (py, fpy, "cpu ywidth")) return ierr; 1240 | if (dim > 2) { 1241 | if (ierr = checkagree (pz, fpz, "cpu zwidth")) return ierr; }}} 1242 | if (ierr = checkagree (nx, fnx, "global xwidth")) return ierr; 1243 | if (dim > 1) { 1244 | if (ierr = checkagree (ny, fny, "global ywidth")) return ierr; 1245 | if (dim > 2) { 1246 | if (ierr = checkagree (nz, fnz, "global zwidth")) return ierr; }} 1247 | if (size > 1 || fsize == 1) { 1248 | if (ierr = checkagree (xm, fxm, "local xwidth")) return ierr; 1249 | if (dim > 1) { 1250 | if (ierr = checkagree (ym, fym, "local ywidth")) return ierr; 1251 | if (dim > 2) { 1252 | if (ierr = checkagree (zm, fzm, "local zwidth")) return ierr; }}} 1253 | if (ierr = checkagree (dof, fdof, "degrees of freedom")) return ierr; 1254 | if (ierr = checkagree (sw, fsw, "stencil width")) return ierr; 1255 | if (ierr = checkagree ((int)st, (int)fst, "stencil type")) return ierr; 1256 | if (ierr = checkagree ((int)wrap, (int)fwrap, "periodic type")) return ierr; 1257 | 1258 | /*+ Then it streams in the data in one big slurp. +*/ 1259 | ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr); 1260 | if (size > 1 || fsize == 1) /* Default condition */ 1261 | { 1262 | DPRINTF ("Reading data\n",0); 1263 | ierr = IlluMultiParseData 1264 | (globalarray, basename, rank, compressed, 1265 | xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof, wrongendian, fieldmin, 1266 | fieldmax); 1267 | if (ierr) 1268 | { 1269 | xm = VecRestoreArray (X, &globalarray); CHKERRQ (xm); 1270 | return ierr; 1271 | } 1272 | } 1273 | else /* Getting distributed data into a single array */ 1274 | { 1275 | int i,j,k, xs,ys,zs; 1276 | PetscScalar *storage; 1277 | 1278 | /* Incrementally read in data to storage, store it in globalarray */ 1279 | /* First loop through the stored CPUs */ 1280 | for (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm) 1281 | for (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym) 1282 | for (px=0, xs=0; px<fpx; px++, xs+=fxm, rank++) 1283 | { 1284 | int gridpoints; 1285 | 1286 | DPRINTF ("Parsing XML metadata file for CPU %d\n", rank); 1287 | if (ierr = IlluMultiParseXML 1288 | (basename, rank, &compressed, &wrongendian, &fdim, 1289 | &fpx,&fpy,&fpz, &fnx,&fny,&fnz, 1290 | PETSC_NULL,PETSC_NULL,PETSC_NULL, 1291 | &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap, 1292 | &fieldnames, PETSC_NULL, &fieldmin, &fieldmax, 1293 | usermetacount, usermetanames, usermetadata)) 1294 | return ierr; 1295 | gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1); 1296 | 1297 | /* This allocate/read/copy/free storage business is annoying. 1298 | Replacing it with "zero-copy" would involve changing the 1299 | IlluMultiParseData() API to allow loading into part of the 1300 | local array. But I'll leave that for future expansion, when 1301 | this will be able to do arbitrary m->n CPU loading as long as 1302 | the global sizes match. */ 1303 | storage = (PetscScalar *) malloc 1304 | (gridpoints * dof * sizeof (PetscScalar)); 1305 | DPRINTF ("Reading data for CPU %d\n", rank); 1306 | ierr = IlluMultiParseData 1307 | (storage, basename, rank, compressed, gridpoints, dof, 1308 | wrongendian, fieldmin, fieldmax); 1309 | 1310 | fxm *= dof; /* so fxm is the number of PetscScalars to copy */ 1311 | i=1; 1312 | /* Now distribute */ 1313 | for (k=0; k<((dim>2)?fzm:1); k++) 1314 | for (j=0; j<((dim>1)?fym:1); j++) 1315 | BLcopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i, 1316 | globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i); 1317 | 1318 | free (storage); 1319 | fxm /= dof; 1320 | } 1321 | } 1322 | 1323 | ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr); 1324 | 1325 | return 0; 1326 | } 1327 | 1328 | 1329 | #undef __FUNCT__ 1330 | #define __FUNCT__ "IlluMultiLoad" 1331 | 1332 | /*++++++++++++++++++++++++++++++++++++++ 1333 | This creates a new distributed array of the appropriate size and loads the 1334 | data into the vector contained in it (as retrieved by 1335 | +latex+{\tt DAGetVector()}). 1336 | +html+ <tt>DAGetVector()</tt>). 1337 | It also reads the user metadata parameters into arrays stored at the supplied 1338 | pointers. 1339 | 1340 | int IlluMultiLoad It returns zero or an error code. 1341 | 1342 | char *basename Base file name. 1343 | 1344 | DA *theda Pointer to a DA object (to be created by this function). 1345 | 1346 | PetscScalar *wx Physical overall width in the 1347 | +latex+$x$-direction. 1348 | +html+ <i>x</i>-direction. 1349 | 1350 | PetscScalar *wy Physical overall width in the 1351 | +latex+$y$-direction. 1352 | +html+ <i>y</i>-direction. 1353 | 1354 | PetscScalar *wz Physical overall width in the 1355 | +latex+$z$-direction. 1356 | +html+ <i>z</i>-direction. 1357 | 1358 | field_plot_type **fieldtypes Data (plot) types for field variables. 1359 | 1360 | int *usermetacount Pointer to an int where we put the number of user metadata 1361 | parameters loaded. 1362 | 1363 | char ***usermetanames Pointer to a char ** where the loaded parameter names 1364 | are stored. This is 1365 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1366 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1367 | is needed to free up its data. 1368 | 1369 | char ***usermetadata Pointer to a char ** where the loaded parameter strings 1370 | are stored. This is 1371 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1372 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1373 | is needed to free up its data. 1374 | ++++++++++++++++++++++++++++++++++++++*/ 1375 | 1376 | int IlluMultiLoad 1377 | (char *basename, DA *theda, PetscScalar *wx,PetscScalar *wy,PetscScalar *wz, 1378 | field_plot_type **fieldtypes, int *usermetacount, char ***usermetanames, 1379 | char ***usermetadata) 1380 | { 1381 | int dim, px,py,pz, nx,ny,nz, dof, sw, fxm,fym,fzm, rank,size, ierr; 1382 | int compressed, wrongendian, fpx,fpy,fpz, fsize; 1383 | DAPeriodicType wrap; 1384 | DAStencilType st; 1385 | char filename[1000], **fieldnames; 1386 | xmlChar *buffer; 1387 | FILE *infile; 1388 | xmlDocPtr thedoc; 1389 | xmlNodePtr thenode; 1390 | PetscScalar *globalarray, *fieldmin, *fieldmax; 1391 | Vec X; 1392 | 1393 | ierr = MPI_Comm_rank (PETSC_COMM_WORLD, &rank); CHKERRQ (ierr); 1394 | ierr = MPI_Comm_size (PETSC_COMM_WORLD, &size); CHKERRQ (ierr); 1395 | 1396 | /*+ First it gets the parameters from the XML file. +*/ 1397 | DPRINTF ("Parsing XML metadata file from basename %s\n", basename); 1398 | if (ierr = IlluMultiParseXML 1399 | (basename, rank, &compressed, &wrongendian, &dim, &fpx,&fpy,&fpz, 1400 | &nx,&ny,&nz, wx,wy,wz, &fxm,&fym,&fzm, &dof, &sw, &st, &wrap, 1401 | &fieldnames, fieldtypes, &fieldmin, &fieldmax, 1402 | usermetacount, usermetanames, usermetadata)) 1403 | return ierr; 1404 | 1405 | fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1); 1406 | if (size == fsize) { /* Default condition: n->n */ 1407 | px = fpx; py = fpy; pz = fpz; } 1408 | else if (size == 1) /* Special condition: n->1 */ 1409 | px = py = pz = 1; 1410 | else /* Error: incorrect number of CPUs */ 1411 | return 1; 1412 | 1413 | /*+ Next it creates a distributed array based on those parameters, and sets 1414 | the names of its fields. +*/ 1415 | switch (dim) 1416 | { 1417 | case 1: 1418 | { 1419 | DPRINTF ("Creating %d-%d 1-D DA\n", nx, dof); 1420 | ierr = DACreate1d (PETSC_COMM_WORLD, wrap, nx, dof, sw, PETSC_NULL, 1421 | theda); CHKERRQ (ierr); 1422 | break; 1423 | } 1424 | case 2: 1425 | { 1426 | DPRINTF ("Creating %dx%d-%d 2-D DA\n", nx,ny, dof); 1427 | ierr = DACreate2d (PETSC_COMM_WORLD, wrap, st, nx,ny, px,py, dof, sw, 1428 | PETSC_NULL, PETSC_NULL, theda); CHKERRQ (ierr); 1429 | break; 1430 | } 1431 | case 3: 1432 | { 1433 | DPRINTF ("Creating %dx%dx%d-%d 3-D DA\n", nx,ny,nz, dof); 1434 | ierr = DACreate3d (PETSC_COMM_WORLD, wrap, st, nx,ny,nz, px,py,pz, dof, 1435 | sw, PETSC_NULL, PETSC_NULL, PETSC_NULL, theda); 1436 | CHKERRQ (ierr); 1437 | break; 1438 | } 1439 | default: 1440 | return 1; 1441 | } 1442 | 1443 | for (px=0; px<dof; px++) 1444 | DASetFieldName (*theda, px, fieldnames [px]); 1445 | 1446 | /*+ Then it streams the data into the distributed array's vector in one big 1447 | slurp. +*/ 1448 | DPRINTF ("Getting global vector and array\n",0); 1449 | ierr = DAGetGlobalVector (*theda, &X); CHKERRQ (ierr); 1450 | ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr); 1451 | if (size > 1 || fsize == 1) /* Default condition */ 1452 | { 1453 | DPRINTF ("Loading data in parallel\n",0); 1454 | ierr = IlluMultiParseData 1455 | (globalarray, basename, rank, compressed, 1456 | fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1), dof, wrongendian, fieldmin, 1457 | fieldmax); 1458 | if (ierr) 1459 | { 1460 | fxm = VecRestoreArray (X, &globalarray); CHKERRQ (fxm); 1461 | return ierr; 1462 | } 1463 | } 1464 | else /* Getting distributed data into a single array */ 1465 | { 1466 | int i,j,k, xs,ys,zs; 1467 | PetscScalar *storage; 1468 | 1469 | /* Incrementally read in data to storage, store it in globalarray */ 1470 | /* First loop through the stored CPUs */ 1471 | DPRINTF ("Loading data into a single array on 1 CPU\n",0); 1472 | for (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm) 1473 | for (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym) 1474 | for (px=0, xs=0; px<fpx; px++, xs+=fxm, rank++) 1475 | { 1476 | int gridpoints, fdim, fnx,fny,fnz, fdof, fsw; 1477 | DAPeriodicType fwrap; 1478 | DAStencilType fst; 1479 | 1480 | if (ierr = IlluMultiParseXML 1481 | (basename, rank, &compressed, &wrongendian, &fdim, 1482 | &fpx,&fpy,&fpz, &fnx,&fny,&fnz, 1483 | PETSC_NULL,PETSC_NULL,PETSC_NULL, 1484 | &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap, 1485 | &fieldnames, PETSC_NULL, &fieldmin, &fieldmax, 1486 | usermetacount, usermetanames, usermetadata)) 1487 | return 1; 1488 | gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1); 1489 | 1490 | /* This allocate/read/copy/free storage business is annoying. 1491 | Replacing it with "zero-copy" would involve changing the 1492 | IlluMultiParseData() API to allow loading into part of the 1493 | local array. But I'll leave that for future expansion, when 1494 | this will be able to do arbitrary m->n CPU loading as long as 1495 | the global sizes match. */ 1496 | storage = (PetscScalar *) malloc 1497 | (gridpoints * dof * sizeof (PetscScalar)); 1498 | ierr = IlluMultiParseData 1499 | (storage, basename, rank, compressed, gridpoints, dof, 1500 | wrongendian, fieldmin, fieldmax); 1501 | 1502 | fxm *= dof; /* so fxm is the number of PetscScalars to copy */ 1503 | i=1; 1504 | /* Now distribute */ 1505 | for (k=0; k<((dim>2)?fzm:1); k++) 1506 | for (j=0; j<((dim>1)?fym:1); j++) 1507 | BLcopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i, 1508 | globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i); 1509 | 1510 | free (storage); 1511 | fxm /= dof; 1512 | } 1513 | } 1514 | 1515 | DPRINTF ("Done.\n",0); 1516 | ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr); 1517 | ierr = DARestoreGlobalVector (*theda, &X); CHKERRQ (ierr); 1518 | 1519 | return 0; 1520 | } 1521 | 1522 | 1523 | #undef __FUNCT__ 1524 | #define __FUNCT__ "IlluMultiSave" 1525 | 1526 | /*++++++++++++++++++++++++++++++++++++++ 1527 | This saves the vector X in multiple files, two per process. Note the use of 1528 | +latex+{\tt PETSC\_COMM\_WORLD}, 1529 | +html+ <tt>PETSC_COMM_WORLD</tt>, 1530 | that will have to be corrected if somebody wants a different communicator 1531 | (maybe get it from the DA?). 1532 | 1533 | int IlluMultiSave it returns zero or an error code. 1534 | 1535 | DA theda Distributed array object controlling data saved. 1536 | 1537 | Vec X Vector whose data are actually being saved. 1538 | 1539 | char *basename Base file name. 1540 | 1541 | int usermetacount Number of user metadata parameters. 1542 | 1543 | char **usermetanames User metadata parameter names. 1544 | 1545 | char **usermetadata User metadata parameter strings. 1546 | 1547 | int compressed Data compression: if zero then no compression (fastest), 1-9 1548 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 1549 | guint32s representing relative values between min and max for each field, 1550 | compressed according to this value minus 16. Likewise for 32-47 and 1551 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 1552 | information and can't be used for accurate checkpointing, but they should 1553 | retain enough data for visualization (except perhaps for the guint8s, which 1554 | are possibly acceptable for vectors but certainly not contours). 1555 | ++++++++++++++++++++++++++++++++++++++*/ 1556 | 1557 | int IlluMultiSave 1558 | (DA theda, Vec X, char *basename, PetscScalar wx,PetscScalar wy,PetscScalar wz, 1559 | field_plot_type *fieldtypes, int usermetacount, char **usermetanames, 1560 | char **usermetadata, int compressed) 1561 | { 1562 | int dim, nx,ny,nz, px,py,pz, dof, sw, xs,ys,zs, xm,ym,zm, rank, i, ierr; 1563 | DAPeriodicType wrap; 1564 | DAStencilType st; 1565 | FILE *outfile; 1566 | char filename[1000], **fieldnames; 1567 | PetscReal *fieldmin, *fieldmax; 1568 | PetscScalar *globalarray; 1569 | guint64 *array64; 1570 | guint32 *array32; 1571 | guint16 *array16; 1572 | guint8 *array8; 1573 | 1574 | /*+First a check to verify a supported value of 1575 | +latex+{\tt compressed}, 1576 | +html+ <tt>compressed</tt>, 1577 | but no fancy guint* compression for complex! 1578 | +*/ 1579 | #ifdef PETSC_USE_COMPLEX 1580 | if (compressed < 0 || compressed > COMPRESS_GZIP_MASK) 1581 | return 1; 1582 | #else 1583 | if (compressed < 0 || compressed > (COMPRESS_GZIP_MASK | COMPRESS_INT_MASK)) 1584 | return 1; 1585 | #endif 1586 | 1587 | /*+Then get the distributed array parameters and processor number, and store 1588 | all this data in the XML .meta file. +*/ 1589 | ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap, 1590 | &st); CHKERRQ (ierr); 1591 | ierr = DAGetCorners (theda, &xs,&ys,&zs, &xm,&ym,&zm); CHKERRQ (ierr); 1592 | ierr = MPI_Comm_rank (PETSC_COMM_WORLD, &rank); CHKERRQ (ierr); 1593 | 1594 | if (!(fieldnames = (char **) malloc (dof * sizeof (char *)))) 1595 | return 1; 1596 | if (!(fieldmin = (PetscScalar *) malloc (2 * dof * sizeof (PetscScalar)))) 1597 | return 1; 1598 | fieldmax = fieldmin + dof; 1599 | for (i=0; i<dof; i++) 1600 | { 1601 | ierr = DAGetFieldName (theda, i, fieldnames + i); CHKERRQ (ierr); 1602 | ierr = VecStrideMin (X, i, PETSC_NULL, fieldmin + i); CHKERRQ (ierr); 1603 | ierr = VecStrideMax (X, i, PETSC_NULL, fieldmax + i); CHKERRQ (ierr); 1604 | } 1605 | 1606 | if (ierr = IlluMultiStoreXML 1607 | (basename, rank, compressed, dim, px,py,pz, nx,ny,nz, wx,wy,wz, xm,ym,zm, 1608 | dof, sw, (int) st, (int) wrap, fieldnames,fieldtypes, fieldmin,fieldmax, 1609 | usermetacount, usermetanames, usermetadata)) 1610 | return ierr; 1611 | 1612 | /*+ Finally, the data just stream out to the data file or gzip pipe in one 1613 | big lump. +*/ 1614 | ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr); 1615 | IlluMultiStoreData (globalarray, basename, rank, compressed, 1616 | xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof, 1617 | fieldmin, fieldmax); 1618 | ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr); 1619 | 1620 | free (fieldmin); 1621 | free (fieldnames); 1622 | 1623 | return 0; 1624 | }