Source for gnu.xml.util.DoParse

   1: /* DoParse.java -- 
   2:    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.util;
  39: 
  40: import java.io.IOException;
  41: 
  42: import org.xml.sax.ErrorHandler;
  43: import org.xml.sax.InputSource;
  44: import org.xml.sax.SAXException;
  45: import org.xml.sax.SAXParseException;
  46: import org.xml.sax.XMLReader;
  47: import org.xml.sax.helpers.XMLReaderFactory;
  48: 
  49: import gnu.xml.pipeline.EventConsumer;
  50: import gnu.xml.pipeline.EventFilter;
  51: import gnu.xml.pipeline.NSFilter;
  52: import gnu.xml.pipeline.PipelineFactory;
  53: import gnu.xml.pipeline.TeeConsumer;
  54: import gnu.xml.pipeline.ValidationConsumer;
  55: import gnu.xml.pipeline.WellFormednessFilter;
  56: 
  57: /**
  58:  * This class provides a driver which may be invoked from the command line
  59:  * to process a document using a SAX2 parser and a specified XML processing
  60:  * pipeline.
  61:  * This facilitates some common types of command line tools, such as parsing an
  62:  * XML document in order test it for well formedness or validity.
  63:  *
  64:  * <p>The SAX2 XMLReaderFactory should return a SAX2 XML parser which
  65:  * supports both of the standardized extension handlers (for declaration
  66:  * and lexical events).  That parser will be used to produce events.
  67:  *
  68:  * <p>The first parameter to the command gives the name of the document that
  69:  * will be given to that processor.  If it is a file name, it is converted
  70:  * to a URL first.
  71:  *
  72:  * <p>The second parameter describes a simple processing pipeline, and will
  73:  * be used as input to {@link gnu.xml.pipeline.PipelineFactory}
  74:  * methods which identify the processing to be done.  Examples of such a
  75:  * pipeline include <pre>
  76:  *
  77:  *    nsfix | validate                <em>to validate the input document </em>
  78:  *    nsfix | write ( stdout )        <em>to echo the file as XML text</em>
  79:  *    dom | nsfix | write ( stdout )  <em>parse into DOM, print the result</em>
  80:  * </pre>
  81:  *
  82:  * <p> Relatively complex pipelines can be described on the command line, but
  83:  * not all interesting ones will require as little configuration as can be done
  84:  * in that way.  Put filters like "nsfix", perhaps followed by "validate",
  85:  * at the front of the pipeline so they can be optimized out if a parser
  86:  * supports those modes natively.
  87:  *
  88:  * <p> If the parsing is aborted for any reason, the JVM will exit with a
  89:  * failure code.  If a validating parse was done then both validation and
  90:  * well formedness errors will cause a failure.  A non-validating parse
  91:  * will report failure on well formedness errors.
  92:  *
  93:  * @see gnu.xml.pipeline.PipelineFactory
  94:  *
  95:  * @author David Brownell
  96:  */
  97: final public class DoParse
  98: {
  99:     private DoParse () { /* no instances allowed */ }
 100: 
 101:     // first reported nonrecoverable error
 102:     private static SAXParseException    fatal;
 103: 
 104:     // error categories
 105:     private static int            errorCount;
 106:     private static int            fatalCount;
 107: 
 108:     /**
 109:      * Command line invoker for this class; pass a filename or URL
 110:      * as the first argument, and a pipeline description as the second.
 111:      * Make sure to use filters to condition the input to stages that
 112:      * require it; an <em>nsfix</em> filter will be a common requirement,
 113:      * to restore syntax that SAX2 parsers delete by default.  Some
 114:      * conditioning filters may be eliminated by setting parser options.
 115:      * (For example, "nsfix" can set the "namespace-prefixes" feature to
 116:      * a non-default value of "true".  In the same way, "validate" can set
 117:      * the "validation" feature to "true".)
 118:      */
 119:     public static void main (String argv [])
 120:     throws IOException
 121:     {
 122:     int        exitStatus = 1;
 123: 
 124:     if (argv.length != 2) {
 125:         System.err.println ("Usage: DoParse [filename|URL] pipeline-spec");
 126:         System.err.println ("Example pipeline specs:");
 127:         System.err.println ("  'nsfix | validate'");
 128:         System.err.println (
 129:         "       ... restore namespace syntax, validate");
 130:         System.err.println ("  'nsfix | write ( stdout )'");
 131:         System.err.println (
 132:         "       ... restore namespace syntax, write to stdout as XML"
 133:         );
 134:         System.exit (1);
 135:     }
 136: 
 137:     try {
 138:         //
 139:         // Get input source for specified document (or try ;-)
 140:         //
 141:         argv [0] = Resolver.getURL (argv [0]);
 142:         InputSource input = new InputSource (argv [0]);
 143: 
 144:         //
 145:         // Get the producer, using the system default parser (which
 146:         // can be overridden for this particular invocation).
 147:         //
 148:         // And the pipeline, using commandline options.
 149:         //
 150:         XMLReader        producer;
 151:         EventConsumer    consumer;
 152: 
 153:         producer = XMLReaderFactory.createXMLReader ();
 154: 
 155:         //
 156:         // XXX pipeline factory now has a pre-tokenized input
 157:         // method, use it ... that way at least some params
 158:         // can be written using quotes (have spaces, ...)
 159:         //
 160:         consumer = PipelineFactory.createPipeline (argv [1]);
 161: 
 162:         //
 163:         // XXX want commandline option for tweaking error handler.
 164:         // Want to be able to present warnings.
 165:         //
 166:         producer.setErrorHandler (new MyErrorHandler ());
 167: 
 168:         // XXX need facility enabling resolving to local DTDs
 169: 
 170:         //
 171:         // Parse.  The pipeline may get optimized a bit, so we
 172:         // can't always fail cleanly for validation without taking
 173:         // a look at the filter stages.
 174:         //
 175:         EventFilter.bind (producer, consumer);
 176:         producer.parse (input);
 177: 
 178:         try {
 179:         if (producer.getFeature (
 180:             "http://org.xml/sax/features/validation"))
 181:             exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
 182:         else if (fatalCount == 0)
 183:             exitStatus = 0;
 184:         } catch (SAXException e) {
 185:         if (hasValidator (consumer))
 186:             exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
 187:         else if (fatalCount == 0)
 188:             exitStatus = 0;
 189:         }
 190: 
 191:     } catch (java.net.MalformedURLException e) {
 192:         System.err.println ("** Malformed URL: " + e.getMessage ());
 193:         System.err.println ("Is '" + argv [0] + "' a non-existent file?");
 194:         e.printStackTrace ();
 195:         // e.g. FNF
 196: 
 197:     } catch (SAXParseException e) {
 198:         if (e != fatal) {
 199:         System.err.print (printParseException ("Parsing Aborted", e));
 200:         e.printStackTrace ();
 201:         if (e.getException () != null) {
 202:             System.err.println ("++ Wrapped exception:");
 203:             e.getException ().printStackTrace ();
 204:         }
 205:         }
 206: 
 207:     } catch (SAXException e) {
 208:         Exception    x = e;
 209:         if (e.getException () != null)
 210:         x = e.getException ();
 211:         x.printStackTrace ();
 212: 
 213:     } catch (Throwable t) {
 214:         t.printStackTrace ();
 215:     }
 216: 
 217:     System.exit (exitStatus);
 218:     }
 219: 
 220:     // returns true if saw a validator (before end or unrecognized node)
 221:     // false otherwise
 222:     private static boolean hasValidator (EventConsumer e)
 223:     {
 224:     if (e == null)
 225:         return false;
 226:     if (e instanceof ValidationConsumer)
 227:         return true;
 228:     if (e instanceof TeeConsumer) {
 229:         TeeConsumer    t = (TeeConsumer) e;
 230:         return hasValidator (t.getFirst ())
 231:         || hasValidator (t.getRest ());
 232:     }
 233:     if (e instanceof WellFormednessFilter
 234:         || e instanceof NSFilter
 235:         )
 236:         return hasValidator (((EventFilter)e).getNext ());
 237:     
 238:     // else ... gee, we can't know.  Assume not.
 239: 
 240:     return false;
 241:     }
 242: 
 243:     static class MyErrorHandler implements ErrorHandler
 244:     {
 245:     // dump validation errors, but continue
 246:     public void error (SAXParseException e)
 247:     throws SAXParseException
 248:     {
 249:         errorCount++;
 250:         System.err.print (printParseException ("Error", e));
 251:     }
 252: 
 253:     public void warning (SAXParseException e)
 254:     throws SAXParseException
 255:     {
 256:         // System.err.print (printParseException ("Warning", e));
 257:     }
 258: 
 259:     // try to continue fatal errors, in case a parser reports more
 260:     public void fatalError (SAXParseException e)
 261:     throws SAXParseException
 262:     {
 263:         fatalCount++;
 264:         if (fatal == null)
 265:         fatal = e;
 266:         System.err.print (printParseException ("Nonrecoverable Error", e));
 267:     }
 268:     }
 269: 
 270:     static private String printParseException (
 271:     String            label,
 272:     SAXParseException    e
 273:     ) {
 274:     StringBuffer    buf = new StringBuffer ();
 275:     int        temp;
 276: 
 277:     buf.append ("** ");
 278:     buf.append (label);
 279:     buf.append (": ");
 280:     buf.append (e.getMessage ());
 281:     buf.append ('\n');
 282:     if (e.getSystemId () != null) {
 283:         buf.append ("   URI:  ");
 284:         buf.append (e.getSystemId ());
 285:         buf.append ('\n');
 286:     }
 287:     if ((temp = e.getLineNumber ()) != -1) {
 288:         buf.append ("   line: ");
 289:         buf.append (temp);
 290:         buf.append ('\n');
 291:     }
 292:     if ((temp = e.getColumnNumber ()) != -1) {
 293:         buf.append ("   char: ");
 294:         buf.append (temp);
 295:         buf.append ('\n');
 296:     }
 297: 
 298:     return buf.toString ();
 299:     }
 300: }