Source for gnu.xml.pipeline.WellFormednessFilter

   1: /* WellFormednessFilter.java -- 
   2:    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.pipeline;
  39: 
  40: import java.util.EmptyStackException;
  41: import java.util.Stack;
  42: 
  43: import org.xml.sax.Attributes;
  44: import org.xml.sax.ErrorHandler;
  45: import org.xml.sax.Locator;
  46: import org.xml.sax.SAXException;
  47: import org.xml.sax.SAXParseException;
  48: 
  49: /**
  50:  * This filter reports fatal exceptions in the case of event streams that
  51:  * are not well formed.  The rules currently tested include: <ul>
  52:  *
  53:  *    <li>setDocumentLocator ... may be called only before startDocument
  54:  *
  55:  *    <li>startDocument/endDocument ... must be paired, and all other
  56:  *    calls (except setDocumentLocator) must be nested within these.
  57:  *
  58:  *    <li>startElement/endElement ... must be correctly paired, and
  59:  *    may never appear within CDATA sections.
  60:  *
  61:  *    <li>comment ... can't contain "--"
  62:  *
  63:  *    <li>character data ... can't contain "]]&gt;"
  64:  *
  65:  *    <li>whitespace ... can't contain CR
  66:  *
  67:  *    <li>whitespace and character data must be within an element
  68:  *
  69:  *    <li>processing instruction ... can't contain "?&gt;" or CR
  70:  *
  71:  *    <li>startCDATA/endCDATA ... must be correctly paired.
  72:  *
  73:  *    </ul>
  74:  *
  75:  * <p> Other checks for event stream correctness may be provided in
  76:  * the future.  For example, insisting that
  77:  * entity boundaries nest correctly,
  78:  * namespace scopes nest correctly,
  79:  * namespace values never contain relative URIs,
  80:  * attributes don't have "&lt;" characters;
  81:  * and more.
  82:  *
  83:  * @author David Brownell
  84:  */
  85: public final class WellFormednessFilter extends EventFilter
  86: {
  87:     private boolean        startedDoc;
  88:     private Stack        elementStack = new Stack ();
  89:     private boolean        startedCDATA;
  90:     private String        dtdState = "before";
  91: 
  92:     
  93:     /**
  94:      * Swallows all events after performing well formedness checks.
  95:      */
  96:     // constructor used by PipelineFactory
  97:     public WellFormednessFilter ()
  98:     { this (null); }
  99: 
 100: 
 101:     /**
 102:      * Passes events through to the specified consumer, after first
 103:      * processing them.
 104:      */
 105:     // constructor used by PipelineFactory
 106:     public WellFormednessFilter (EventConsumer consumer)
 107:     {
 108:     super (consumer);
 109: 
 110:     setContentHandler (this);
 111:     setDTDHandler (this);
 112:     
 113:     try {
 114:         setProperty (LEXICAL_HANDLER, this);
 115:     } catch (SAXException e) { /* can't happen */ }
 116:     }
 117: 
 118:     /**
 119:      * Resets state as if any preceding event stream was well formed.
 120:      * Particularly useful if it ended through some sort of error,
 121:      * and the endDocument call wasn't made.
 122:      */
 123:     public void reset ()
 124:     {
 125:     startedDoc = false;
 126:     startedCDATA = false;
 127:     elementStack.removeAllElements ();
 128:     }
 129: 
 130: 
 131:     private SAXParseException getException (String message)
 132:     {
 133:     SAXParseException    e;
 134:     Locator            locator = getDocumentLocator ();
 135: 
 136:     if (locator == null)
 137:         return new SAXParseException (message, null, null, -1, -1);
 138:     else
 139:         return new SAXParseException (message, locator);
 140:     }
 141: 
 142:     private void fatalError (String message)
 143:     throws SAXException
 144:     {
 145:     SAXParseException    e = getException (message);
 146:     ErrorHandler        handler = getErrorHandler ();
 147: 
 148:     if (handler != null)
 149:         handler.fatalError (e);
 150:     throw e;
 151:     }
 152: 
 153:     /**
 154:      * Throws an exception when called after startDocument.
 155:      *
 156:      * @param locator the locator, to be used in error reporting or relative
 157:      *    URI resolution.
 158:      *
 159:      * @exception IllegalStateException when called after the document
 160:      *    has already been started
 161:      */
 162:     public void setDocumentLocator (Locator locator)
 163:     {
 164:     if (startedDoc)
 165:         throw new IllegalStateException (
 166:             "setDocumentLocator called after startDocument");
 167:     super.setDocumentLocator (locator);
 168:     }
 169: 
 170:     public void startDocument () throws SAXException
 171:     {
 172:     if (startedDoc)
 173:         fatalError ("startDocument called more than once");
 174:     startedDoc = true;
 175:     startedCDATA = false;
 176:     elementStack.removeAllElements ();
 177:     super.startDocument ();
 178:     }
 179: 
 180:     public void startElement (
 181:     String uri, String localName,
 182:     String qName, Attributes atts
 183:     ) throws SAXException
 184:     {
 185:     if (!startedDoc)
 186:         fatalError ("callback outside of document?");
 187:     if ("inside".equals (dtdState))
 188:         fatalError ("element inside DTD?");
 189:     else
 190:         dtdState = "after";
 191:     if (startedCDATA)
 192:         fatalError ("element inside CDATA section");
 193:     if (qName == null || "".equals (qName))
 194:         fatalError ("startElement name missing");
 195:     elementStack.push (qName);
 196:     super.startElement (uri, localName, qName, atts);
 197:     }
 198: 
 199:     public void endElement (String uri, String localName, String qName)
 200:     throws SAXException
 201:     {
 202:     if (!startedDoc)
 203:         fatalError ("callback outside of document?");
 204:     if (startedCDATA)
 205:         fatalError ("element inside CDATA section");
 206:     if (qName == null || "".equals (qName))
 207:         fatalError ("endElement name missing");
 208:     
 209:     try {
 210:         String    top = (String) elementStack.pop ();
 211: 
 212:         if (!qName.equals (top))
 213:         fatalError ("<" + top + " ...>...</" + qName + ">");
 214:         // XXX could record/test namespace info
 215:     } catch (EmptyStackException e) {
 216:         fatalError ("endElement without startElement:  </" + qName + ">");
 217:     }
 218:     super.endElement (uri, localName, qName);
 219:     }
 220: 
 221:     public void endDocument () throws SAXException
 222:     {
 223:     if (!startedDoc)
 224:         fatalError ("callback outside of document?");
 225:     dtdState = "before";
 226:     startedDoc = false;
 227:     super.endDocument ();
 228:     }
 229: 
 230: 
 231:     public void startDTD (String root, String publicId, String systemId)
 232:     throws SAXException
 233:     {
 234:     if (!startedDoc)
 235:         fatalError ("callback outside of document?");
 236:     if ("before" != dtdState)
 237:         fatalError ("two DTDs?");
 238:     if (!elementStack.empty ())
 239:         fatalError ("DTD must precede root element");
 240:     dtdState = "inside";
 241:     super.startDTD (root, publicId, systemId);
 242:     }
 243: 
 244:     public void notationDecl (String name, String publicId, String systemId)
 245:     throws SAXException
 246:     {
 247: // FIXME: not all parsers will report startDTD() ...
 248: // we'd rather insist we're "inside".
 249:     if ("after" == dtdState)
 250:         fatalError ("not inside DTD");
 251:     super.notationDecl (name, publicId, systemId);
 252:     }
 253: 
 254:     public void unparsedEntityDecl (String name,
 255:         String publicId, String systemId, String notationName)
 256:     throws SAXException
 257:     {
 258: // FIXME: not all parsers will report startDTD() ...
 259: // we'd rather insist we're "inside".
 260:     if ("after" == dtdState)
 261:         fatalError ("not inside DTD");
 262:     super.unparsedEntityDecl (name, publicId, systemId, notationName);
 263:     }
 264: 
 265:     // FIXME:  add the four DeclHandler calls too
 266: 
 267:     public void endDTD ()
 268:     throws SAXException
 269:     {
 270:     if (!startedDoc)
 271:         fatalError ("callback outside of document?");
 272:     if ("inside" != dtdState)
 273:         fatalError ("DTD ends without start?");
 274:     dtdState = "after";
 275:     super.endDTD ();
 276:     }
 277: 
 278:     public void characters (char ch [], int start, int length)
 279:     throws SAXException
 280:     {
 281:     int here = start, end = start + length;
 282:     if (elementStack.empty ())
 283:         fatalError ("characters must be in an element");
 284:     while (here < end) {
 285:         if (ch [here++] != ']')
 286:         continue;
 287:         if (here == end)    // potential problem ...
 288:         continue;
 289:         if (ch [here++] != ']')
 290:         continue;
 291:         if (here == end)    // potential problem ...
 292:         continue;
 293:         if (ch [here++] == '>')
 294:         fatalError ("character data can't contain \"]]>\"");
 295:     }
 296:     super.characters (ch, start, length);
 297:     }
 298: 
 299:     public void ignorableWhitespace (char ch [], int start, int length)
 300:     throws SAXException
 301:     {
 302:     int here = start, end = start + length;
 303:     if (elementStack.empty ())
 304:         fatalError ("characters must be in an element");
 305:     while (here < end) {
 306:         if (ch [here++] == '\r')
 307:         fatalError ("whitespace can't contain CR");
 308:     }
 309:     super.ignorableWhitespace (ch, start, length);
 310:     }
 311: 
 312:     public void processingInstruction (String target, String data)
 313:     throws SAXException
 314:     {
 315:     if (data.indexOf ('\r') > 0)
 316:         fatalError ("PIs can't contain CR");
 317:     if (data.indexOf ("?>") > 0)
 318:         fatalError ("PIs can't contain \"?>\"");
 319:     }
 320: 
 321:     public void comment (char ch [], int start, int length)
 322:     throws SAXException
 323:     {
 324:     if (!startedDoc)
 325:         fatalError ("callback outside of document?");
 326:     if (startedCDATA)
 327:         fatalError ("comments can't nest in CDATA");
 328:     int here = start, end = start + length;
 329:     while (here < end) {
 330:         if (ch [here] == '\r')
 331:         fatalError ("comments can't contain CR");
 332:         if (ch [here++] != '-')
 333:         continue;
 334:         if (here == end)
 335:         fatalError ("comments can't end with \"--->\"");
 336:         if (ch [here++] == '-')
 337:         fatalError ("comments can't contain \"--\"");
 338:     }
 339:     super.comment (ch, start, length);
 340:     }
 341: 
 342:     public void startCDATA ()
 343:     throws SAXException
 344:     {
 345:     if (!startedDoc)
 346:         fatalError ("callback outside of document?");
 347:     if (startedCDATA)
 348:         fatalError ("CDATA starts can't nest");
 349:     startedCDATA = true;
 350:     super.startCDATA ();
 351:     }
 352: 
 353:     public void endCDATA ()
 354:     throws SAXException
 355:     {
 356:     if (!startedDoc)
 357:         fatalError ("callback outside of document?");
 358:     if (!startedCDATA)
 359:         fatalError ("CDATA end without start?");
 360:     startedCDATA = false;
 361:     super.endCDATA ();
 362:     }
 363: }