1:
53:
54: package ;
55:
56: import ;
57: import ;
58: import ;
59: import ;
60: import ;
61: import ;
62: import ;
63: import ;
64: import ;
65: import ;
66: import ;
67: import ;
68: import ;
69: import ;
70: import ;
71: import ;
72: import ;
73: import ;
74: import ;
75: import ;
76: import ;
77: import ;
78:
79: import ;
80: import ;
81: import ;
82: import ;
83: import ;
84: import ;
85: import ;
86: import ;
87: import ;
88: import ;
89:
90: import ;
91: import ;
92: import ;
93:
94:
117: public class XMLParser
118: implements XMLStreamReader, NamespaceContext
119: {
120:
121:
// Parser states held in the 'state' field; next() dispatches on them.
122: private static final int INIT = 0;
123: private static final int PROLOG = 1;
124: private static final int CONTENT = 2;
125: private static final int EMPTY_ELEMENT = 3;
126: private static final int MISC = 4;
127:
128:
// NOTE(review): the LIT_* values are distinct powers of two, so they look like
// OR-able flags for literal reading; no use is visible in this chunk - confirm.
129: private final static int LIT_ENTITY_REF = 2;
130: private final static int LIT_NORMALIZE = 4;
131: private final static int LIT_ATTRIBUTE = 8;
132: private final static int LIT_DISABLE_PE = 16;
133: private final static int LIT_DISABLE_CREF = 32;
134: private final static int LIT_DISABLE_EREF = 64;
135: private final static int LIT_PUBID = 256;
136:
137:
// NOTE(review): presumably codes describing how an attribute default was
// declared; they are not referenced in the visible part of the file - confirm.
138: final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
139: final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
140: final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
141: final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
142: final static int ATTRIBUTE_DEFAULT_FIXED = 34;
143:
144:
// Extra event codes assigned to 'event' by next() when it drains the
// start/end entity stacks; returned to the caller only if extendedEventTypes.
145: final static int START_ENTITY = 50;
146: final static int END_ENTITY = 51;
147:
148:
// The input currently being read; replaced by pushInput(Input).
151: private Input input;
152:
153:
// All inputs pushed so far; pushInput(Input) appends to the end.
158: private LinkedList inputStack = new LinkedList();
159:
160:
// Names of entities whose start still has to be reported by next().
163: private LinkedList startEntityStack = new LinkedList();
164:
165:
// Names of entities whose end still has to be reported by next().
168: private LinkedList endEntityStack = new LinkedList();
169:
170:
// Current parser state, one of INIT / PROLOG / CONTENT / EMPTY_ELEMENT / MISC.
173: private int state = INIT;
174:
175:
// Type of the current event, as returned by getEventType().
178: private int event;
179:
180:
// True when hasNext() has already advanced the parser; the following next()
// call then returns the stored event instead of reading further.
183: private boolean lookahead;
184:
185:
// Names of the open elements; the innermost one is the last entry.
189: private LinkedList stack = new LinkedList();
190:
191:
// Namespace scopes as LinkedHashMaps (prefix -> URI); the first entry is the
// scope of the current element and is removed after its END_ELEMENT.
196: private LinkedList namespaces = new LinkedList();
197:
198:
// Base URI entries, innermost first; entries may be null (see getXMLBase()).
203: private LinkedList bases = new LinkedList();
204:
205:
// Attribute objects of the current element.
209: private ArrayList attrs = new ArrayList();
210:
211:
// Text of the current event; for element events it holds the qualified name.
214: private StringBuffer buf = new StringBuffer();
215:
216:
// NOTE(review): scratch buffer, presumably for name tokens; not used in the
// visible part of the file - confirm.
219: private StringBuffer nmtokenBuf = new StringBuffer();
220:
221:
// NOTE(review): scratch buffer, presumably for literals; not used in the
// visible part of the file - confirm.
224: private StringBuffer literalBuf = new StringBuffer();
225:
226:
// Scratch array that require(String) and tryRead(String) read delimiters into.
229: private int[] tmpBuf = new int[1024];
230:
231:
// NOTE(review): presumably the content model of the element being validated;
// not referenced in the visible part of the file - confirm.
234: private ContentModel currentContentModel;
235:
236:
// Allocated by the constructors only when validating.
242: private LinkedList validationStack;
243:
244:
// Allocated by the constructors only when validating.
249: private HashSet ids, idrefs;
250:
251:
// Values handed out by getPITarget() and getPIData().
255: private String piTarget, piData;
256:
257:
// Handed out by getVersion(), which falls back to "1.0" when this is null.
260: private String xmlVersion;
261:
262:
// Handed out by getCharacterEncodingScheme().
265: private String xmlEncoding;
266:
267:
// Null when unset (see standaloneSet()); isStandalone() is true only for TRUE.
270: private Boolean xmlStandalone;
271:
272:
// The document type, or null; next() reads at most one DOCTYPE declaration.
275: Doctype doctype;
276:
277:
// Consulted by readCh(): when expandPE is set a '%' (0x25) triggers parameter
// entity expansion, and peIsError makes that an error first.
280: private boolean expandPE, peIsError;
281:
282:
// Configuration flags below are fixed by the constructors.
285: private final boolean validating;
286:
287:
290: private final boolean stringInterning;
291:
292:
296: private final boolean coalescing;
297:
298:
// When set, next() expands parsed general entity references in content
// instead of reporting ENTITY_REFERENCE events.
303: private final boolean replaceERefs;
304:
305:
// When false, pushInput(String, ExternalIds, ...) returns without reading.
308: private final boolean externalEntities;
309:
310:
313: private final boolean supportDTD;
314:
315:
// When set, next() removes a namespace scope after each END_ELEMENT.
320: private final boolean namespaceAware;
321:
322:
// When set, the 'bases' list is maintained and consulted by getXMLBase().
326: private final boolean baseAware;
327:
328:
// When set, next() returns START_ENTITY / END_ENTITY events to the caller.
332: private final boolean extendedEventTypes;
333:
334:
// Reporter supplied to the constructor; exposed through getProperty().
337: final XMLReporter reporter;
338:
339:
// Resolver supplied to the constructor; asked first for external entities.
342: final XMLResolver resolver;
343:
344:
// Delimiter strings passed to tryRead(String).
345: private static final String TEST_START_ELEMENT = "<";
346: private static final String TEST_END_ELEMENT = "</";
347: private static final String TEST_COMMENT = "<!--";
348: private static final String TEST_PI = "<?";
349: private static final String TEST_CDATA = "<![CDATA[";
350: private static final String TEST_XML_DECL = "<?xml";
351: private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
352: private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
353: private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
354: private static final String TEST_ENTITY_DECL = "<!ENTITY";
355: private static final String TEST_NOTATION_DECL = "<!NOTATION";
356: private static final String TEST_KET = ">";
357: private static final String TEST_END_COMMENT = "--";
358: private static final String TEST_END_PI = "?>";
359: private static final String TEST_END_CDATA = "]]>";
360:
361:
// Replacement text of the five predefined entities, keyed by entity name;
// next() looks references up here before consulting the doctype.
364: private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
365: static
366: {
367: PREDEFINED_ENTITIES.put("amp", "&");
368: PREDEFINED_ENTITIES.put("lt", "<");
369: PREDEFINED_ENTITIES.put("gt", ">");
370: PREDEFINED_ENTITIES.put("apos", "'");
371: PREDEFINED_ENTITIES.put("quot", "\"");
372: }
373:
374:
397: public XMLParser(InputStream in, String systemId,
398: boolean validating,
399: boolean namespaceAware,
400: boolean coalescing,
401: boolean replaceERefs,
402: boolean externalEntities,
403: boolean supportDTD,
404: boolean baseAware,
405: boolean stringInterning,
406: boolean extendedEventTypes,
407: XMLReporter reporter,
408: XMLResolver resolver)
409: {
410: this.validating = validating;
411: this.namespaceAware = namespaceAware;
412: this.coalescing = coalescing;
413: this.replaceERefs = replaceERefs;
414: this.externalEntities = externalEntities;
415: this.supportDTD = supportDTD;
416: this.baseAware = baseAware;
417: this.stringInterning = stringInterning;
418: this.extendedEventTypes = extendedEventTypes;
419: this.reporter = reporter;
420: this.resolver = resolver;
421: if (validating)
422: {
423: validationStack = new LinkedList();
424: ids = new HashSet();
425: idrefs = new HashSet();
426: }
427: String debug = System.getProperty("gnu.xml.debug.input");
428: if (debug != null)
429: {
430: try
431: {
432: File file = File.createTempFile(debug, ".xml");
433: in = new TeeInputStream(in, new FileOutputStream(file));
434: }
435: catch (IOException e)
436: {
437: RuntimeException e2 = new RuntimeException();
438: e2.initCause(e);
439: throw e2;
440: }
441: }
442: systemId = canonicalize(systemId);
443: pushInput(new Input(in, null, null, systemId, null, null, false, true));
444: }
445:
446:
472: public XMLParser(Reader reader, String systemId,
473: boolean validating,
474: boolean namespaceAware,
475: boolean coalescing,
476: boolean replaceERefs,
477: boolean externalEntities,
478: boolean supportDTD,
479: boolean baseAware,
480: boolean stringInterning,
481: boolean extendedEventTypes,
482: XMLReporter reporter,
483: XMLResolver resolver)
484: {
485: this.validating = validating;
486: this.namespaceAware = namespaceAware;
487: this.coalescing = coalescing;
488: this.replaceERefs = replaceERefs;
489: this.externalEntities = externalEntities;
490: this.supportDTD = supportDTD;
491: this.baseAware = baseAware;
492: this.stringInterning = stringInterning;
493: this.extendedEventTypes = extendedEventTypes;
494: this.reporter = reporter;
495: this.resolver = resolver;
496: if (validating)
497: {
498: validationStack = new LinkedList();
499: ids = new HashSet();
500: idrefs = new HashSet();
501: }
502: String debug = System.getProperty("gnu.xml.debug.input");
503: if (debug != null)
504: {
505: try
506: {
507: File file = File.createTempFile(debug, ".xml");
508: reader = new TeeReader(reader, new FileWriter(file));
509: }
510: catch (IOException e)
511: {
512: RuntimeException e2 = new RuntimeException();
513: e2.initCause(e);
514: throw e2;
515: }
516: }
517: systemId = canonicalize(systemId);
518: pushInput(new Input(null, reader, null, systemId, null, null, false, true));
519: }
520:
521:
522:
523: public String getNamespaceURI(String prefix)
524: {
525: if (XMLConstants.XML_NS_PREFIX.equals(prefix))
526: return XMLConstants.XML_NS_URI;
527: if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
528: return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
529: for (Iterator i = namespaces.iterator(); i.hasNext(); )
530: {
531: LinkedHashMap ctx = (LinkedHashMap) i.next();
532: String namespaceURI = (String) ctx.get(prefix);
533: if (namespaceURI != null)
534: return namespaceURI;
535: }
536: return null;
537: }
538:
539: public String getPrefix(String namespaceURI)
540: {
541: if (XMLConstants.XML_NS_URI.equals(namespaceURI))
542: return XMLConstants.XML_NS_PREFIX;
543: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
544: return XMLConstants.XMLNS_ATTRIBUTE;
545: for (Iterator i = namespaces.iterator(); i.hasNext(); )
546: {
547: LinkedHashMap ctx = (LinkedHashMap) i.next();
548: if (ctx.containsValue(namespaceURI))
549: {
550: for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
551: {
552: Map.Entry entry = (Map.Entry) i.next();
553: String uri = (String) entry.getValue();
554: if (uri.equals(namespaceURI))
555: return (String) entry.getKey();
556: }
557: }
558: }
559: return null;
560: }
561:
562: public Iterator getPrefixes(String namespaceURI)
563: {
564: if (XMLConstants.XML_NS_URI.equals(namespaceURI))
565: return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
566: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
567: return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
568: LinkedList acc = new LinkedList();
569: for (Iterator i = namespaces.iterator(); i.hasNext(); )
570: {
571: LinkedHashMap ctx = (LinkedHashMap) i.next();
572: if (ctx.containsValue(namespaceURI))
573: {
574: for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
575: {
576: Map.Entry entry = (Map.Entry) i.next();
577: String uri = (String) entry.getValue();
578: if (uri.equals(namespaceURI))
579: acc.add(entry.getKey());
580: }
581: }
582: }
583: return acc.iterator();
584: }
585:
586:
587:
588: public void close()
589: throws XMLStreamException
590: {
591: stack = null;
592: namespaces = null;
593: bases = null;
594: buf = null;
595: attrs = null;
596: doctype = null;
597:
598: inputStack = null;
599: validationStack = null;
600: ids = null;
601: idrefs = null;
602: }
603:
604: public NamespaceContext getNamespaceContext()
605: {
606: return this;
607: }
608:
609: public int getAttributeCount()
610: {
611: return attrs.size();
612: }
613:
614: public String getAttributeLocalName(int index)
615: {
616: Attribute a = (Attribute) attrs.get(index);
617: return a.localName;
618: }
619:
620: public String getAttributeNamespace(int index)
621: {
622: String prefix = getAttributePrefix(index);
623: return getNamespaceURI(prefix);
624: }
625:
626: public String getAttributePrefix(int index)
627: {
628: Attribute a = (Attribute) attrs.get(index);
629: return a.prefix;
630: }
631:
632: public QName getAttributeName(int index)
633: {
634: Attribute a = (Attribute) attrs.get(index);
635: String namespaceURI = getNamespaceURI(a.prefix);
636: return new QName(namespaceURI, a.localName, a.prefix);
637: }
638:
639: public String getAttributeType(int index)
640: {
641: Attribute a = (Attribute) attrs.get(index);
642: return a.type;
643: }
644:
645: private String getAttributeType(String elementName, String attName)
646: {
647: if (doctype != null)
648: {
649: AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
650: if (att != null)
651: return att.type;
652: }
653: return "CDATA";
654: }
655:
656: public String getAttributeValue(int index)
657: {
658: Attribute a = (Attribute) attrs.get(index);
659: return a.value;
660: }
661:
662: public String getAttributeValue(String namespaceURI, String localName)
663: {
664: for (Iterator i = attrs.iterator(); i.hasNext(); )
665: {
666: Attribute a = (Attribute) i.next();
667: if (a.localName.equals(localName))
668: {
669: String uri = getNamespaceURI(a.prefix);
670: if ((uri == null && namespaceURI == null) ||
671: (uri != null && uri.equals(namespaceURI)))
672: return a.value;
673: }
674: }
675: return null;
676: }
677:
678: boolean isAttributeDeclared(int index)
679: {
680: if (doctype == null)
681: return false;
682: Attribute a = (Attribute) attrs.get(index);
683: String qn = ("".equals(a.prefix)) ? a.localName :
684: a.prefix + ":" + a.localName;
685: String elementName = buf.toString();
686: return doctype.isAttributeDeclared(elementName, qn);
687: }
688:
689: public String getCharacterEncodingScheme()
690: {
691: return xmlEncoding;
692: }
693:
694: public String getElementText()
695: throws XMLStreamException
696: {
697: if (event != XMLStreamConstants.START_ELEMENT)
698: throw new XMLStreamException("current event must be START_ELEMENT");
699: StringBuffer elementText = new StringBuffer();
700: int depth = stack.size();
701: while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
702: {
703: switch (next())
704: {
705: case XMLStreamConstants.CHARACTERS:
706: case XMLStreamConstants.SPACE:
707: elementText.append(buf.toString());
708: }
709: }
710: return elementText.toString();
711: }
712:
713: public String getEncoding()
714: {
715: return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
716: }
717:
718: public int getEventType()
719: {
720: return event;
721: }
722:
723: public String getLocalName()
724: {
725: switch (event)
726: {
727: case XMLStreamConstants.START_ELEMENT:
728: case XMLStreamConstants.END_ELEMENT:
729: String qName = buf.toString();
730: int ci = qName.indexOf(':');
731: return (ci == -1) ? qName : qName.substring(ci + 1);
732: default:
733: return null;
734: }
735: }
736:
737: public Location getLocation()
738: {
739: return input;
740: }
741:
742: public QName getName()
743: {
744: switch (event)
745: {
746: case XMLStreamConstants.START_ELEMENT:
747: case XMLStreamConstants.END_ELEMENT:
748: String qName = buf.toString();
749: int ci = qName.indexOf(':');
750: String localName = (ci == -1) ? qName : qName.substring(ci + 1);
751: String prefix = (ci == -1) ?
752: (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
753: qName.substring(0, ci);
754: String namespaceURI = getNamespaceURI(prefix);
755: return new QName(namespaceURI, localName, prefix);
756: default:
757: return null;
758: }
759: }
760:
761: public int getNamespaceCount()
762: {
763: if (!namespaceAware || namespaces.isEmpty())
764: return 0;
765: switch (event)
766: {
767: case XMLStreamConstants.START_ELEMENT:
768: case XMLStreamConstants.END_ELEMENT:
769: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
770: return ctx.size();
771: default:
772: return 0;
773: }
774: }
775:
776: public String getNamespacePrefix(int index)
777: {
778: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
779: int count = 0;
780: for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
781: {
782: String prefix = (String) i.next();
783: if (count++ == index)
784: return prefix;
785: }
786: return null;
787: }
788:
789: public String getNamespaceURI()
790: {
791: switch (event)
792: {
793: case XMLStreamConstants.START_ELEMENT:
794: case XMLStreamConstants.END_ELEMENT:
795: String qName = buf.toString();
796: int ci = qName.indexOf(':');
797: if (ci == -1)
798: return null;
799: String prefix = qName.substring(0, ci);
800: return getNamespaceURI(prefix);
801: default:
802: return null;
803: }
804: }
805:
806: public String getNamespaceURI(int index)
807: {
808: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
809: int count = 0;
810: for (Iterator i = ctx.values().iterator(); i.hasNext(); )
811: {
812: String uri = (String) i.next();
813: if (count++ == index)
814: return uri;
815: }
816: return null;
817: }
818:
819: public String getPIData()
820: {
821: return piData;
822: }
823:
824: public String getPITarget()
825: {
826: return piTarget;
827: }
828:
829: public String getPrefix()
830: {
831: switch (event)
832: {
833: case XMLStreamConstants.START_ELEMENT:
834: case XMLStreamConstants.END_ELEMENT:
835: String qName = buf.toString();
836: int ci = qName.indexOf(':');
837: return (ci == -1) ?
838: (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
839: qName.substring(0, ci);
840: default:
841: return null;
842: }
843: }
844:
845: public Object getProperty(String name)
846: throws IllegalArgumentException
847: {
848: if (name == null)
849: throw new IllegalArgumentException("name is null");
850: if (XMLInputFactory.ALLOCATOR.equals(name))
851: return null;
852: if (XMLInputFactory.IS_COALESCING.equals(name))
853: return coalescing ? Boolean.TRUE : Boolean.FALSE;
854: if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
855: return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
856: if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
857: return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
858: if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
859: return externalEntities ? Boolean.TRUE : Boolean.FALSE;
860: if (XMLInputFactory.IS_VALIDATING.equals(name))
861: return Boolean.FALSE;
862: if (XMLInputFactory.REPORTER.equals(name))
863: return reporter;
864: if (XMLInputFactory.RESOLVER.equals(name))
865: return resolver;
866: if (XMLInputFactory.SUPPORT_DTD.equals(name))
867: return supportDTD ? Boolean.TRUE : Boolean.FALSE;
868: if ("gnu.xml.stream.stringInterning".equals(name))
869: return stringInterning ? Boolean.TRUE : Boolean.FALSE;
870: if ("gnu.xml.stream.xmlBase".equals(name))
871: return baseAware ? Boolean.TRUE : Boolean.FALSE;
872: if ("gnu.xml.stream.baseURI".equals(name))
873: return getXMLBase();
874: return null;
875: }
876:
877: public String getText()
878: {
879: return buf.toString();
880: }
881:
882: public char[] getTextCharacters()
883: {
884: return buf.toString().toCharArray();
885: }
886:
887: public int getTextCharacters(int sourceStart, char[] target,
888: int targetStart, int length)
889: throws XMLStreamException
890: {
891: length = Math.min(sourceStart + buf.length(), length);
892: int sourceEnd = sourceStart + length;
893: buf.getChars(sourceStart, sourceEnd, target, targetStart);
894: return length;
895: }
896:
897: public int getTextLength()
898: {
899: return buf.length();
900: }
901:
902: public int getTextStart()
903: {
904: return 0;
905: }
906:
907: public String getVersion()
908: {
909: return (xmlVersion == null) ? "1.0" : xmlVersion;
910: }
911:
912: public boolean hasName()
913: {
914: switch (event)
915: {
916: case XMLStreamConstants.START_ELEMENT:
917: case XMLStreamConstants.END_ELEMENT:
918: return true;
919: default:
920: return false;
921: }
922: }
923:
924: public boolean hasText()
925: {
926: switch (event)
927: {
928: case XMLStreamConstants.CHARACTERS:
929: case XMLStreamConstants.SPACE:
930: return true;
931: default:
932: return false;
933: }
934: }
935:
936: public boolean isAttributeSpecified(int index)
937: {
938: Attribute a = (Attribute) attrs.get(index);
939: return a.specified;
940: }
941:
942: public boolean isCharacters()
943: {
944: return (event == XMLStreamConstants.CHARACTERS);
945: }
946:
947: public boolean isEndElement()
948: {
949: return (event == XMLStreamConstants.END_ELEMENT);
950: }
951:
952: public boolean isStandalone()
953: {
954: return Boolean.TRUE.equals(xmlStandalone);
955: }
956:
957: public boolean isStartElement()
958: {
959: return (event == XMLStreamConstants.START_ELEMENT);
960: }
961:
962: public boolean isWhiteSpace()
963: {
964: return (event == XMLStreamConstants.SPACE);
965: }
966:
967: public int nextTag()
968: throws XMLStreamException
969: {
970: do
971: {
972: switch (next())
973: {
974: case XMLStreamConstants.START_ELEMENT:
975: case XMLStreamConstants.END_ELEMENT:
976: case XMLStreamConstants.CHARACTERS:
977: case XMLStreamConstants.SPACE:
978: case XMLStreamConstants.COMMENT:
979: case XMLStreamConstants.PROCESSING_INSTRUCTION:
980: break;
981: default:
982: throw new XMLStreamException("Unexpected event type: " + event);
983: }
984: }
985: while (event != XMLStreamConstants.START_ELEMENT &&
986: event != XMLStreamConstants.END_ELEMENT);
987: return event;
988: }
989:
990: public void require(int type, String namespaceURI, String localName)
991: throws XMLStreamException
992: {
993: if (event != type)
994: throw new XMLStreamException("Current event type is " + event);
995: if (event == XMLStreamConstants.START_ELEMENT ||
996: event == XMLStreamConstants.END_ELEMENT)
997: {
998: String ln = getLocalName();
999: if (!ln.equals(localName))
1000: throw new XMLStreamException("Current local-name is " + ln);
1001: String uri = getNamespaceURI();
1002: if ((uri == null && namespaceURI != null) ||
1003: (uri != null && !uri.equals(namespaceURI)))
1004: throw new XMLStreamException("Current namespace URI is " + uri);
1005: }
1006: }
1007:
1008: public boolean standaloneSet()
1009: {
1010: return (xmlStandalone != null);
1011: }
1012:
1013: public boolean hasNext()
1014: throws XMLStreamException
1015: {
1016: if (event == XMLStreamConstants.END_DOCUMENT)
1017: return false;
1018: if (!lookahead)
1019: {
1020: next();
1021: lookahead = true;
1022: }
1023: return event != -1;
1024: }
1025:
1026: public int next()
1027: throws XMLStreamException
1028: {
1029: if (lookahead)
1030: {
1031: lookahead = false;
1032: return event;
1033: }
1034: if (event == XMLStreamConstants.END_ELEMENT)
1035: {
1036:
1037: if (namespaceAware && !namespaces.isEmpty())
1038: namespaces.removeFirst();
1039:
1040: if (baseAware && !bases.isEmpty())
1041: bases.removeFirst();
1042: }
1043: if (!startEntityStack.isEmpty())
1044: {
1045: String entityName = (String) startEntityStack.removeFirst();
1046: buf.setLength(0);
1047: buf.append(entityName);
1048: event = START_ENTITY;
1049: return extendedEventTypes ? event : next();
1050: }
1051: else if (!endEntityStack.isEmpty())
1052: {
1053: String entityName = (String) endEntityStack.removeFirst();
1054: buf.setLength(0);
1055: buf.append(entityName);
1056: event = END_ENTITY;
1057: return extendedEventTypes ? event : next();
1058: }
1059: try
1060: {
1061: if (!input.initialized)
1062: input.init();
1063: switch (state)
1064: {
1065: case CONTENT:
1066: if (tryRead(TEST_END_ELEMENT))
1067: {
1068: readEndElement();
1069: if (stack.isEmpty())
1070: state = MISC;
1071: event = XMLStreamConstants.END_ELEMENT;
1072: }
1073: else if (tryRead(TEST_COMMENT))
1074: {
1075: readComment(false);
1076: event = XMLStreamConstants.COMMENT;
1077: }
1078: else if (tryRead(TEST_PI))
1079: {
1080: readPI(false);
1081: event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1082: }
1083: else if (tryRead(TEST_CDATA))
1084: {
1085: readCDSect();
1086: event = XMLStreamConstants.CDATA;
1087: }
1088: else if (tryRead(TEST_START_ELEMENT))
1089: {
1090: state = readStartElement();
1091: event = XMLStreamConstants.START_ELEMENT;
1092: }
1093: else
1094: {
1095:
1096: mark(8);
1097: int c = readCh();
1098: if (c == 0x26)
1099: {
1100: c = readCh();
1101: if (c == 0x23)
1102: {
1103: reset();
1104: event = readCharData(null);
1105: }
1106: else
1107: {
1108:
1109: reset();
1110: readCh();
1111: readReference();
1112: String ref = buf.toString();
1113: String text = (String) PREDEFINED_ENTITIES.get(ref);
1114: if (text != null)
1115: {
1116: event = readCharData(text);
1117: }
1118: else if (replaceERefs && !isUnparsedEntity(ref))
1119: {
1120:
1121: boolean external = false;
1122: if (doctype != null)
1123: {
1124: Object entity = doctype.getEntity(ref);
1125: if (entity instanceof ExternalIds)
1126: external = true;
1127: }
1128: expandEntity(ref, false, external);
1129: event = next();
1130: }
1131: else
1132: {
1133: event = XMLStreamConstants.ENTITY_REFERENCE;
1134: }
1135: }
1136: }
1137: else
1138: {
1139: reset();
1140: event = readCharData(null);
1141: if (validating && doctype != null)
1142: validatePCData(buf.toString());
1143: }
1144: }
1145: break;
1146: case EMPTY_ELEMENT:
1147: String elementName = (String) stack.removeLast();
1148: buf.setLength(0);
1149: buf.append(elementName);
1150: state = stack.isEmpty() ? MISC : CONTENT;
1151: event = XMLStreamConstants.END_ELEMENT;
1152: if (validating && doctype != null)
1153: endElementValidationHook();
1154: break;
1155: case INIT:
1156: if (tryRead(TEST_XML_DECL))
1157: readXMLDecl();
1158: input.finalizeEncoding();
1159: event = XMLStreamConstants.START_DOCUMENT;
1160: state = PROLOG;
1161: break;
1162: case PROLOG:
1163: skipWhitespace();
1164: if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
1165: {
1166: readDoctypeDecl();
1167: event = XMLStreamConstants.DTD;
1168: }
1169: else if (tryRead(TEST_COMMENT))
1170: {
1171: readComment(false);
1172: event = XMLStreamConstants.COMMENT;
1173: }
1174: else if (tryRead(TEST_PI))
1175: {
1176: readPI(false);
1177: event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1178: }
1179: else if (tryRead(TEST_START_ELEMENT))
1180: {
1181: state = readStartElement();
1182: event = XMLStreamConstants.START_ELEMENT;
1183: }
1184: else
1185: {
1186: int c = readCh();
1187: error("no root element: U+" + Integer.toHexString(c));
1188: }
1189: break;
1190: case MISC:
1191: skipWhitespace();
1192: if (tryRead(TEST_COMMENT))
1193: {
1194: readComment(false);
1195: event = XMLStreamConstants.COMMENT;
1196: }
1197: else if (tryRead(TEST_PI))
1198: {
1199: readPI(false);
1200: event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1201: }
1202: else
1203: {
1204: if (event == XMLStreamConstants.END_DOCUMENT)
1205: throw new NoSuchElementException();
1206: int c = readCh();
1207: if (c != -1)
1208: error("Only comments and PIs may appear after " +
1209: "the root element");
1210: event = XMLStreamConstants.END_DOCUMENT;
1211: }
1212: break;
1213: default:
1214: event = -1;
1215: }
1216: return event;
1217: }
1218: catch (IOException e)
1219: {
1220: XMLStreamException e2 = new XMLStreamException();
1221: e2.initCause(e);
1222: throw e2;
1223: }
1224: }
1225:
1226:
1227:
1228:
1231: String getCurrentElement()
1232: {
1233: return (String) stack.getLast();
1234: }
1235:
1236:
1237:
1238: private void mark(int limit)
1239: throws IOException
1240: {
1241: input.mark(limit);
1242: }
1243:
1244: private void reset()
1245: throws IOException
1246: {
1247: input.reset();
1248: }
1249:
1250: private int read()
1251: throws IOException
1252: {
1253: return input.read();
1254: }
1255:
1256: private int read(int[] b, int off, int len)
1257: throws IOException
1258: {
1259: return input.read(b, off, len);
1260: }
1261:
1262:
1265: private int readCh()
1266: throws IOException, XMLStreamException
1267: {
1268: int c = read();
1269: if (expandPE && c == 0x25)
1270: {
1271: if (peIsError)
1272: error("PE reference within decl in internal subset.");
1273: expandPEReference();
1274: return readCh();
1275: }
1276: return c;
1277: }
1278:
1279:
1285: private void require(char delim)
1286: throws IOException, XMLStreamException
1287: {
1288: mark(1);
1289: int c = readCh();
1290: if (delim != c)
1291: {
1292: reset();
1293: error("required character (got U+" + Integer.toHexString(c) + ")",
1294: new Character(delim));
1295: }
1296: }
1297:
1298:
1304: private void require(String delim)
1305: throws IOException, XMLStreamException
1306: {
1307: char[] chars = delim.toCharArray();
1308: int len = chars.length;
1309: mark(len);
1310: int off = 0;
1311: do
1312: {
1313: int l2 = read(tmpBuf, off, len - off);
1314: if (l2 == -1)
1315: {
1316: reset();
1317: error("EOF before required string", delim);
1318: }
1319: off += l2;
1320: }
1321: while (off < len);
1322: for (int i = 0; i < chars.length; i++)
1323: {
1324: if (chars[i] != tmpBuf[i])
1325: {
1326: reset();
1327: error("required string", delim);
1328: }
1329: }
1330: }
1331:
1332:
1337: private boolean tryRead(char delim)
1338: throws IOException, XMLStreamException
1339: {
1340: mark(1);
1341: int c = readCh();
1342: if (delim != c)
1343: {
1344: reset();
1345: return false;
1346: }
1347: return true;
1348: }
1349:
1350:
1357: private boolean tryRead(String test)
1358: throws IOException
1359: {
1360: char[] chars = test.toCharArray();
1361: int len = chars.length;
1362: mark(len);
1363: int count = 0;
1364: int l2 = read(tmpBuf, 0, len);
1365: if (l2 == -1)
1366: {
1367: reset();
1368: return false;
1369: }
1370: count += l2;
1371:
1372: for (int i = 0; i < count; i++)
1373: {
1374: if (chars[i] != tmpBuf[i])
1375: {
1376: reset();
1377: return false;
1378: }
1379: }
1380: while (count < len)
1381: {
1382:
1383: int c = read();
1384: if (c == -1)
1385: {
1386: reset();
1387: return false;
1388: }
1389: tmpBuf[count] = (char) c;
1390:
1391: if (chars[count] != tmpBuf[count])
1392: {
1393: reset();
1394: return false;
1395: }
1396: count++;
1397: }
1398: return true;
1399: }
1400:
1401:
1405: private void readUntil(String delim)
1406: throws IOException, XMLStreamException
1407: {
1408: int startLine = input.line;
1409: try
1410: {
1411: while (!tryRead(delim))
1412: {
1413: int c = readCh();
1414: if (c == -1)
1415: throw new EOFException();
1416: else if (input.xml11)
1417: {
1418: if (!isXML11Char(c) || isXML11RestrictedChar(c))
1419: error("illegal XML 1.1 character",
1420: "U+" + Integer.toHexString(c));
1421: }
1422: else if (!isChar(c))
1423: error("illegal XML character",
1424: "U+" + Integer.toHexString(c));
1425: buf.append(Character.toChars(c));
1426: }
1427: }
1428: catch (EOFException e)
1429: {
1430: error("end of input while looking for delimiter "+
1431: "(started on line " + startLine + ')', delim);
1432: }
1433: }
1434:
1435:
1439: private boolean tryWhitespace()
1440: throws IOException, XMLStreamException
1441: {
1442: boolean white;
1443: boolean ret = false;
1444: do
1445: {
1446: mark(1);
1447: int c = readCh();
1448: while (c == -1 && inputStack.size() > 1)
1449: {
1450: popInput();
1451: c = readCh();
1452: }
1453: white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1454: if (white)
1455: ret = true;
1456: }
1457: while (white);
1458: reset();
1459: return ret;
1460: }
1461:
1462:
1465: private void skipWhitespace()
1466: throws IOException, XMLStreamException
1467: {
1468: boolean white;
1469: do
1470: {
1471: mark(1);
1472: int c = readCh();
1473: while (c == -1 && inputStack.size() > 1)
1474: {
1475: popInput();
1476: c = readCh();
1477: }
1478: white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1479: }
1480: while (white);
1481: reset();
1482: }
1483:
1484:
1488: private void requireWhitespace()
1489: throws IOException, XMLStreamException
1490: {
1491: if (!tryWhitespace())
1492: error("whitespace required");
1493: }
1494:
1495:
1498: String getXMLBase()
1499: {
1500: if (baseAware)
1501: {
1502: for (Iterator i = bases.iterator(); i.hasNext(); )
1503: {
1504: String base = (String) i.next();
1505: if (base != null)
1506: return base;
1507: }
1508: }
1509: return input.systemId;
1510: }
1511:
1512:
1515: private void pushInput(String name, String text, boolean report,
1516: boolean normalize)
1517: throws IOException, XMLStreamException
1518: {
1519:
1520: if (name != null && !"".equals(name))
1521: {
1522: for (Iterator i = inputStack.iterator(); i.hasNext(); )
1523: {
1524: Input ctx = (Input) i.next();
1525: if (name.equals(ctx.name))
1526: error("entities may not be self-recursive", name);
1527: }
1528: }
1529: else
1530: report = false;
1531: pushInput(new Input(null, new StringReader(text), input.publicId,
1532: input.systemId, name, input.inputEncoding, report,
1533: normalize));
1534: }
1535:
1536:
1539: private void pushInput(String name, ExternalIds ids, boolean report,
1540: boolean normalize)
1541: throws IOException, XMLStreamException
1542: {
1543: if (!externalEntities)
1544: return;
1545: String url = canonicalize(absolutize(input.systemId, ids.systemId));
1546:
1547: for (Iterator i = inputStack.iterator(); i.hasNext(); )
1548: {
1549: Input ctx = (Input) i.next();
1550: if (url.equals(ctx.systemId))
1551: error("entities may not be self-recursive", url);
1552: if (name != null && !"".equals(name) && name.equals(ctx.name))
1553: error("entities may not be self-recursive", name);
1554: }
1555: if (name == null || "".equals(name))
1556: report = false;
1557: InputStream in = null;
1558: if (resolver != null)
1559: {
1560: Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
1561: null);
1562: if (obj instanceof InputStream)
1563: in = (InputStream) obj;
1564: }
1565: if (in == null)
1566: in = resolve(url);
1567: if (in == null)
1568: error("unable to resolve external entity",
1569: (ids.systemId != null) ? ids.systemId : ids.publicId);
1570: pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1571: normalize));
1572: input.init();
1573: if (tryRead(TEST_XML_DECL))
1574: readTextDecl();
1575: input.finalizeEncoding();
1576: }
1577:
1578:
1581: private void pushInput(Input input)
1582: {
1583: if (input.report)
1584: startEntityStack.addFirst(input.name);
1585: inputStack.addLast(input);
1586: if (this.input != null)
1587: input.xml11 = this.input.xml11;
1588: this.input = input;
1589: }
1590:
1591:
1596: static String canonicalize(String url)
1597: {
1598: if (url == null)
1599: return null;
1600: if (url.startsWith("file:") && !url.startsWith("file://"))
1601: url = "file://" + url.substring(5);
1602: return url;
1603: }
1604:
1605:
1610: public static String absolutize(String base, String href)
1611: {
1612: if (href == null)
1613: return null;
1614: int ci = href.indexOf(':');
1615: if (ci > 1 && isURLScheme(href.substring(0, ci)))
1616: {
1617:
1618: return href;
1619: }
1620: if (base == null)
1621: base = "";
1622: else
1623: {
1624: int i = base.lastIndexOf('/');
1625: if (i != -1)
1626: base = base.substring(0, i + 1);
1627: else
1628: base = "";
1629: }
1630: if ("".equals(base))
1631: {
1632:
1633: base = System.getProperty("user.dir");
1634: if (base.charAt(0) == '/')
1635: base = base.substring(1);
1636: base = "file:///" + base.replace(File.separatorChar, '/');
1637: if (!base.endsWith("/"))
1638: base += "/";
1639: }
1640:
1641:
1642:
1643:
1644:
1645: String basePrefix = null;
1646: ci = base.indexOf(':');
1647: if (ci > 1 && isURLScheme(base.substring(0, ci)))
1648: {
1649: if (base.length() > (ci + 3) &&
1650: base.charAt(ci + 1) == '/' &&
1651: base.charAt(ci + 2) == '/')
1652: {
1653: int si = base.indexOf('/', ci + 3);
1654: if (si == -1)
1655: base = null;
1656: else
1657: {
1658: basePrefix = base.substring(0, si);
1659: base = base.substring(si);
1660: }
1661: }
1662: else
1663: base = null;
1664: }
1665: if (base == null)
1666: return href;
1667: if (href.startsWith("/"))
1668: return (basePrefix == null) ? href : basePrefix + href;
1669:
1670: if (!base.endsWith("/"))
1671: {
1672: int lsi = base.lastIndexOf('/');
1673: if (lsi == -1)
1674: base = "/";
1675: else
1676: base = base.substring(0, lsi + 1);
1677: }
1678: while (href.startsWith("../") || href.startsWith("./"))
1679: {
1680: if (href.startsWith("../"))
1681: {
1682:
1683: int lsi = base.lastIndexOf('/', base.length() - 2);
1684: if (lsi > -1)
1685: base = base.substring(0, lsi + 1);
1686: href = href.substring(3);
1687: }
1688: else
1689: {
1690: href = href.substring(2);
1691: }
1692: }
1693: return (basePrefix == null) ? base + href : basePrefix + base + href;
1694: }
1695:
1696:
1701: private static boolean isURLScheme(String text)
1702: {
1703: int len = text.length();
1704: for (int i = 0; i < len; i++)
1705: {
1706: char c = text.charAt(i);
1707: if (c == '+' || c == '.' || c == '-')
1708: continue;
1709: if (c < 65 || (c > 90 && c < 97) || c > 122)
1710: return false;
1711: }
1712: return true;
1713: }
1714:
1715:
1718: static InputStream resolve(String url)
1719: throws IOException
1720: {
1721: try
1722: {
1723: return new URL(url).openStream();
1724: }
1725: catch (MalformedURLException e)
1726: {
1727: return null;
1728: }
1729: catch (IOException e)
1730: {
1731: IOException e2 = new IOException("error resolving " + url);
1732: e2.initCause(e);
1733: throw e2;
1734: }
1735: }
1736:
1737:
1740: private void popInput()
1741: {
1742: Input old = (Input) inputStack.removeLast();
1743: if (old.report)
1744: endEntityStack.addFirst(old.name);
1745: input = (Input) inputStack.getLast();
1746: }
1747:
1748:
1751: private void readTextDecl()
1752: throws IOException, XMLStreamException
1753: {
1754: final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1755: requireWhitespace();
1756: if (tryRead("version"))
1757: {
1758: readEq();
1759: String v = readLiteral(flags, false);
1760: if ("1.0".equals(v))
1761: input.xml11 = false;
1762: else if ("1.1".equals(v))
1763: {
1764: Input i1 = (Input) inputStack.getFirst();
1765: if (!i1.xml11)
1766: error("external entity specifies later version number");
1767: input.xml11 = true;
1768: }
1769: else
1770: throw new XMLStreamException("illegal XML version: " + v);
1771: requireWhitespace();
1772: }
1773: require("encoding");
1774: readEq();
1775: String enc = readLiteral(flags, false);
1776: skipWhitespace();
1777: require("?>");
1778: input.setInputEncoding(enc);
1779: }
1780:
1781:
1784: private void readXMLDecl()
1785: throws IOException, XMLStreamException
1786: {
1787: final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1788:
1789: requireWhitespace();
1790: require("version");
1791: readEq();
1792: xmlVersion = readLiteral(flags, false);
1793: if ("1.0".equals(xmlVersion))
1794: input.xml11 = false;
1795: else if ("1.1".equals(xmlVersion))
1796: input.xml11 = true;
1797: else
1798: throw new XMLStreamException("illegal XML version: " + xmlVersion);
1799:
1800: boolean white = tryWhitespace();
1801:
1802: if (tryRead("encoding"))
1803: {
1804: if (!white)
1805: error("whitespace required before 'encoding='");
1806: readEq();
1807: xmlEncoding = readLiteral(flags, false);
1808: white = tryWhitespace();
1809: }
1810:
1811: if (tryRead("standalone"))
1812: {
1813: if (!white)
1814: error("whitespace required before 'standalone='");
1815: readEq();
1816: String standalone = readLiteral(flags, false);
1817: if ("yes".equals(standalone))
1818: xmlStandalone = Boolean.TRUE;
1819: else if ("no".equals(standalone))
1820: xmlStandalone = Boolean.FALSE;
1821: else
1822: error("standalone flag must be 'yes' or 'no'", standalone);
1823: }
1824:
1825: skipWhitespace();
1826: require("?>");
1827: if (xmlEncoding != null)
1828: input.setInputEncoding(xmlEncoding);
1829: }
1830:
1831:
1834: private void readDoctypeDecl()
1835: throws IOException, XMLStreamException
1836: {
1837: if (!supportDTD)
1838: error("parser was configured not to support DTDs");
1839: requireWhitespace();
1840: String rootName = readNmtoken(true);
1841: skipWhitespace();
1842: ExternalIds ids = readExternalIds(false, true);
1843: doctype =
1844: this.new Doctype(rootName, ids.publicId, ids.systemId);
1845:
1846:
1847: skipWhitespace();
1848: if (tryRead('['))
1849: {
1850: while (true)
1851: {
1852: expandPE = true;
1853: skipWhitespace();
1854: expandPE = false;
1855: if (tryRead(']'))
1856: break;
1857: else
1858: readMarkupdecl(false);
1859: }
1860: }
1861: skipWhitespace();
1862: require('>');
1863:
1864:
1865: if (ids.systemId != null && externalEntities)
1866: {
1867: pushInput("", ">", false, false);
1868: pushInput("[dtd]", ids, true, true);
1869:
1870: while (true)
1871: {
1872: expandPE = true;
1873: skipWhitespace();
1874: expandPE = false;
1875: mark(1);
1876: int c = readCh();
1877: if (c == 0x3e)
1878: break;
1879: else if (c == -1)
1880: popInput();
1881: else
1882: {
1883: reset();
1884: expandPE = true;
1885: readMarkupdecl(true);
1886: expandPE = true;
1887: }
1888: }
1889: if (inputStack.size() != 2)
1890: error("external subset has unmatched '>'");
1891: popInput();
1892: }
1893: checkDoctype();
1894: if (validating)
1895: validateDoctype();
1896:
1897:
1898: buf.setLength(0);
1899: buf.append(rootName);
1900: }
1901:
1902:
1905: private void checkDoctype()
1906: throws XMLStreamException
1907: {
1908:
1909: }
1910:
1911:
1914: private void readMarkupdecl(boolean inExternalSubset)
1915: throws IOException, XMLStreamException
1916: {
1917: boolean saved = expandPE;
1918: mark(1);
1919: require('<');
1920: reset();
1921: expandPE = false;
1922: if (tryRead(TEST_ELEMENT_DECL))
1923: {
1924: expandPE = saved;
1925: readElementDecl();
1926: }
1927: else if (tryRead(TEST_ATTLIST_DECL))
1928: {
1929: expandPE = saved;
1930: readAttlistDecl();
1931: }
1932: else if (tryRead(TEST_ENTITY_DECL))
1933: {
1934: expandPE = saved;
1935: readEntityDecl(inExternalSubset);
1936: }
1937: else if (tryRead(TEST_NOTATION_DECL))
1938: {
1939: expandPE = saved;
1940: readNotationDecl(inExternalSubset);
1941: }
1942: else if (tryRead(TEST_PI))
1943: {
1944: readPI(true);
1945: expandPE = saved;
1946: }
1947: else if (tryRead(TEST_COMMENT))
1948: {
1949: readComment(true);
1950: expandPE = saved;
1951: }
1952: else if (tryRead("<!["))
1953: {
1954:
1955: expandPE = saved;
1956: if (inputStack.size() < 2)
1957: error("conditional sections illegal in internal subset");
1958: skipWhitespace();
1959: if (tryRead("INCLUDE"))
1960: {
1961: skipWhitespace();
1962: require('[');
1963: skipWhitespace();
1964: while (!tryRead("]]>"))
1965: {
1966: readMarkupdecl(inExternalSubset);
1967: skipWhitespace();
1968: }
1969: }
1970: else if (tryRead("IGNORE"))
1971: {
1972: skipWhitespace();
1973: require('[');
1974: expandPE = false;
1975: for (int nesting = 1; nesting > 0; )
1976: {
1977: int c = readCh();
1978: switch (c)
1979: {
1980: case 0x3c:
1981: if (tryRead("!["))
1982: nesting++;
1983: break;
1984: case 0x5d:
1985: if (tryRead("]>"))
1986: nesting--;
1987: break;
1988: case -1:
1989: throw new EOFException();
1990: }
1991: }
1992: expandPE = saved;
1993: }
1994: else
1995: error("conditional section must begin with INCLUDE or IGNORE");
1996: }
1997: else
1998: error("expected markup declaration");
1999: }
2000:
2001:
2004: private void readElementDecl()
2005: throws IOException, XMLStreamException
2006: {
2007: requireWhitespace();
2008: boolean saved = expandPE;
2009: expandPE = (inputStack.size() > 1);
2010: String name = readNmtoken(true);
2011: expandPE = saved;
2012: requireWhitespace();
2013: readContentspec(name);
2014: skipWhitespace();
2015: require('>');
2016: }
2017:
2018:
2021: private void readContentspec(String elementName)
2022: throws IOException, XMLStreamException
2023: {
2024: if (tryRead("EMPTY"))
2025: doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
2026: else if (tryRead("ANY"))
2027: doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
2028: else
2029: {
2030: ContentModel model;
2031: StringBuffer acc = new StringBuffer();
2032: require('(');
2033: acc.append('(');
2034: skipWhitespace();
2035: if (tryRead("#PCDATA"))
2036: {
2037:
2038: acc.append("#PCDATA");
2039: MixedContentModel mm = new MixedContentModel();
2040: model = mm;
2041: skipWhitespace();
2042: if (tryRead(')'))
2043: {
2044: acc.append(")");
2045: if (tryRead('*'))
2046: {
2047: mm.min = 0;
2048: mm.max = -1;
2049: }
2050: }
2051: else
2052: {
2053: while (!tryRead(")"))
2054: {
2055: require('|');
2056: acc.append('|');
2057: skipWhitespace();
2058: String name = readNmtoken(true);
2059: acc.append(name);
2060: mm.addName(name);
2061: skipWhitespace();
2062: }
2063: require('*');
2064: acc.append(")*");
2065: mm.min = 0;
2066: mm.max = -1;
2067: }
2068: }
2069: else
2070: model = readElements(acc);
2071: doctype.addElementDecl(elementName, acc.toString(), model);
2072: }
2073: }
2074:
2075:
2078: private ElementContentModel readElements(StringBuffer acc)
2079: throws IOException, XMLStreamException
2080: {
2081: int separator;
2082: ElementContentModel model = new ElementContentModel();
2083:
2084:
2085: skipWhitespace();
2086: model.addContentParticle(readContentParticle(acc));
2087:
2088: skipWhitespace();
2089: int c = readCh();
2090: switch (c)
2091: {
2092: case 0x29:
2093: acc.append(')');
2094: mark(1);
2095: c = readCh();
2096: switch (c)
2097: {
2098: case 0x3f:
2099: acc.append('?');
2100: model.min = 0;
2101: model.max = 1;
2102: break;
2103: case 0x2a:
2104: acc.append('*');
2105: model.min = 0;
2106: model.max = -1;
2107: break;
2108: case 0x2b:
2109: acc.append('+');
2110: model.min = 1;
2111: model.max = -1;
2112: break;
2113: default:
2114: reset();
2115: }
2116: return model;
2117: case 0x7c:
2118: model.or = true;
2119:
2120: case 0x2c:
2121: separator = c;
2122: acc.append(Character.toChars(c));
2123: break;
2124: default:
2125: error("bad separator in content model",
2126: "U+" + Integer.toHexString(c));
2127: return model;
2128: }
2129:
2130: while (true)
2131: {
2132: skipWhitespace();
2133: model.addContentParticle(readContentParticle(acc));
2134: skipWhitespace();
2135: c = readCh();
2136: if (c == 0x29)
2137: {
2138: acc.append(')');
2139: break;
2140: }
2141: else if (c != separator)
2142: {
2143: error("bad separator in content model",
2144: "U+" + Integer.toHexString(c));
2145: return model;
2146: }
2147: else
2148: acc.append(c);
2149: }
2150:
2151: mark(1);
2152: c = readCh();
2153: switch (c)
2154: {
2155: case 0x3f:
2156: acc.append('?');
2157: model.min = 0;
2158: model.max = 1;
2159: break;
2160: case 0x2a:
2161: acc.append('*');
2162: model.min = 0;
2163: model.max = -1;
2164: break;
2165: case 0x2b:
2166: acc.append('+');
2167: model.min = 1;
2168: model.max = -1;
2169: break;
2170: default:
2171: reset();
2172: }
2173: return model;
2174: }
2175:
2176:
2179: private ContentParticle readContentParticle(StringBuffer acc)
2180: throws IOException, XMLStreamException
2181: {
2182: ContentParticle cp = new ContentParticle();
2183: if (tryRead('('))
2184: {
2185: acc.append('(');
2186: cp.content = readElements(acc);
2187: }
2188: else
2189: {
2190: String name = readNmtoken(true);
2191: acc.append(name);
2192: cp.content = name;
2193: mark(1);
2194: int c = readCh();
2195: switch (c)
2196: {
2197: case 0x3f:
2198: acc.append('?');
2199: cp.min = 0;
2200: cp.max = 1;
2201: break;
2202: case 0x2a:
2203: acc.append('*');
2204: cp.min = 0;
2205: cp.max = -1;
2206: break;
2207: case 0x2b:
2208: acc.append('+');
2209: cp.min = 1;
2210: cp.max = -1;
2211: break;
2212: default:
2213: reset();
2214: }
2215: }
2216: return cp;
2217: }
2218:
2219:
2222: private void readAttlistDecl()
2223: throws IOException, XMLStreamException
2224: {
2225: requireWhitespace();
2226: boolean saved = expandPE;
2227: expandPE = (inputStack.size() > 1);
2228: String elementName = readNmtoken(true);
2229: expandPE = saved;
2230: boolean white = tryWhitespace();
2231: while (!tryRead('>'))
2232: {
2233: if (!white)
2234: error("whitespace required before attribute definition");
2235: readAttDef(elementName);
2236: white = tryWhitespace();
2237: }
2238: }
2239:
2240:
2243: private void readAttDef(String elementName)
2244: throws IOException, XMLStreamException
2245: {
2246: String name = readNmtoken(true);
2247: requireWhitespace();
2248: StringBuffer acc = new StringBuffer();
2249: HashSet values = new HashSet();
2250: String type = readAttType(acc, values);
2251: if (validating)
2252: {
2253: if ("ID".equals(type))
2254: {
2255:
2256: for (Iterator i = doctype.attlistIterator(elementName);
2257: i.hasNext(); )
2258: {
2259: Map.Entry entry = (Map.Entry) i.next();
2260: AttributeDecl decl = (AttributeDecl) entry.getValue();
2261: if ("ID".equals(decl.type))
2262: error("element types must not have more than one ID " +
2263: "attribute");
2264: }
2265: }
2266: else if ("NOTATION".equals(type))
2267: {
2268:
2269: for (Iterator i = doctype.attlistIterator(elementName);
2270: i.hasNext(); )
2271: {
2272: Map.Entry entry = (Map.Entry) i.next();
2273: AttributeDecl decl = (AttributeDecl) entry.getValue();
2274: if ("NOTATION".equals(decl.type))
2275: error("element types must not have more than one NOTATION " +
2276: "attribute");
2277: }
2278:
2279: ContentModel model = doctype.getElementModel(elementName);
2280: if (model != null && model.type == ContentModel.EMPTY)
2281: error("attributes of type NOTATION must not be declared on an " +
2282: "element declared EMPTY");
2283: }
2284: }
2285: String enumer = null;
2286: if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2287: enumer = acc.toString();
2288: else
2289: values = null;
2290: requireWhitespace();
2291: readDefault(elementName, name, type, enumer, values);
2292: }
2293:
2294:
2297: private String readAttType(StringBuffer acc, HashSet values)
2298: throws IOException, XMLStreamException
2299: {
2300: if (tryRead('('))
2301: {
2302: readEnumeration(false, acc, values);
2303: return "ENUMERATION";
2304: }
2305: else
2306: {
2307: String typeString = readNmtoken(true);
2308: if ("NOTATION".equals(typeString))
2309: {
2310: readNotationType(acc, values);
2311: return typeString;
2312: }
2313: else if ("CDATA".equals(typeString) ||
2314: "ID".equals(typeString) ||
2315: "IDREF".equals(typeString) ||
2316: "IDREFS".equals(typeString) ||
2317: "ENTITY".equals(typeString) ||
2318: "ENTITIES".equals(typeString) ||
2319: "NMTOKEN".equals(typeString) ||
2320: "NMTOKENS".equals(typeString))
2321: return typeString;
2322: else
2323: {
2324: error("illegal attribute type", typeString);
2325: return null;
2326: }
2327: }
2328: }
2329:
2330:
2333: private void readEnumeration(boolean isNames, StringBuffer acc,
2334: HashSet values)
2335: throws IOException, XMLStreamException
2336: {
2337: acc.append('(');
2338:
2339: skipWhitespace();
2340: String token = readNmtoken(isNames);
2341: acc.append(token);
2342: values.add(token);
2343:
2344: skipWhitespace();
2345: while (!tryRead(')'))
2346: {
2347: require('|');
2348: acc.append('|');
2349: skipWhitespace();
2350: token = readNmtoken(isNames);
2351:
2352: if (validating && values.contains(token))
2353: error("duplicate token", token);
2354: acc.append(token);
2355: values.add(token);
2356: skipWhitespace();
2357: }
2358: acc.append(')');
2359: }
2360:
2361:
2364: private void readNotationType(StringBuffer acc, HashSet values)
2365: throws IOException, XMLStreamException
2366: {
2367: requireWhitespace();
2368: require('(');
2369: readEnumeration(true, acc, values);
2370: }
2371:
2372:
2375: private void readDefault(String elementName, String name,
2376: String type, String enumeration, HashSet values)
2377: throws IOException, XMLStreamException
2378: {
2379: int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2380: int flags = LIT_ATTRIBUTE;
2381: String value = null, defaultType = null;
2382: boolean saved = expandPE;
2383:
2384: if (!"CDATA".equals(type))
2385: flags |= LIT_NORMALIZE;
2386:
2387: expandPE = false;
2388: if (tryRead('#'))
2389: {
2390: if (tryRead("FIXED"))
2391: {
2392: defaultType = "#FIXED";
2393: valueType = ATTRIBUTE_DEFAULT_FIXED;
2394: requireWhitespace();
2395: value = readLiteral(flags, false);
2396: }
2397: else if (tryRead("REQUIRED"))
2398: {
2399: defaultType = "#REQUIRED";
2400: valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2401: }
2402: else if (tryRead("IMPLIED"))
2403: {
2404: defaultType = "#IMPLIED";
2405: valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2406: }
2407: else
2408: error("illegal keyword for attribute default value");
2409: }
2410: else
2411: value = readLiteral(flags, false);
2412: expandPE = saved;
2413: if (validating)
2414: {
2415: if ("ID".equals(type))
2416: {
2417:
2418: if (value != null && !isNmtoken(value, true))
2419: error("default value must match Name production", value);
2420:
2421: if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2422: valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2423: error("ID attributes must have a declared default of " +
2424: "#IMPLIED or #REQUIRED");
2425: }
2426: else if (value != null)
2427: {
2428:
2429: if ("IDREF".equals(type) || "ENTITY".equals(type))
2430: {
2431: if (!isNmtoken(value, true))
2432: error("default value must match Name production", value);
2433: }
2434: else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2435: {
2436: StringTokenizer st = new StringTokenizer(value);
2437: while (st.hasMoreTokens())
2438: {
2439: String token = st.nextToken();
2440: if (!isNmtoken(token, true))
2441: error("default value must match Name production", token);
2442: }
2443: }
2444: else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2445: {
2446: if (!isNmtoken(value, false))
2447: error("default value must match Nmtoken production", value);
2448: }
2449: else if ("NMTOKENS".equals(type))
2450: {
2451: StringTokenizer st = new StringTokenizer(value);
2452: while (st.hasMoreTokens())
2453: {
2454: String token = st.nextToken();
2455: if (!isNmtoken(token, false))
2456: error("default value must match Nmtoken production",
2457: token);
2458: }
2459: }
2460: }
2461: }
2462:
2463: AttributeDecl attribute =
2464: new AttributeDecl(type, value, valueType, enumeration, values,
2465: inputStack.size() != 1);
2466: doctype.addAttributeDecl(elementName, name, attribute);
2467: }
2468:
2469:
2472: private void readEntityDecl(boolean inExternalSubset)
2473: throws IOException, XMLStreamException
2474: {
2475: int flags = 0;
2476:
2477: boolean peFlag = false;
2478: expandPE = false;
2479: requireWhitespace();
2480: if (tryRead('%'))
2481: {
2482: peFlag = true;
2483: requireWhitespace();
2484: }
2485: expandPE = true;
2486:
2487: String name = readNmtoken(true);
2488: if (name.indexOf(':') != -1)
2489: error("illegal character ':' in entity name", name);
2490: if (peFlag)
2491: name = "%" + name;
2492: requireWhitespace();
2493: mark(1);
2494: int c = readCh();
2495: reset();
2496: if (c == 0x22 || c == 0x27)
2497: {
2498:
2499: String value = readLiteral(flags | LIT_DISABLE_EREF, true);
2500: int ai = value.indexOf('&');
2501: while (ai != -1)
2502: {
2503: int sci = value.indexOf(';', ai);
2504: if (sci == -1)
2505: error("malformed reference in entity value", value);
2506: String ref = value.substring(ai + 1, sci);
2507: int[] cp = UnicodeReader.toCodePointArray(ref);
2508: if (cp.length == 0)
2509: error("malformed reference in entity value", value);
2510: if (cp[0] == 0x23)
2511: {
2512: if (cp.length == 1)
2513: error("malformed reference in entity value", value);
2514: if (cp[1] == 0x78)
2515: {
2516: if (cp.length == 2)
2517: error("malformed reference in entity value", value);
2518: for (int i = 2; i < cp.length; i++)
2519: {
2520: int x = cp[i];
2521: if (x < 0x30 ||
2522: (x > 0x39 && x < 0x41) ||
2523: (x > 0x46 && x < 0x61) ||
2524: x > 0x66)
2525: error("malformed character reference in entity value",
2526: value);
2527: }
2528: }
2529: else
2530: {
2531: for (int i = 1; i < cp.length; i++)
2532: {
2533: int x = cp[i];
2534: if (x < 0x30 || x > 0x39)
2535: error("malformed character reference in entity value",
2536: value);
2537: }
2538: }
2539: }
2540: else
2541: {
2542: if (!isNameStartCharacter(cp[0], input.xml11))
2543: error("malformed reference in entity value", value);
2544: for (int i = 1; i < cp.length; i++)
2545: {
2546: if (!isNameCharacter(cp[i], input.xml11))
2547: error("malformed reference in entity value", value);
2548: }
2549: }
2550: ai = value.indexOf('&', sci);
2551: }
2552: doctype.addEntityDecl(name, value, inExternalSubset);
2553: }
2554: else
2555: {
2556: ExternalIds ids = readExternalIds(false, false);
2557:
2558: boolean white = tryWhitespace();
2559: if (!peFlag && tryRead("NDATA"))
2560: {
2561: if (!white)
2562: error("whitespace required before NDATA");
2563: requireWhitespace();
2564: ids.notationName = readNmtoken(true);
2565: }
2566: doctype.addEntityDecl(name, ids, inExternalSubset);
2567: }
2568:
2569: skipWhitespace();
2570: require('>');
2571: }
2572:
2573:
2576: private void readNotationDecl(boolean inExternalSubset)
2577: throws IOException, XMLStreamException
2578: {
2579: requireWhitespace();
2580: String notationName = readNmtoken(true);
2581: if (notationName.indexOf(':') != -1)
2582: error("illegal character ':' in notation name", notationName);
2583: if (validating)
2584: {
2585:
2586: ExternalIds notation = doctype.getNotation(notationName);
2587: if (notation != null)
2588: error("duplicate notation name", notationName);
2589: }
2590: requireWhitespace();
2591: ExternalIds ids = readExternalIds(true, false);
2592: ids.notationName = notationName;
2593: doctype.addNotationDecl(notationName, ids, inExternalSubset);
2594: skipWhitespace();
2595: require('>');
2596: }
2597:
2598:
2601: private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2602: throws IOException, XMLStreamException
2603: {
2604: int c;
2605: int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2606: ExternalIds ids = new ExternalIds();
2607:
2608: if (tryRead("PUBLIC"))
2609: {
2610: requireWhitespace();
2611: ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2612: if (inNotation)
2613: {
2614: skipWhitespace();
2615: mark(1);
2616: c = readCh();
2617: reset();
2618: if (c == 0x22 || c == 0x27)
2619: {
2620: String href = readLiteral(flags, false);
2621: ids.systemId = absolutize(input.systemId, href);
2622: }
2623: }
2624: else
2625: {
2626: requireWhitespace();
2627: String href = readLiteral(flags, false);
2628: ids.systemId = absolutize(input.systemId, href);
2629: }
2630:
2631: for (int i = 0; i < ids.publicId.length(); i++)
2632: {
2633: char d = ids.publicId.charAt(i);
2634: if (d >= 'a' && d <= 'z')
2635: continue;
2636: if (d >= 'A' && d <= 'Z')
2637: continue;
2638: if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2639: continue;
2640: error("illegal PUBLIC id character",
2641: "U+" + Integer.toHexString(d));
2642: }
2643: }
2644: else if (tryRead("SYSTEM"))
2645: {
2646: requireWhitespace();
2647: String href = readLiteral(flags, false);
2648: ids.systemId = absolutize(input.systemId, href);
2649: }
2650: else if (!isSubset)
2651: {
2652: error("missing SYSTEM or PUBLIC keyword");
2653: }
2654: if (ids.systemId != null && !inNotation)
2655: {
2656: if (ids.systemId.indexOf('#') != -1)
2657: error("SYSTEM id has a URI fragment", ids.systemId);
2658: }
2659: return ids;
2660: }
2661:
2662:
2666: private int readStartElement()
2667: throws IOException, XMLStreamException
2668: {
2669:
2670: String elementName = readNmtoken(true);
2671: attrs.clear();
2672:
2673: if (namespaceAware)
2674: {
2675: if (elementName.charAt(0) == ':' ||
2676: elementName.charAt(elementName.length() - 1) == ':')
2677: error("not a QName", elementName);
2678: namespaces.addFirst(new LinkedHashMap());
2679: }
2680:
2681: boolean white = tryWhitespace();
2682: mark(1);
2683: int c = readCh();
2684: while (c != 0x2f && c != 0x3e)
2685: {
2686:
2687: reset();
2688: if (!white)
2689: error("need whitespace between attributes");
2690: readAttribute(elementName);
2691: white = tryWhitespace();
2692: mark(1);
2693: c = readCh();
2694: }
2695:
2696: if (doctype != null)
2697: {
2698: for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
2699: {
2700: Map.Entry entry = (Map.Entry) i.next();
2701: String attName = (String) entry.getKey();
2702: AttributeDecl decl = (AttributeDecl) entry.getValue();
2703: if (validating)
2704: {
2705: switch (decl.valueType)
2706: {
2707: case ATTRIBUTE_DEFAULT_REQUIRED:
2708:
2709: if (decl.value == null && !attributeSpecified(attName))
2710: error("value for " + attName + " attribute is required");
2711: break;
2712: case ATTRIBUTE_DEFAULT_FIXED:
2713:
2714: for (Iterator j = attrs.iterator(); j.hasNext(); )
2715: {
2716: Attribute a = (Attribute) j.next();
2717: if (attName.equals(a.name) &&
2718: !decl.value.equals(a.value))
2719: error("value for " + attName + " attribute must be " +
2720: decl.value);
2721: }
2722: break;
2723: }
2724: }
2725: if (namespaceAware && attName.equals("xmlns"))
2726: {
2727: LinkedHashMap ctx =
2728: (LinkedHashMap) namespaces.getFirst();
2729: if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2730: continue;
2731: }
2732: else if (namespaceAware && attName.startsWith("xmlns:"))
2733: {
2734: LinkedHashMap ctx =
2735: (LinkedHashMap) namespaces.getFirst();
2736: if (ctx.containsKey(attName.substring(6)))
2737: continue;
2738: }
2739: else if (attributeSpecified(attName))
2740: continue;
2741: if (decl.value == null)
2742: continue;
2743:
2744: if (validating && decl.external && xmlStandalone == Boolean.TRUE)
2745: error("standalone must be 'no' if attributes inherit values " +
2746: "from externally declared markup declarations");
2747: Attribute attr =
2748: new Attribute(attName, decl.type, false, decl.value);
2749: if (namespaceAware)
2750: {
2751: if (!addNamespace(attr))
2752: attrs.add(attr);
2753: }
2754: else
2755: attrs.add(attr);
2756: }
2757: }
2758: if (baseAware)
2759: {
2760: String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
2761: String base = getXMLBase();
2762: bases.addFirst(absolutize(base, uri));
2763: }
2764: if (namespaceAware)
2765: {
2766:
2767: int ci = elementName.indexOf(':');
2768: if (ci != -1)
2769: {
2770: String prefix = elementName.substring(0, ci);
2771: String uri = getNamespaceURI(prefix);
2772: if (uri == null)
2773: error("unbound element prefix", prefix);
2774: else if (input.xml11 && "".equals(uri))
2775: error("XML 1.1 unbound element prefix", prefix);
2776: }
2777: for (Iterator i = attrs.iterator(); i.hasNext(); )
2778: {
2779: Attribute attr = (Attribute) i.next();
2780: if (attr.prefix != null &&
2781: !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
2782: {
2783: String uri = getNamespaceURI(attr.prefix);
2784: if (uri == null)
2785: error("unbound attribute prefix", attr.prefix);
2786: else if (input.xml11 && "".equals(uri))
2787: error("XML 1.1 unbound attribute prefix", attr.prefix);
2788: }
2789: }
2790: }
2791: if (validating && doctype != null)
2792: {
2793: validateStartElement(elementName);
2794: currentContentModel = doctype.getElementModel(elementName);
2795: if (currentContentModel == null)
2796: error("no element declaration", elementName);
2797: validationStack.add(new LinkedList());
2798: }
2799:
2800: buf.setLength(0);
2801: buf.append(elementName);
2802:
2803: stack.addLast(elementName);
2804: switch (c)
2805: {
2806: case 0x3e:
2807: return CONTENT;
2808: case 0x2f:
2809: require('>');
2810: return EMPTY_ELEMENT;
2811: }
2812: return -1;
2813: }
2814:
2815:
2819: private boolean attributeSpecified(String attName)
2820: {
2821: for (Iterator j = attrs.iterator(); j.hasNext(); )
2822: {
2823: Attribute a = (Attribute) j.next();
2824: if (attName.equals(a.name))
2825: return true;
2826: }
2827: return false;
2828: }
2829:
2830:
2833: private void readAttribute(String elementName)
2834: throws IOException, XMLStreamException
2835: {
2836:
2837: String attributeName = readNmtoken(true);
2838: String type = getAttributeType(elementName, attributeName);
2839: readEq();
2840:
2841: final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
2842: String value = (type == null || "CDATA".equals(type)) ?
2843: readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
2844:
2845: Attribute attr = this.new Attribute(attributeName, type, true, value);
2846: if (namespaceAware)
2847: {
2848: if (attributeName.charAt(0) == ':' ||
2849: attributeName.charAt(attributeName.length() - 1) == ':')
2850: error("not a QName", attributeName);
2851: else if (attributeName.equals("xmlns"))
2852: {
2853: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2854: if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2855: error("duplicate default namespace");
2856: }
2857: else if (attributeName.startsWith("xmlns:"))
2858: {
2859: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2860: if (ctx.containsKey(attributeName.substring(6)))
2861: error("duplicate namespace", attributeName.substring(6));
2862: }
2863: else if (attrs.contains(attr))
2864: error("duplicate attribute", attributeName);
2865: }
2866: else if (attrs.contains(attr))
2867: error("duplicate attribute", attributeName);
2868: if (validating && doctype != null)
2869: {
2870:
2871: AttributeDecl decl =
2872: doctype.getAttributeDecl(elementName, attributeName);
2873: if (decl == null)
2874: error("attribute must be declared", attributeName);
2875: if ("ENUMERATION".equals(decl.type))
2876: {
2877:
2878: if (!decl.values.contains(value))
2879: error("value does not match enumeration " + decl.enumeration,
2880: value);
2881: }
2882: else if ("ID".equals(decl.type))
2883: {
2884:
2885: if (!isNmtoken(value, true))
2886: error("ID values must match the Name production");
2887: if (ids.contains(value))
2888: error("Duplicate ID", value);
2889: ids.add(value);
2890: }
2891: else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
2892: {
2893: StringTokenizer st = new StringTokenizer(value);
2894: while (st.hasMoreTokens())
2895: {
2896: String token = st.nextToken();
2897:
2898: if (!isNmtoken(token, true))
2899: error("IDREF values must match the Name production");
2900: idrefs.add(token);
2901: }
2902: }
2903: else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
2904: {
2905: StringTokenizer st = new StringTokenizer(value);
2906: while (st.hasMoreTokens())
2907: {
2908: String token = st.nextToken();
2909:
2910: if (!isNmtoken(token, false))
2911: error("NMTOKEN values must match the Nmtoken production");
2912: }
2913: }
2914: else if ("ENTITY".equals(decl.type))
2915: {
2916:
2917: if (!isNmtoken(value, true))
2918: error("ENTITY values must match the Name production");
2919: Object entity = doctype.getEntity(value);
2920: if (entity == null || !(entity instanceof ExternalIds) ||
2921: ((ExternalIds) entity).notationName == null)
2922: error("ENTITY values must match the name of an unparsed " +
2923: "entity declared in the DTD");
2924: }
2925: else if ("NOTATION".equals(decl.type))
2926: {
2927: if (!decl.values.contains(value))
2928: error("NOTATION values must match a declared notation name",
2929: value);
2930:
2931: ExternalIds notation = doctype.getNotation(value);
2932: if (notation == null)
2933: error("NOTATION values must match the name of a notation " +
2934: "declared in the DTD", value);
2935: }
2936: }
2937: if (namespaceAware)
2938: {
2939: if (!addNamespace(attr))
2940: attrs.add(attr);
2941: }
2942: else
2943: attrs.add(attr);
2944: }
2945:
2946:
2951: private boolean addNamespace(Attribute attr)
2952: throws XMLStreamException
2953: {
2954: if ("xmlns".equals(attr.name))
2955: {
2956: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2957: if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2958: error("Duplicate default namespace declaration");
2959: if (XMLConstants.XML_NS_URI.equals(attr.value))
2960: error("can't bind XML namespace");
2961: ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2962: return true;
2963: }
2964: else if ("xmlns".equals(attr.prefix))
2965: {
2966: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2967: if (ctx.get(attr.localName) != null)
2968: error("Duplicate namespace declaration for prefix",
2969: attr.localName);
2970: if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2971: {
2972: if (!XMLConstants.XML_NS_URI.equals(attr.value))
2973: error("can't redeclare xml prefix");
2974: else
2975: return false;
2976: }
2977: if (XMLConstants.XML_NS_URI.equals(attr.value))
2978: error("can't bind non-xml prefix to XML namespace");
2979: if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2980: error("can't redeclare xmlns prefix");
2981: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2982: error("can't bind non-xmlns prefix to XML Namespace namespace");
2983: if ("".equals(attr.value) && !input.xml11)
2984: error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2985: ctx.put(attr.localName, attr.value);
2986: return true;
2987: }
2988: return false;
2989: }
2990:
2991:
2994: private void readEndElement()
2995: throws IOException, XMLStreamException
2996: {
2997:
2998: String expected = (String) stack.removeLast();
2999: require(expected);
3000: skipWhitespace();
3001: require('>');
3002:
3003: buf.setLength(0);
3004: buf.append(expected);
3005: if (validating && doctype != null)
3006: endElementValidationHook();
3007: }
3008:
3009:
3013: private void endElementValidationHook()
3014: throws XMLStreamException
3015: {
3016: validateEndElement();
3017: validationStack.removeLast();
3018: if (stack.isEmpty())
3019: currentContentModel = null;
3020: else
3021: {
3022: String parent = (String) stack.getLast();
3023: currentContentModel = doctype.getElementModel(parent);
3024: }
3025: }
3026:
3027:
3030: private void readComment(boolean inDTD)
3031: throws IOException, XMLStreamException
3032: {
3033: boolean saved = expandPE;
3034: expandPE = false;
3035: buf.setLength(0);
3036: readUntil(TEST_END_COMMENT);
3037: require('>');
3038: expandPE = saved;
3039: if (inDTD)
3040: doctype.addComment(buf.toString());
3041: }
3042:
3043:
3046: private void readPI(boolean inDTD)
3047: throws IOException, XMLStreamException
3048: {
3049: boolean saved = expandPE;
3050: expandPE = false;
3051: piTarget = readNmtoken(true);
3052: if (piTarget.indexOf(':') != -1)
3053: error("illegal character in PI target", new Character(':'));
3054: if ("xml".equalsIgnoreCase(piTarget))
3055: error("illegal PI target", piTarget);
3056: if (tryRead(TEST_END_PI))
3057: piData = null;
3058: else
3059: {
3060: if (!tryWhitespace())
3061: error("whitespace required between PI target and data");
3062: buf.setLength(0);
3063: readUntil(TEST_END_PI);
3064: piData = buf.toString();
3065: }
3066: expandPE = saved;
3067: if (inDTD)
3068: doctype.addPI(piTarget, piData);
3069: }
3070:
3071:
3074: private void readReference()
3075: throws IOException, XMLStreamException
3076: {
3077: buf.setLength(0);
3078: String entityName = readNmtoken(true);
3079: require(';');
3080: buf.setLength(0);
3081: buf.append(entityName);
3082: }
3083:
3084:
3087: private void readCDSect()
3088: throws IOException, XMLStreamException
3089: {
3090: buf.setLength(0);
3091: readUntil(TEST_END_CDATA);
3092: }
3093:
3094:
3098: private int readCharData(String prefix)
3099: throws IOException, XMLStreamException
3100: {
3101: boolean white = true;
3102: buf.setLength(0);
3103: if (prefix != null)
3104: buf.append(prefix);
3105: boolean done = false;
3106: boolean entities = false;
3107: while (!done)
3108: {
3109:
3110: mark(tmpBuf.length);
3111: int len = read(tmpBuf, 0, tmpBuf.length);
3112: if (len == -1)
3113: {
3114: if (inputStack.size() > 1)
3115: {
3116: popInput();
3117:
3118: done = true;
3119: }
3120: else
3121: throw new EOFException();
3122: }
3123: for (int i = 0; i < len && !done; i++)
3124: {
3125: int c = tmpBuf[i];
3126: switch (c)
3127: {
3128: case 0x20:
3129: case 0x09:
3130: case 0x0a:
3131: case 0x0d:
3132: buf.append(Character.toChars(c));
3133: break;
3134: case 0x26:
3135: reset();
3136: read(tmpBuf, 0, i);
3137:
3138: mark(3);
3139: c = readCh();
3140: c = readCh();
3141: if (c == 0x23)
3142: {
3143: mark(1);
3144: c = readCh();
3145: boolean hex = (c == 0x78);
3146: if (!hex)
3147: reset();
3148: char[] ch = readCharacterRef(hex ? 16 : 10);
3149: buf.append(ch, 0, ch.length);
3150: for (int j = 0; j < ch.length; j++)
3151: {
3152: switch (ch[j])
3153: {
3154: case 0x20:
3155: case 0x09:
3156: case 0x0a:
3157: case 0x0d:
3158: break;
3159: default:
3160: white = false;
3161: }
3162: }
3163: }
3164: else
3165: {
3166:
3167: reset();
3168: c = readCh();
3169: String entityName = readNmtoken(true);
3170: require(';');
3171: String text =
3172: (String) PREDEFINED_ENTITIES.get(entityName);
3173: if (text != null)
3174: buf.append(text);
3175: else
3176: {
3177: pushInput("", "&" + entityName + ";", false, false);
3178: done = true;
3179: break;
3180: }
3181: }
3182:
3183: i = -1;
3184: mark(tmpBuf.length);
3185: len = read(tmpBuf, 0, tmpBuf.length);
3186: if (len == -1)
3187: {
3188: if (inputStack.size() > 1)
3189: {
3190: popInput();
3191: done = true;
3192: }
3193: else
3194: throw new EOFException();
3195: }
3196: entities = true;
3197: break;
3198: case 0x3e:
3199: int l = buf.length();
3200: if (l > 1 &&
3201: buf.charAt(l - 1) == ']' &&
3202: buf.charAt(l - 2) == ']')
3203: error("Character data may not contain unescaped ']]>'");
3204: buf.append(Character.toChars(c));
3205: break;
3206: case 0x3c:
3207: reset();
3208:
3209: int count = 0, remaining = i;
3210: do
3211: {
3212: int r = read(tmpBuf, 0, remaining);
3213: count += r;
3214: remaining -= r;
3215: }
3216: while (count < i);
3217: i = len;
3218: if (coalescing && tryRead(TEST_CDATA))
3219: readUntil(TEST_END_CDATA);
3220: else
3221: done = true;
3222: break;
3223: default:
3224: if (input.xml11)
3225: {
3226: if (!isXML11Char(c) || isXML11RestrictedChar(c))
3227: error("illegal XML 1.1 character",
3228: "U+" + Integer.toHexString(c));
3229: }
3230: else if (!isChar(c))
3231: error("illegal XML character",
3232: "U+" + Integer.toHexString(c));
3233: white = false;
3234: buf.append(Character.toChars(c));
3235: }
3236: }
3237:
3238:
3239: if (buf.length() >= 2097152)
3240: done = true;
3241: }
3242: if (entities)
3243: normalizeCRLF(buf);
3244: return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
3245: }
3246:
3247:
3250: private void expandEntity(String name, boolean inAttr, boolean normalize)
3251: throws IOException, XMLStreamException
3252: {
3253: if (doctype != null)
3254: {
3255: Object value = doctype.getEntity(name);
3256: if (value != null)
3257: {
3258: if (xmlStandalone == Boolean.TRUE)
3259: {
3260:
3261: if (doctype.isEntityExternal(name))
3262: error("reference to external entity in standalone document");
3263: else if (value instanceof ExternalIds)
3264: {
3265: ExternalIds ids = (ExternalIds) value;
3266: if (ids.notationName != null &&
3267: doctype.isNotationExternal(ids.notationName))
3268: error("reference to external notation in " +
3269: "standalone document");
3270: }
3271: }
3272: if (value instanceof String)
3273: {
3274: String text = (String) value;
3275: if (inAttr && text.indexOf('<') != -1)
3276: error("< in attribute value");
3277: pushInput(name, text, !inAttr, normalize);
3278: }
3279: else if (inAttr)
3280: error("reference to external entity in attribute value", name);
3281: else
3282: pushInput(name, (ExternalIds) value, !inAttr, normalize);
3283: return;
3284: }
3285: }
3286: error("reference to undeclared entity", name);
3287: }
3288:
3289:
3292: private boolean isUnparsedEntity(String name)
3293: {
3294: if (doctype != null)
3295: {
3296: Object value = doctype.getEntity(name);
3297: if (value != null && value instanceof ExternalIds)
3298: return ((ExternalIds) value).notationName != null;
3299: }
3300: return false;
3301: }
3302:
3303:
3306: private void readEq()
3307: throws IOException, XMLStreamException
3308: {
3309: skipWhitespace();
3310: require('=');
3311: skipWhitespace();
3312: }
3313:
3314:
3318: private int literalReadCh(boolean recognizePEs)
3319: throws IOException, XMLStreamException
3320: {
3321: int c = recognizePEs ? readCh() : read();
3322: while (c == -1)
3323: {
3324: if (inputStack.size() > 1)
3325: {
3326: inputStack.removeLast();
3327: input = (Input) inputStack.getLast();
3328:
3329: c = recognizePEs ? readCh() : read();
3330: }
3331: else
3332: throw new EOFException();
3333: }
3334: return c;
3335: }
3336:
3337:
3340: private String readLiteral(int flags, boolean recognizePEs)
3341: throws IOException, XMLStreamException
3342: {
3343: boolean saved = expandPE;
3344: int delim = readCh();
3345: if (delim != 0x27 && delim != 0x22)
3346: error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
3347: literalBuf.setLength(0);
3348: if ((flags & LIT_DISABLE_PE) != 0)
3349: expandPE = false;
3350: boolean entities = false;
3351: int inputStackSize = inputStack.size();
3352: do
3353: {
3354: int c = literalReadCh(recognizePEs);
3355: if (c == delim && inputStackSize == inputStack.size())
3356: break;
3357: switch (c)
3358: {
3359: case 0x0a:
3360: case 0x0d:
3361: if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
3362: c = 0x20;
3363: break;
3364: case 0x09:
3365: if ((flags & LIT_ATTRIBUTE) != 0)
3366: c = 0x20;
3367: break;
3368: case 0x26:
3369: mark(2);
3370: c = readCh();
3371: if (c == 0x23)
3372: {
3373: if ((flags & LIT_DISABLE_CREF) != 0)
3374: {
3375: reset();
3376: c = 0x26;
3377: }
3378: else
3379: {
3380: mark(1);
3381: c = readCh();
3382: boolean hex = (c == 0x78);
3383: if (!hex)
3384: reset();
3385: char[] ref = readCharacterRef(hex ? 16 : 10);
3386: for (int i = 0; i < ref.length; i++)
3387: literalBuf.append(ref[i]);
3388: entities = true;
3389: continue;
3390: }
3391: }
3392: else
3393: {
3394: if ((flags & LIT_DISABLE_EREF) != 0)
3395: {
3396: reset();
3397: c = 0x26;
3398: }
3399: else
3400: {
3401: reset();
3402: String entityName = readNmtoken(true);
3403: require(';');
3404: String text =
3405: (String) PREDEFINED_ENTITIES.get(entityName);
3406: if (text != null)
3407: literalBuf.append(text);
3408: else
3409: expandEntity(entityName,
3410: (flags & LIT_ATTRIBUTE) != 0,
3411: true);
3412: entities = true;
3413: continue;
3414: }
3415: }
3416: break;
3417: case 0x3c:
3418: if ((flags & LIT_ATTRIBUTE) != 0)
3419: error("attribute values may not contain '<'");
3420: break;
3421: case -1:
3422: if (inputStack.size() > 1)
3423: {
3424: popInput();
3425: continue;
3426: }
3427: throw new EOFException();
3428: default:
3429: if ((c < 0x0020 || c > 0xfffd) ||
3430: (c >= 0xd800 && c < 0xdc00) ||
3431: (input.xml11 && (c >= 0x007f) &&
3432: (c <= 0x009f) && (c != 0x0085)))
3433: error("illegal character", "U+" + Integer.toHexString(c));
3434: }
3435: literalBuf.append(Character.toChars(c));
3436: }
3437: while (true);
3438: expandPE = saved;
3439: if (entities)
3440: normalizeCRLF(literalBuf);
3441: if ((flags & LIT_NORMALIZE) > 0)
3442: literalBuf = normalize(literalBuf);
3443: return literalBuf.toString();
3444: }
3445:
3446:
3451: private StringBuffer normalize(StringBuffer buf)
3452: {
3453: StringBuffer acc = new StringBuffer();
3454: int len = buf.length();
3455: int avState = 0;
3456: for (int i = 0; i < len; i++)
3457: {
3458: char c = buf.charAt(i);
3459: if (c == ' ')
3460: avState = (avState == 0) ? 0 : 1;
3461: else
3462: {
3463: if (avState == 1)
3464: acc.append(' ');
3465: acc.append(c);
3466: avState = 2;
3467: }
3468: }
3469: return acc;
3470: }
3471:
3472:
3477: private void normalizeCRLF(StringBuffer buf)
3478: {
3479: int len = buf.length() - 1;
3480: for (int i = 0; i < len; i++)
3481: {
3482: char c = buf.charAt(i);
3483: if (c == '\r' && buf.charAt(i + 1) == '\n')
3484: {
3485: buf.deleteCharAt(i--);
3486: len--;
3487: }
3488: }
3489: }
3490:
3491:
3494: private void expandPEReference()
3495: throws IOException, XMLStreamException
3496: {
3497: String name = readNmtoken(true, new StringBuffer());
3498: require(';');
3499: mark(1);
3500: if (doctype != null)
3501: {
3502: String entityName = "%" + name;
3503: Object entity = doctype.getEntity(entityName);
3504: if (entity != null)
3505: {
3506: if (xmlStandalone == Boolean.TRUE)
3507: {
3508: if (doctype.isEntityExternal(entityName))
3509: error("reference to external parameter entity in " +
3510: "standalone document");
3511: }
3512: if (entity instanceof String)
3513: {
3514: pushInput(name, (String) entity, false, input.normalize);
3515:
3516: }
3517: else
3518: {
3519:
3520: pushInput(name, (ExternalIds) entity, false, input.normalize);
3521:
3522: }
3523: }
3524: else
3525: error("reference to undeclared parameter entity", name);
3526: }
3527: else
3528: error("reference to parameter entity without doctype", name);
3529: }
3530:
3531:
3535: private char[] readCharacterRef(int base)
3536: throws IOException, XMLStreamException
3537: {
3538: StringBuffer b = new StringBuffer();
3539: for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3540: b.append(Character.toChars(c));
3541: try
3542: {
3543: int ord = Integer.parseInt(b.toString(), base);
3544: if (input.xml11)
3545: {
3546: if (!isXML11Char(ord))
3547: error("illegal XML 1.1 character reference " +
3548: "U+" + Integer.toHexString(ord));
3549: }
3550: else
3551: {
3552: if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3553: || (ord >= 0xd800 && ord <= 0xdfff)
3554: || ord == 0xfffe || ord == 0xffff
3555: || ord > 0x0010ffff)
3556: error("illegal XML character reference " +
3557: "U+" + Integer.toHexString(ord));
3558: }
3559: return Character.toChars(ord);
3560: }
3561: catch (NumberFormatException e)
3562: {
3563: error("illegal characters in character reference", b.toString());
3564: return null;
3565: }
3566: }
3567:
3568:
3572: private String readNmtoken(boolean isName)
3573: throws IOException, XMLStreamException
3574: {
3575: return readNmtoken(isName, nmtokenBuf);
3576: }
3577:
3578:
3583: private String readNmtoken(boolean isName, StringBuffer buf)
3584: throws IOException, XMLStreamException
3585: {
3586: buf.setLength(0);
3587: int c = readCh();
3588: if (isName)
3589: {
3590: if (!isNameStartCharacter(c, input.xml11))
3591: error("not a name start character",
3592: "U+" + Integer.toHexString(c));
3593: }
3594: else
3595: {
3596: if (!isNameCharacter(c, input.xml11))
3597: error("not a name character",
3598: "U+" + Integer.toHexString(c));
3599: }
3600: buf.append(Character.toChars(c));
3601: do
3602: {
3603: mark(1);
3604: c = readCh();
3605: switch (c)
3606: {
3607: case 0x25:
3608: case 0x3c:
3609: case 0x3e:
3610: case 0x26:
3611: case 0x2c:
3612: case 0x7c:
3613: case 0x2a:
3614: case 0x2b:
3615: case 0x3f:
3616: case 0x29:
3617: case 0x3d:
3618: case 0x27:
3619: case 0x22:
3620: case 0x5b:
3621: case 0x20:
3622: case 0x09:
3623: case 0x0a:
3624: case 0x0d:
3625: case 0x3b:
3626: case 0x2f:
3627: case -1:
3628: reset();
3629: return intern(buf.toString());
3630: default:
3631: if (!isNameCharacter(c, input.xml11))
3632: error("not a name character",
3633: "U+" + Integer.toHexString(c));
3634: else
3635: buf.append(Character.toChars(c));
3636: }
3637: }
3638: while (true);
3639: }
3640:
3641:
3644: public static boolean isXML11Char(int c)
3645: {
3646: return ((c >= 0x0001 && c <= 0xD7FF) ||
3647: (c >= 0xE000 && c < 0xFFFE) ||
3648: (c >= 0x10000 && c <= 0x10FFFF));
3649: }
3650:
3651:
3655: public static boolean isXML11RestrictedChar(int c)
3656: {
3657: return ((c >= 0x0001 && c <= 0x0008) ||
3658: (c >= 0x000B && c <= 0x000C) ||
3659: (c >= 0x000E && c <= 0x001F) ||
3660: (c >= 0x007F && c <= 0x0084) ||
3661: (c >= 0x0086 && c <= 0x009F));
3662: }
3663:
3664:
3668: private boolean isNmtoken(String text, boolean isName)
3669: {
3670: try
3671: {
3672: int[] cp = UnicodeReader.toCodePointArray(text);
3673: if (cp.length == 0)
3674: return false;
3675: if (isName)
3676: {
3677: if (!isNameStartCharacter(cp[0], input.xml11))
3678: return false;
3679: }
3680: else
3681: {
3682: if (!isNameCharacter(cp[0], input.xml11))
3683: return false;
3684: }
3685: for (int i = 1; i < cp.length; i++)
3686: {
3687: if (!isNameCharacter(cp[i], input.xml11))
3688: return false;
3689: }
3690: return true;
3691: }
3692: catch (IOException e)
3693: {
3694: return false;
3695: }
3696: }
3697:
3698:
3702: public static boolean isNameStartCharacter(int c, boolean xml11)
3703: {
3704: if (xml11)
3705: return ((c >= 0x0041 && c <= 0x005a) ||
3706: (c >= 0x0061 && c <= 0x007a) ||
3707: c == 0x3a |
3708: c == 0x5f |
3709: (c >= 0xC0 && c <= 0xD6) ||
3710: (c >= 0xD8 && c <= 0xF6) ||
3711: (c >= 0xF8 && c <= 0x2FF) ||
3712: (c >= 0x370 && c <= 0x37D) ||
3713: (c >= 0x37F && c <= 0x1FFF) ||
3714: (c >= 0x200C && c <= 0x200D) ||
3715: (c >= 0x2070 && c <= 0x218F) ||
3716: (c >= 0x2C00 && c <= 0x2FEF) ||
3717: (c >= 0x3001 && c <= 0xD7FF) ||
3718: (c >= 0xF900 && c <= 0xFDCF) ||
3719: (c >= 0xFDF0 && c <= 0xFFFD) ||
3720: (c >= 0x10000 && c <= 0xEFFFF));
3721: else
3722: return (c == 0x5f || c == 0x3a || isLetter(c));
3723: }
3724:
3725:
3729: public static boolean isNameCharacter(int c, boolean xml11)
3730: {
3731: if (xml11)
3732: return ((c >= 0x0041 && c <= 0x005a) ||
3733: (c >= 0x0061 && c <= 0x007a) ||
3734: (c >= 0x0030 && c <= 0x0039) ||
3735: c == 0x3a |
3736: c == 0x5f |
3737: c == 0x2d |
3738: c == 0x2e |
3739: c == 0xB7 |
3740: (c >= 0xC0 && c <= 0xD6) ||
3741: (c >= 0xD8 && c <= 0xF6) ||
3742: (c >= 0xF8 && c <= 0x2FF) ||
3743: (c >= 0x300 && c <= 0x37D) ||
3744: (c >= 0x37F && c <= 0x1FFF) ||
3745: (c >= 0x200C && c <= 0x200D) ||
3746: (c >= 0x203F && c <= 0x2040) ||
3747: (c >= 0x2070 && c <= 0x218F) ||
3748: (c >= 0x2C00 && c <= 0x2FEF) ||
3749: (c >= 0x3001 && c <= 0xD7FF) ||
3750: (c >= 0xF900 && c <= 0xFDCF) ||
3751: (c >= 0xFDF0 && c <= 0xFFFD) ||
3752: (c >= 0x10000 && c <= 0xEFFFF));
3753: else
3754: return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3755: isLetter(c) || isDigit(c) ||
3756: isCombiningChar(c) || isExtender(c));
3757: }
3758:
3759:
3763: public static boolean isLetter(int c)
3764: {
3765: if ((c >= 0x0041 && c <= 0x005A) ||
3766: (c >= 0x0061 && c <= 0x007A) ||
3767: (c >= 0x00C0 && c <= 0x00D6) ||
3768: (c >= 0x00D8 && c <= 0x00F6) ||
3769: (c >= 0x00F8 && c <= 0x00FF) ||
3770: (c >= 0x0100 && c <= 0x0131) ||
3771: (c >= 0x0134 && c <= 0x013E) ||
3772: (c >= 0x0141 && c <= 0x0148) ||
3773: (c >= 0x014A && c <= 0x017E) ||
3774: (c >= 0x0180 && c <= 0x01C3) ||
3775: (c >= 0x01CD && c <= 0x01F0) ||
3776: (c >= 0x01F4 && c <= 0x01F5) ||
3777: (c >= 0x01FA && c <= 0x0217) ||
3778: (c >= 0x0250 && c <= 0x02A8) ||
3779: (c >= 0x02BB && c <= 0x02C1) ||
3780: c == 0x0386 ||
3781: (c >= 0x0388 && c <= 0x038A) ||
3782: c == 0x038C ||
3783: (c >= 0x038E && c <= 0x03A1) ||
3784: (c >= 0x03A3 && c <= 0x03CE) ||
3785: (c >= 0x03D0 && c <= 0x03D6) ||
3786: c == 0x03DA ||
3787: c == 0x03DC ||
3788: c == 0x03DE ||
3789: c == 0x03E0 ||
3790: (c >= 0x03E2 && c <= 0x03F3) ||
3791: (c >= 0x0401 && c <= 0x040C) ||
3792: (c >= 0x040E && c <= 0x044F) ||
3793: (c >= 0x0451 && c <= 0x045C) ||
3794: (c >= 0x045E && c <= 0x0481) ||
3795: (c >= 0x0490 && c <= 0x04C4) ||
3796: (c >= 0x04C7 && c <= 0x04C8) ||
3797: (c >= 0x04CB && c <= 0x04CC) ||
3798: (c >= 0x04D0 && c <= 0x04EB) ||
3799: (c >= 0x04EE && c <= 0x04F5) ||
3800: (c >= 0x04F8 && c <= 0x04F9) ||
3801: (c >= 0x0531 && c <= 0x0556) ||
3802: c == 0x0559 ||
3803: (c >= 0x0561 && c <= 0x0586) ||
3804: (c >= 0x05D0 && c <= 0x05EA) ||
3805: (c >= 0x05F0 && c <= 0x05F2) ||
3806: (c >= 0x0621 && c <= 0x063A) ||
3807: (c >= 0x0641 && c <= 0x064A) ||
3808: (c >= 0x0671 && c <= 0x06B7) ||
3809: (c >= 0x06BA && c <= 0x06BE) ||
3810: (c >= 0x06C0 && c <= 0x06CE) ||
3811: (c >= 0x06D0 && c <= 0x06D3) ||
3812: c == 0x06D5 ||
3813: (c >= 0x06E5 && c <= 0x06E6) ||
3814: (c >= 0x0905 && c <= 0x0939) ||
3815: c == 0x093D ||
3816: (c >= 0x0958 && c <= 0x0961) ||
3817: (c >= 0x0985 && c <= 0x098C) ||
3818: (c >= 0x098F && c <= 0x0990) ||
3819: (c >= 0x0993 && c <= 0x09A8) ||
3820: (c >= 0x09AA && c <= 0x09B0) ||
3821: c == 0x09B2 ||
3822: (c >= 0x09B6 && c <= 0x09B9) ||
3823: (c >= 0x09DC && c <= 0x09DD) ||
3824: (c >= 0x09DF && c <= 0x09E1) ||
3825: (c >= 0x09F0 && c <= 0x09F1) ||
3826: (c >= 0x0A05 && c <= 0x0A0A) ||
3827: (c >= 0x0A0F && c <= 0x0A10) ||
3828: (c >= 0x0A13 && c <= 0x0A28) ||
3829: (c >= 0x0A2A && c <= 0x0A30) ||
3830: (c >= 0x0A32 && c <= 0x0A33) ||
3831: (c >= 0x0A35 && c <= 0x0A36) ||
3832: (c >= 0x0A38 && c <= 0x0A39) ||
3833: (c >= 0x0A59 && c <= 0x0A5C) ||
3834: c == 0x0A5E ||
3835: (c >= 0x0A72 && c <= 0x0A74) ||
3836: (c >= 0x0A85 && c <= 0x0A8B) ||
3837: c == 0x0A8D ||
3838: (c >= 0x0A8F && c <= 0x0A91) ||
3839: (c >= 0x0A93 && c <= 0x0AA8) ||
3840: (c >= 0x0AAA && c <= 0x0AB0) ||
3841: (c >= 0x0AB2 && c <= 0x0AB3) ||
3842: (c >= 0x0AB5 && c <= 0x0AB9) ||
3843: c == 0x0ABD ||
3844: c == 0x0AE0 ||
3845: (c >= 0x0B05 && c <= 0x0B0C) ||
3846: (c >= 0x0B0F && c <= 0x0B10) ||
3847: (c >= 0x0B13 && c <= 0x0B28) ||
3848: (c >= 0x0B2A && c <= 0x0B30) ||
3849: (c >= 0x0B32 && c <= 0x0B33) ||
3850: (c >= 0x0B36 && c <= 0x0B39) ||
3851: c == 0x0B3D ||
3852: (c >= 0x0B5C && c <= 0x0B5D) ||
3853: (c >= 0x0B5F && c <= 0x0B61) ||
3854: (c >= 0x0B85 && c <= 0x0B8A) ||
3855: (c >= 0x0B8E && c <= 0x0B90) ||
3856: (c >= 0x0B92 && c <= 0x0B95) ||
3857: (c >= 0x0B99 && c <= 0x0B9A) ||
3858: c == 0x0B9C ||
3859: (c >= 0x0B9E && c <= 0x0B9F) ||
3860: (c >= 0x0BA3 && c <= 0x0BA4) ||
3861: (c >= 0x0BA8 && c <= 0x0BAA) ||
3862: (c >= 0x0BAE && c <= 0x0BB5) ||
3863: (c >= 0x0BB7 && c <= 0x0BB9) ||
3864: (c >= 0x0C05 && c <= 0x0C0C) ||
3865: (c >= 0x0C0E && c <= 0x0C10) ||
3866: (c >= 0x0C12 && c <= 0x0C28) ||
3867: (c >= 0x0C2A && c <= 0x0C33) ||
3868: (c >= 0x0C35 && c <= 0x0C39) ||
3869: (c >= 0x0C60 && c <= 0x0C61) ||
3870: (c >= 0x0C85 && c <= 0x0C8C) ||
3871: (c >= 0x0C8E && c <= 0x0C90) ||
3872: (c >= 0x0C92 && c <= 0x0CA8) ||
3873: (c >= 0x0CAA && c <= 0x0CB3) ||
3874: (c >= 0x0CB5 && c <= 0x0CB9) ||
3875: c == 0x0CDE ||
3876: (c >= 0x0CE0 && c <= 0x0CE1) ||
3877: (c >= 0x0D05 && c <= 0x0D0C) ||
3878: (c >= 0x0D0E && c <= 0x0D10) ||
3879: (c >= 0x0D12 && c <= 0x0D28) ||
3880: (c >= 0x0D2A && c <= 0x0D39) ||
3881: (c >= 0x0D60 && c <= 0x0D61) ||
3882: (c >= 0x0E01 && c <= 0x0E2E) ||
3883: c == 0x0E30 ||
3884: (c >= 0x0E32 && c <= 0x0E33) ||
3885: (c >= 0x0E40 && c <= 0x0E45) ||
3886: (c >= 0x0E81 && c <= 0x0E82) ||
3887: c == 0x0E84 ||
3888: (c >= 0x0E87 && c <= 0x0E88) ||
3889: c == 0x0E8A ||
3890: c == 0x0E8D ||
3891: (c >= 0x0E94 && c <= 0x0E97) ||
3892: (c >= 0x0E99 && c <= 0x0E9F) ||
3893: (c >= 0x0EA1 && c <= 0x0EA3) ||
3894: c == 0x0EA5 ||
3895: c == 0x0EA7 ||
3896: (c >= 0x0EAA && c <= 0x0EAB) ||
3897: (c >= 0x0EAD && c <= 0x0EAE) ||
3898: c == 0x0EB0 ||
3899: (c >= 0x0EB2 && c <= 0x0EB3) ||
3900: c == 0x0EBD ||
3901: (c >= 0x0EC0 && c <= 0x0EC4) ||
3902: (c >= 0x0F40 && c <= 0x0F47) ||
3903: (c >= 0x0F49 && c <= 0x0F69) ||
3904: (c >= 0x10A0 && c <= 0x10C5) ||
3905: (c >= 0x10D0 && c <= 0x10F6) ||
3906: c == 0x1100 ||
3907: (c >= 0x1102 && c <= 0x1103) ||
3908: (c >= 0x1105 && c <= 0x1107) ||
3909: c == 0x1109 ||
3910: (c >= 0x110B && c <= 0x110C) ||
3911: (c >= 0x110E && c <= 0x1112) ||
3912: c == 0x113C ||
3913: c == 0x113E ||
3914: c == 0x1140 ||
3915: c == 0x114C ||
3916: c == 0x114E ||
3917: c == 0x1150 ||
3918: (c >= 0x1154 && c <= 0x1155) ||
3919: c == 0x1159 ||
3920: (c >= 0x115F && c <= 0x1161) ||
3921: c == 0x1163 ||
3922: c == 0x1165 ||
3923: c == 0x1167 ||
3924: c == 0x1169 ||
3925: (c >= 0x116D && c <= 0x116E) ||
3926: (c >= 0x1172 && c <= 0x1173) ||
3927: c == 0x1175 ||
3928: c == 0x119E ||
3929: c == 0x11A8 ||
3930: c == 0x11AB ||
3931: (c >= 0x11AE && c <= 0x11AF) ||
3932: (c >= 0x11B7 && c <= 0x11B8) ||
3933: c == 0x11BA ||
3934: (c >= 0x11BC && c <= 0x11C2) ||
3935: c == 0x11EB ||
3936: c == 0x11F0 ||
3937: c == 0x11F9 ||
3938: (c >= 0x1E00 && c <= 0x1E9B) ||
3939: (c >= 0x1EA0 && c <= 0x1EF9) ||
3940: (c >= 0x1F00 && c <= 0x1F15) ||
3941: (c >= 0x1F18 && c <= 0x1F1D) ||
3942: (c >= 0x1F20 && c <= 0x1F45) ||
3943: (c >= 0x1F48 && c <= 0x1F4D) ||
3944: (c >= 0x1F50 && c <= 0x1F57) ||
3945: c == 0x1F59 ||
3946: c == 0x1F5B ||
3947: c == 0x1F5D ||
3948: (c >= 0x1F5F && c <= 0x1F7D) ||
3949: (c >= 0x1F80 && c <= 0x1FB4) ||
3950: (c >= 0x1FB6 && c <= 0x1FBC) ||
3951: c == 0x1FBE ||
3952: (c >= 0x1FC2 && c <= 0x1FC4) ||
3953: (c >= 0x1FC6 && c <= 0x1FCC) ||
3954: (c >= 0x1FD0 && c <= 0x1FD3) ||
3955: (c >= 0x1FD6 && c <= 0x1FDB) ||
3956: (c >= 0x1FE0 && c <= 0x1FEC) ||
3957: (c >= 0x1FF2 && c <= 0x1FF4) ||
3958: (c >= 0x1FF6 && c <= 0x1FFC) ||
3959: c == 0x2126 ||
3960: (c >= 0x212A && c <= 0x212B) ||
3961: c == 0x212E ||
3962: (c >= 0x2180 && c <= 0x2182) ||
3963: (c >= 0x3041 && c <= 0x3094) ||
3964: (c >= 0x30A1 && c <= 0x30FA) ||
3965: (c >= 0x3105 && c <= 0x312C) ||
3966: (c >= 0xAC00 && c <= 0xD7A3))
3967: return true;
3968: if ((c >= 0x4e00 && c <= 0x9fa5) ||
3969: c == 0x3007 ||
3970: (c >= 0x3021 && c <= 0x3029))
3971: return true;
3972: return false;
3973: }
3974:
3975:
3979: public static boolean isDigit(int c)
3980: {
3981: return ((c >= 0x0030 && c <= 0x0039) ||
3982: (c >= 0x0660 && c <= 0x0669) ||
3983: (c >= 0x06F0 && c <= 0x06F9) ||
3984: (c >= 0x0966 && c <= 0x096F) ||
3985: (c >= 0x09E6 && c <= 0x09EF) ||
3986: (c >= 0x0A66 && c <= 0x0A6F) ||
3987: (c >= 0x0AE6 && c <= 0x0AEF) ||
3988: (c >= 0x0B66 && c <= 0x0B6F) ||
3989: (c >= 0x0BE7 && c <= 0x0BEF) ||
3990: (c >= 0x0C66 && c <= 0x0C6F) ||
3991: (c >= 0x0CE6 && c <= 0x0CEF) ||
3992: (c >= 0x0D66 && c <= 0x0D6F) ||
3993: (c >= 0x0E50 && c <= 0x0E59) ||
3994: (c >= 0x0ED0 && c <= 0x0ED9) ||
3995: (c >= 0x0F20 && c <= 0x0F29));
3996: }
3997:
3998:
4002: public static boolean isCombiningChar(int c)
4003: {
4004: return ((c >= 0x0300 && c <= 0x0345) ||
4005: (c >= 0x0360 && c <= 0x0361) ||
4006: (c >= 0x0483 && c <= 0x0486) ||
4007: (c >= 0x0591 && c <= 0x05A1) ||
4008: (c >= 0x05A3 && c <= 0x05B9) ||
4009: (c >= 0x05BB && c <= 0x05BD) ||
4010: c == 0x05BF ||
4011: (c >= 0x05C1 && c <= 0x05C2) ||
4012: c == 0x05C4 ||
4013: (c >= 0x064B && c <= 0x0652) ||
4014: c == 0x0670 ||
4015: (c >= 0x06D6 && c <= 0x06DC) ||
4016: (c >= 0x06DD && c <= 0x06DF) ||
4017: (c >= 0x06E0 && c <= 0x06E4) ||
4018: (c >= 0x06E7 && c <= 0x06E8) ||
4019: (c >= 0x06EA && c <= 0x06ED) ||
4020: (c >= 0x0901 && c <= 0x0903) ||
4021: c == 0x093C ||
4022: (c >= 0x093E && c <= 0x094C) ||
4023: c == 0x094D ||
4024: (c >= 0x0951 && c <= 0x0954) ||
4025: (c >= 0x0962 && c <= 0x0963) ||
4026: (c >= 0x0981 && c <= 0x0983) ||
4027: c == 0x09BC ||
4028: c == 0x09BE ||
4029: c == 0x09BF ||
4030: (c >= 0x09C0 && c <= 0x09C4) ||
4031: (c >= 0x09C7 && c <= 0x09C8) ||
4032: (c >= 0x09CB && c <= 0x09CD) ||
4033: c == 0x09D7 ||
4034: (c >= 0x09E2 && c <= 0x09E3) ||
4035: c == 0x0A02 ||
4036: c == 0x0A3C ||
4037: c == 0x0A3E ||
4038: c == 0x0A3F ||
4039: (c >= 0x0A40 && c <= 0x0A42) ||
4040: (c >= 0x0A47 && c <= 0x0A48) ||
4041: (c >= 0x0A4B && c <= 0x0A4D) ||
4042: (c >= 0x0A70 && c <= 0x0A71) ||
4043: (c >= 0x0A81 && c <= 0x0A83) ||
4044: c == 0x0ABC ||
4045: (c >= 0x0ABE && c <= 0x0AC5) ||
4046: (c >= 0x0AC7 && c <= 0x0AC9) ||
4047: (c >= 0x0ACB && c <= 0x0ACD) ||
4048: (c >= 0x0B01 && c <= 0x0B03) ||
4049: c == 0x0B3C ||
4050: (c >= 0x0B3E && c <= 0x0B43) ||
4051: (c >= 0x0B47 && c <= 0x0B48) ||
4052: (c >= 0x0B4B && c <= 0x0B4D) ||
4053: (c >= 0x0B56 && c <= 0x0B57) ||
4054: (c >= 0x0B82 && c <= 0x0B83) ||
4055: (c >= 0x0BBE && c <= 0x0BC2) ||
4056: (c >= 0x0BC6 && c <= 0x0BC8) ||
4057: (c >= 0x0BCA && c <= 0x0BCD) ||
4058: c == 0x0BD7 ||
4059: (c >= 0x0C01 && c <= 0x0C03) ||
4060: (c >= 0x0C3E && c <= 0x0C44) ||
4061: (c >= 0x0C46 && c <= 0x0C48) ||
4062: (c >= 0x0C4A && c <= 0x0C4D) ||
4063: (c >= 0x0C55 && c <= 0x0C56) ||
4064: (c >= 0x0C82 && c <= 0x0C83) ||
4065: (c >= 0x0CBE && c <= 0x0CC4) ||
4066: (c >= 0x0CC6 && c <= 0x0CC8) ||
4067: (c >= 0x0CCA && c <= 0x0CCD) ||
4068: (c >= 0x0CD5 && c <= 0x0CD6) ||
4069: (c >= 0x0D02 && c <= 0x0D03) ||
4070: (c >= 0x0D3E && c <= 0x0D43) ||
4071: (c >= 0x0D46 && c <= 0x0D48) ||
4072: (c >= 0x0D4A && c <= 0x0D4D) ||
4073: c == 0x0D57 ||
4074: c == 0x0E31 ||
4075: (c >= 0x0E34 && c <= 0x0E3A) ||
4076: (c >= 0x0E47 && c <= 0x0E4E) ||
4077: c == 0x0EB1 ||
4078: (c >= 0x0EB4 && c <= 0x0EB9) ||
4079: (c >= 0x0EBB && c <= 0x0EBC) ||
4080: (c >= 0x0EC8 && c <= 0x0ECD) ||
4081: (c >= 0x0F18 && c <= 0x0F19) ||
4082: c == 0x0F35 ||
4083: c == 0x0F37 ||
4084: c == 0x0F39 ||
4085: c == 0x0F3E ||
4086: c == 0x0F3F ||
4087: (c >= 0x0F71 && c <= 0x0F84) ||
4088: (c >= 0x0F86 && c <= 0x0F8B) ||
4089: (c >= 0x0F90 && c <= 0x0F95) ||
4090: c == 0x0F97 ||
4091: (c >= 0x0F99 && c <= 0x0FAD) ||
4092: (c >= 0x0FB1 && c <= 0x0FB7) ||
4093: c == 0x0FB9 ||
4094: (c >= 0x20D0 && c <= 0x20DC) ||
4095: c == 0x20E1 ||
4096: (c >= 0x302A && c <= 0x302F) ||
4097: c == 0x3099 ||
4098: c == 0x309A);
4099: }
4100:
4101:
4105: public static boolean isExtender(int c)
4106: {
4107: return (c == 0x00B7 ||
4108: c == 0x02D0 ||
4109: c == 0x02D1 ||
4110: c == 0x0387 ||
4111: c == 0x0640 ||
4112: c == 0x0E46 ||
4113: c == 0x0EC6 ||
4114: c == 0x3005 ||
4115: (c >= 0x3031 && c <= 0x3035) ||
4116: (c >= 0x309D && c <= 0x309E) ||
4117: (c >= 0x30FC && c <= 0x30FE));
4118: }
4119:
4120:
4124: public static boolean isChar(int c)
4125: {
4126: return (c >= 0x20 && c < 0xd800) ||
4127: (c >= 0xe00 && c < 0xfffe) ||
4128: (c >= 0x10000 && c < 0x110000) ||
4129: c == 0xa || c == 0x9 || c == 0xd;
4130: }
4131:
4132:
4136: private String intern(String text)
4137: {
4138: return stringInterning ? text.intern() : text;
4139: }
4140:
4141:
4144: private void error(String message)
4145: throws XMLStreamException
4146: {
4147: error(message, null);
4148: }
4149:
4150:
4153: private void error(String message, Object info)
4154: throws XMLStreamException
4155: {
4156: if (info != null)
4157: {
4158: if (info instanceof String)
4159: message += ": \"" + ((String) info) + "\"";
4160: else if (info instanceof Character)
4161: message += ": '" + ((Character) info) + "'";
4162: }
4163: throw new XMLStreamException(message);
4164: }
4165:
4166:
4169: private void validateStartElement(String elementName)
4170: throws XMLStreamException
4171: {
4172: if (currentContentModel == null)
4173: {
4174:
4175:
4176: if (!elementName.equals(doctype.rootName))
4177: error("root element name must match name in DTD");
4178: return;
4179: }
4180:
4181: switch (currentContentModel.type)
4182: {
4183: case ContentModel.EMPTY:
4184: error("child element found in empty element", elementName);
4185: break;
4186: case ContentModel.ELEMENT:
4187: LinkedList ctx = (LinkedList) validationStack.getLast();
4188: ctx.add(elementName);
4189: break;
4190: case ContentModel.MIXED:
4191: MixedContentModel mm = (MixedContentModel) currentContentModel;
4192: if (!mm.containsName(elementName))
4193: error("illegal element for content model", elementName);
4194: break;
4195: }
4196: }
4197:
4198:
4201: private void validateEndElement()
4202: throws XMLStreamException
4203: {
4204: if (currentContentModel == null)
4205: {
4206:
4207:
4208: if (!idrefs.containsAll(ids))
4209: error("IDREF values must match the value of some ID attribute");
4210: return;
4211: }
4212:
4213: switch (currentContentModel.type)
4214: {
4215: case ContentModel.ELEMENT:
4216: LinkedList ctx = (LinkedList) validationStack.getLast();
4217: ElementContentModel ecm = (ElementContentModel) currentContentModel;
4218: validateElementContent(ecm, ctx);
4219: break;
4220: }
4221: }
4222:
4223:
4226: private void validatePCData(String text)
4227: throws XMLStreamException
4228: {
4229:
4230: switch (currentContentModel.type)
4231: {
4232: case ContentModel.EMPTY:
4233: error("character data found in empty element", text);
4234: break;
4235: case ContentModel.ELEMENT:
4236: boolean white = true;
4237: int len = text.length();
4238: for (int i = 0; i < len; i++)
4239: {
4240: char c = text.charAt(i);
4241: if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4242: {
4243: white = false;
4244: break;
4245: }
4246: }
4247: if (!white)
4248: error("character data found in element with element content", text);
4249: else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4250:
4251: error("whitespace in element content of externally declared " +
4252: "element in standalone document");
4253: break;
4254: }
4255: }
4256:
4257:
4261: private void validateElementContent(ElementContentModel model,
4262: LinkedList children)
4263: throws XMLStreamException
4264: {
4265:
4266: StringBuffer buf = new StringBuffer();
4267: for (Iterator i = children.iterator(); i.hasNext(); )
4268: {
4269: buf.append((String) i.next());
4270: buf.append(' ');
4271: }
4272: String c = buf.toString();
4273: String regex = createRegularExpression(model);
4274: if (!c.matches(regex))
4275: error("element content "+model.text+" does not match expression "+regex, c);
4276: }
4277:
4278:
4282: private String createRegularExpression(ElementContentModel model)
4283: {
4284: if (model.regex == null)
4285: {
4286: StringBuffer buf = new StringBuffer();
4287: buf.append('(');
4288: for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4289: {
4290: ContentParticle cp = (ContentParticle) i.next();
4291: if (cp.content instanceof String)
4292: {
4293: buf.append('(');
4294: buf.append((String) cp.content);
4295: buf.append(' ');
4296: buf.append(')');
4297: if (cp.max == -1)
4298: {
4299: if (cp.min == 0)
4300: buf.append('*');
4301: else
4302: buf.append('+');
4303: }
4304: else if (cp.min == 0)
4305: buf.append('?');
4306: }
4307: else
4308: {
4309: ElementContentModel ecm = (ElementContentModel) cp.content;
4310: buf.append(createRegularExpression(ecm));
4311: }
4312: if (model.or && i.hasNext())
4313: buf.append('|');
4314: }
4315: buf.append(')');
4316: if (model.max == -1)
4317: {
4318: if (model.min == 0)
4319: buf.append('*');
4320: else
4321: buf.append('+');
4322: }
4323: else if (model.min == 0)
4324: buf.append('?');
4325: model.regex = buf.toString();
4326: }
4327: return model.regex;
4328: }
4329:
4330:
4333: void validateDoctype()
4334: throws XMLStreamException
4335: {
4336: for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4337: {
4338: Map.Entry entry = (Map.Entry) i.next();
4339: Object entity = entry.getValue();
4340: if (entity instanceof ExternalIds)
4341: {
4342: ExternalIds ids = (ExternalIds) entity;
4343: if (ids.notationName != null)
4344: {
4345:
4346: ExternalIds notation = doctype.getNotation(ids.notationName);
4347: if (notation == null)
4348: error("Notation name must match the declared name of a " +
4349: "notation", ids.notationName);
4350: }
4351: }
4352: }
4353: }
4354:
4355:
4360: public static void main(String[] args)
4361: throws Exception
4362: {
4363: boolean validating = false;
4364: boolean namespaceAware = false;
4365: boolean xIncludeAware = false;
4366: int pos = 0;
4367: while (pos < args.length && args[pos].startsWith("-"))
4368: {
4369: if ("-x".equals(args[pos]))
4370: xIncludeAware = true;
4371: else if ("-v".equals(args[pos]))
4372: validating = true;
4373: else if ("-n".equals(args[pos]))
4374: namespaceAware = true;
4375: pos++;
4376: }
4377: if (pos >= args.length)
4378: {
4379: System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4380: System.out.println("\t-n: use namespace aware mode");
4381: System.out.println("\t-v: use validating parser");
4382: System.out.println("\t-x: use XInclude aware mode");
4383: System.exit(2);
4384: }
4385: while (pos < args.length)
4386: {
4387: XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
4388: absolutize(null, args[pos]),
4389: validating,
4390: namespaceAware,
4391: true,
4392: true,
4393: true,
4394: true,
4395: true,
4396: true,
4397: true,
4398: null,
4399: null);
4400: XMLStreamReader reader = p;
4401: if (xIncludeAware)
4402: reader = new XIncludeFilter(p, args[pos], true, true, true);
4403: try
4404: {
4405: int event;
4406:
4407: while (reader.hasNext())
4408: {
4409: event = reader.next();
4410: Location loc = reader.getLocation();
4411: System.out.print(loc.getLineNumber() + ":" +
4412: loc.getColumnNumber() + " ");
4413: switch (event)
4414: {
4415: case XMLStreamConstants.START_DOCUMENT:
4416: System.out.println("START_DOCUMENT version=" +
4417: reader.getVersion() +
4418: " encoding=" +
4419: reader.getEncoding());
4420: break;
4421: case XMLStreamConstants.END_DOCUMENT:
4422: System.out.println("END_DOCUMENT");
4423: break;
4424: case XMLStreamConstants.START_ELEMENT:
4425: System.out.println("START_ELEMENT " +
4426: reader.getName());
4427: int l = reader.getNamespaceCount();
4428: for (int i = 0; i < l; i++)
4429: System.out.println("\tnamespace " +
4430: reader.getNamespacePrefix(i) + "='" +
4431: reader.getNamespaceURI(i)+"'");
4432: l = reader.getAttributeCount();
4433: for (int i = 0; i < l; i++)
4434: System.out.println("\tattribute " +
4435: reader.getAttributeName(i) + "='" +
4436: reader.getAttributeValue(i) + "'");
4437: break;
4438: case XMLStreamConstants.END_ELEMENT:
4439: System.out.println("END_ELEMENT " + reader.getName());
4440: break;
4441: case XMLStreamConstants.CHARACTERS:
4442: System.out.println("CHARACTERS '" +
4443: encodeText(reader.getText()) + "'");
4444: break;
4445: case XMLStreamConstants.CDATA:
4446: System.out.println("CDATA '" +
4447: encodeText(reader.getText()) + "'");
4448: break;
4449: case XMLStreamConstants.SPACE:
4450: System.out.println("SPACE '" +
4451: encodeText(reader.getText()) + "'");
4452: break;
4453: case XMLStreamConstants.DTD:
4454: System.out.println("DTD " + reader.getText());
4455: break;
4456: case XMLStreamConstants.ENTITY_REFERENCE:
4457: System.out.println("ENTITY_REFERENCE " + reader.getText());
4458: break;
4459: case XMLStreamConstants.COMMENT:
4460: System.out.println("COMMENT '" +
4461: encodeText(reader.getText()) + "'");
4462: break;
4463: case XMLStreamConstants.PROCESSING_INSTRUCTION:
4464: System.out.println("PROCESSING_INSTRUCTION " +
4465: reader.getPITarget() + " " +
4466: reader.getPIData());
4467: break;
4468: case START_ENTITY:
4469: System.out.println("START_ENTITY " + reader.getText());
4470: break;
4471: case END_ENTITY:
4472: System.out.println("END_ENTITY " + reader.getText());
4473: break;
4474: default:
4475: System.out.println("Unknown event: " + event);
4476: }
4477: }
4478: }
4479: catch (XMLStreamException e)
4480: {
4481: Location l = reader.getLocation();
4482: System.out.println("At line "+l.getLineNumber()+
4483: ", column "+l.getColumnNumber()+
4484: " of "+l.getSystemId());
4485: throw e;
4486: }
4487: pos++;
4488: }
4489: }
4490:
4491:
4494: private static String encodeText(String text)
4495: {
4496: StringBuffer b = new StringBuffer();
4497: int len = text.length();
4498: for (int i = 0; i < len; i++)
4499: {
4500: char c = text.charAt(i);
4501: switch (c)
4502: {
4503: case '\t':
4504: b.append("\\t");
4505: break;
4506: case '\n':
4507: b.append("\\n");
4508: break;
4509: case '\r':
4510: b.append("\\r");
4511: break;
4512: default:
4513: b.append(c);
4514: }
4515: }
4516: return b.toString();
4517: }
4518:
4519:
4522: class Attribute
4523: {
4524:
4525:
4528: final String name;
4529:
4530:
4533: final String type;
4534:
4535:
4538: final boolean specified;
4539:
4540:
4543: final String value;
4544:
4545:
4548: final String prefix;
4549:
4550:
4553: final String localName;
4554:
4555: Attribute(String name, String type, boolean specified, String value)
4556: {
4557: this.name = name;
4558: this.type = type;
4559: this.specified = specified;
4560: this.value = value;
4561: int ci = name.indexOf(':');
4562: if (ci == -1)
4563: {
4564: prefix = null;
4565: localName = intern(name);
4566: }
4567: else
4568: {
4569: prefix = intern(name.substring(0, ci));
4570: localName = intern(name.substring(ci + 1));
4571: }
4572: }
4573:
4574: public boolean equals(Object other)
4575: {
4576: if (other instanceof Attribute)
4577: {
4578: Attribute a = (Attribute) other;
4579: if (namespaceAware)
4580: {
4581: if (!a.localName.equals(localName))
4582: return false;
4583: String auri = getNamespaceURI(a.prefix);
4584: String uri = getNamespaceURI(prefix);
4585: if (uri == null && (auri == null ||
4586: (input.xml11 && "".equals(auri))))
4587: return true;
4588: if (uri != null)
4589: {
4590: if ("".equals(uri) && input.xml11 && "".equals(auri))
4591: return true;
4592: return uri.equals(auri);
4593: }
4594: return false;
4595: }
4596: else
4597: return a.name.equals(name);
4598: }
4599: return false;
4600: }
4601:
4602: public String toString()
4603: {
4604: StringBuffer buf = new StringBuffer(getClass().getName());
4605: buf.append('[');
4606: buf.append("name=");
4607: buf.append(name);
4608: if (value != null)
4609: {
4610: buf.append(",value=");
4611: buf.append(value);
4612: }
4613: if (type != null)
4614: {
4615: buf.append(",type=");
4616: buf.append(type);
4617: }
4618: if (specified)
4619: buf.append(",specified");
4620: buf.append(']');
4621: return buf.toString();
4622: }
4623:
4624: }
4625:
4626:
4629: class Doctype
4630: {
4631:
4632:
4635: final String rootName;
4636:
4637:
4640: final String publicId;
4641:
4642:
4645: final String systemId;
4646:
4647:
4650: private final LinkedHashMap elements = new LinkedHashMap();
4651:
4652:
4655: private final LinkedHashMap attlists = new LinkedHashMap();
4656:
4657:
4660: private final LinkedHashMap entities = new LinkedHashMap();
4661:
4662:
4665: private final LinkedHashMap notations = new LinkedHashMap();
4666:
4667:
4670: private final LinkedHashMap comments = new LinkedHashMap();
4671:
4672:
4676: private final LinkedHashMap pis = new LinkedHashMap();
4677:
4678:
4681: private final LinkedList entries = new LinkedList();
4682:
4683:
4686: private final HashSet externalEntities = new HashSet();
4687:
4688:
4691: private final HashSet externalNotations = new HashSet();
4692:
4693:
4696: private int anon = 1;
4697:
4698:
4701: Doctype(String rootName, String publicId, String systemId)
4702: {
4703: this.rootName = rootName;
4704: this.publicId = publicId;
4705: this.systemId = systemId;
4706: }
4707:
4708:
4714: void addElementDecl(String name, String text, ContentModel model)
4715: {
4716: if (elements.containsKey(name))
4717: return;
4718: model.text = text;
4719: model.external = (inputStack.size() != 1);
4720: elements.put(name, model);
4721: entries.add("E" + name);
4722: }
4723:
4724:
4730: void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4731: {
4732: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4733: if (attlist == null)
4734: {
4735: attlist = new LinkedHashMap();
4736: attlists.put(ename, attlist);
4737: }
4738: else if (attlist.containsKey(aname))
4739: return;
4740: attlist.put(aname, decl);
4741: String key = "A" + ename;
4742: if (!entries.contains(key))
4743: entries.add(key);
4744: }
4745:
4746:
4752: void addEntityDecl(String name, String text, boolean inExternalSubset)
4753: {
4754: if (entities.containsKey(name))
4755: return;
4756: entities.put(name, text);
4757: entries.add("e" + name);
4758: if (inExternalSubset)
4759: externalEntities.add(name);
4760: }
4761:
4762:
4768: void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4769: {
4770: if (entities.containsKey(name))
4771: return;
4772: entities.put(name, ids);
4773: entries.add("e" + name);
4774: if (inExternalSubset)
4775: externalEntities.add(name);
4776: }
4777:
4778:
4784: void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4785: {
4786: if (notations.containsKey(name))
4787: return;
4788: notations.put(name, ids);
4789: entries.add("n" + name);
4790: if (inExternalSubset)
4791: externalNotations.add(name);
4792: }
4793:
4794:
4797: void addComment(String text)
4798: {
4799: String key = Integer.toString(anon++);
4800: comments.put(key, text);
4801: entries.add("c" + key);
4802: }
4803:
4804:
4807: void addPI(String target, String data)
4808: {
4809: String key = Integer.toString(anon++);
4810: pis.put(key, new String[] {target, data});
4811: entries.add("p" + key);
4812: }
4813:
4814:
4818: ContentModel getElementModel(String name)
4819: {
4820: return (ContentModel) elements.get(name);
4821: }
4822:
4823:
4828: AttributeDecl getAttributeDecl(String ename, String aname)
4829: {
4830: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4831: return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4832: }
4833:
4834:
4839: boolean isAttributeDeclared(String ename, String aname)
4840: {
4841: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4842: return (attlist == null) ? false : attlist.containsKey(aname);
4843: }
4844:
4845:
4850: Iterator attlistIterator(String ename)
4851: {
4852: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4853: return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4854: attlist.entrySet().iterator();
4855: }
4856:
4857:
4860: Object getEntity(String name)
4861: {
4862: return entities.get(name);
4863: }
4864:
4865:
4869: boolean isEntityExternal(String name)
4870: {
4871: return externalEntities.contains(name);
4872: }
4873:
4874:
4877: Iterator entityIterator()
4878: {
4879: return entities.entrySet().iterator();
4880: }
4881:
4882:
4885: ExternalIds getNotation(String name)
4886: {
4887: return (ExternalIds) notations.get(name);
4888: }
4889:
4890:
4894: boolean isNotationExternal(String name)
4895: {
4896: return externalNotations.contains(name);
4897: }
4898:
4899:
4902: String getComment(String key)
4903: {
4904: return (String) comments.get(key);
4905: }
4906:
4907:
4911: String[] getPI(String key)
4912: {
4913: return (String[]) pis.get(key);
4914: }
4915:
4916:
4920: Iterator entryIterator()
4921: {
4922: return entries.iterator();
4923: }
4924:
4925: }
4926:
4927:
/**
 * Simple holder for the external identifiers of an entity or notation
 * declaration. All fields are plain mutable values and may be null.
 */
class ExternalIds
{

  /**
   * The public identifier.
   */
  String publicId;

  /**
   * The system identifier.
   */
  String systemId;

  /**
   * The name of the associated notation; validateDoctype requires a
   * non-null value here to match a declared notation.
   */
  String notationName;
}
4948:
4949:
/**
 * Base class for the content models of element declarations.
 * A model has a fixed kind (one of EMPTY, ANY, ELEMENT, MIXED) and
 * minimum/maximum occurrence values that both start out as 1; a maximum
 * of -1 is used for "unbounded".
 */
abstract class ContentModel
{
  static final int EMPTY = 0;
  static final int ANY = 1;
  static final int ELEMENT = 2;
  static final int MIXED = 3;

  /** Minimum number of occurrences. */
  int min = 1;
  /** Maximum number of occurrences. */
  int max = 1;
  /** The kind of model: EMPTY, ANY, ELEMENT or MIXED. */
  final int type;
  /** The text of the declaration; assigned by Doctype.addElementDecl. */
  String text;
  /** The external flag; assigned by Doctype.addElementDecl. */
  boolean external;

  ContentModel(int type)
  {
    this.type = type;
  }

}
4973:
4974:
4977: class EmptyContentModel
4978: extends ContentModel
4979: {
4980:
4981: EmptyContentModel()
4982: {
4983: super(ContentModel.EMPTY);
4984: min = 0;
4985: max = 0;
4986: }
4987:
4988: }
4989:
4990:
4993: class AnyContentModel
4994: extends ContentModel
4995: {
4996:
4997: AnyContentModel()
4998: {
4999: super(ContentModel.ANY);
5000: min = 0;
5001: max = -1;
5002: }
5003:
5004: }
5005:
5006:
5009: class ElementContentModel
5010: extends ContentModel
5011: {
5012:
5013: LinkedList contentParticles;
5014: boolean or;
5015: String regex;
5016:
5017: ElementContentModel()
5018: {
5019: super(ContentModel.ELEMENT);
5020: contentParticles = new LinkedList();
5021: }
5022:
5023: void addContentParticle(ContentParticle cp)
5024: {
5025: contentParticles.add(cp);
5026: }
5027:
5028: }
5029:
/**
 * One item of an element content model together with its occurrence
 * bounds.
 */
class ContentParticle
{

  // Occurrence bounds; createRegularExpression treats max == -1 as
  // "unbounded" and min == 0 as "optional".
  int min = 1;
  int max = 1;
  // Either an element name (String) or a nested ElementContentModel,
  // as distinguished by createRegularExpression.
  Object content;

}
5038:
5039:
5042: class MixedContentModel
5043: extends ContentModel
5044: {
5045:
5046: private HashSet names;
5047:
5048: MixedContentModel()
5049: {
5050: super(ContentModel.MIXED);
5051: names = new HashSet();
5052: }
5053:
5054: void addName(String name)
5055: {
5056: names.add(name);
5057: }
5058:
5059: boolean containsName(String name)
5060: {
5061: return names.contains(name);
5062: }
5063:
5064: }
5065:
5066:
/**
 * Immutable record of an attribute declaration. All values are stored
 * exactly as passed to the constructor.
 */
class AttributeDecl
{

  /**
   * The attribute type.
   */
  final String type;

  /**
   * The declared value.
   */
  final String value;

  /**
   * The kind of value.
   * NOTE(review): presumably one of the ATTRIBUTE_DEFAULT_* constants of
   * the parser - confirm against the callers.
   */
  final int valueType;

  /**
   * The enumeration text.
   */
  final String enumeration;

  /**
   * The set of values.
   * NOTE(review): presumably the tokens of the enumeration - confirm
   * against the callers.
   */
  final HashSet values;

  /**
   * The external flag.
   */
  final boolean external;

  AttributeDecl(String type, String value,
                int valueType, String enumeration,
                HashSet values, boolean external)
  {
    this.external = external;
    this.values = values;
    this.enumeration = enumeration;
    this.valueType = valueType;
    this.value = value;
    this.type = type;
  }

}
5115:
5116:
5119: static class Input
5120: implements Location
5121: {
5122:
5123: int line = 1, markLine;
5124: int column, markColumn;
5125: int offset, markOffset;
5126: final String publicId, systemId, name;
5127: final boolean report;
5128: final boolean normalize;
5129:
5130: InputStream in;
5131: Reader reader;
5132: UnicodeReader unicodeReader;
5133: boolean initialized;
5134: boolean encodingDetected;
5135: String inputEncoding;
5136: boolean xml11;
5137:
5138: Input(InputStream in, Reader reader, String publicId, String systemId,
5139: String name, String inputEncoding, boolean report,
5140: boolean normalize)
5141: {
5142: if (inputEncoding == null)
5143: inputEncoding = "UTF-8";
5144: this.inputEncoding = inputEncoding;
5145: this.publicId = publicId;
5146: this.systemId = systemId;
5147: this.name = name;
5148: this.report = report;
5149: this.normalize = normalize;
5150: if (in != null)
5151: {
5152: if (reader != null)
5153: throw new IllegalStateException("both byte and char streams "+
5154: "specified");
5155: if (normalize)
5156: in = new CRLFInputStream(in);
5157: in = new BufferedInputStream(in);
5158: this.in = in;
5159: }
5160: else
5161: {
5162: this.reader = normalize ? new CRLFReader(reader) : reader;
5163: unicodeReader = new UnicodeReader(this.reader);
5164: }
5165: initialized = false;
5166: }
5167:
5168:
5169:
5170: public int getCharacterOffset()
5171: {
5172: return offset;
5173: }
5174:
5175: public int getColumnNumber()
5176: {
5177: return column;
5178: }
5179:
5180: public int getLineNumber()
5181: {
5182: return line;
5183: }
5184:
5185: public String getPublicId()
5186: {
5187: return publicId;
5188: }
5189:
5190: public String getSystemId()
5191: {
5192: return systemId;
5193: }
5194:
5195: void init()
5196: throws IOException
5197: {
5198: if (initialized)
5199: return;
5200: if (in != null)
5201: detectEncoding();
5202: initialized = true;
5203: }
5204:
5205: void mark(int len)
5206: throws IOException
5207: {
5208: markOffset = offset;
5209: markLine = line;
5210: markColumn = column;
5211: if (unicodeReader != null)
5212: unicodeReader.mark(len);
5213: else
5214: in.mark(len);
5215: }
5216:
5217:
5220: int read()
5221: throws IOException
5222: {
5223: offset++;
5224: int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5225: if (normalize &&
5226: (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5227: {
5228:
5229: ret = 0x0a;
5230: }
5231:
5232: if (ret == 0x0a)
5233: {
5234: line++;
5235: column = 0;
5236: }
5237: else
5238: column++;
5239: return ret;
5240: }
5241:
5242:
5245: int read(int[] b, int off, int len)
5246: throws IOException
5247: {
5248: int ret;
5249: if (unicodeReader != null)
5250: {
5251: ret = unicodeReader.read(b, off, len);
5252: }
5253: else
5254: {
5255: byte[] b2 = new byte[len];
5256: ret = in.read(b2, 0, len);
5257: if (ret != -1)
5258: {
5259: String s = new String(b2, 0, ret, inputEncoding);
5260: int[] c = UnicodeReader.toCodePointArray(s);
5261: ret = c.length;
5262: System.arraycopy(c, 0, b, off, ret);
5263: }
5264: }
5265: if (ret != -1)
5266: {
5267:
5268: for (int i = 0; i < ret; i++)
5269: {
5270: int c = b[off + i];
5271: if (normalize &&
5272: (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5273: {
5274:
5275: c = 0x0a;
5276: b[off + i] = c;
5277: }
5278: if (c == 0x0a)
5279: {
5280: line++;
5281: column = 0;
5282: }
5283: else
5284: column++;
5285: }
5286: }
5287: return ret;
5288: }
5289:
5290: void reset()
5291: throws IOException
5292: {
5293: if (unicodeReader != null)
5294: unicodeReader.reset();
5295: else
5296: in.reset();
5297: offset = markOffset;
5298: line = markLine;
5299: column = markColumn;
5300: }
5301:
5302:
5303:
5304: private static final int[] SIGNATURE_UCS_4_1234 =
5305: new int[] { 0x00, 0x00, 0x00, 0x3c };
5306: private static final int[] SIGNATURE_UCS_4_4321 =
5307: new int[] { 0x3c, 0x00, 0x00, 0x00 };
5308: private static final int[] SIGNATURE_UCS_4_2143 =
5309: new int[] { 0x00, 0x00, 0x3c, 0x00 };
5310: private static final int[] SIGNATURE_UCS_4_3412 =
5311: new int[] { 0x00, 0x3c, 0x00, 0x00 };
5312: private static final int[] SIGNATURE_UCS_2_12 =
5313: new int[] { 0xfe, 0xff };
5314: private static final int[] SIGNATURE_UCS_2_21 =
5315: new int[] { 0xff, 0xfe };
5316: private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5317: new int[] { 0x00, 0x3c, 0x00, 0x3f };
5318: private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5319: new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5320: private static final int[] SIGNATURE_UTF_8 =
5321: new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5322: private static final int[] SIGNATURE_UTF_8_BOM =
5323: new int[] { 0xef, 0xbb, 0xbf };
5324:
5325:
5328: private void detectEncoding()
5329: throws IOException
5330: {
5331: int[] signature = new int[4];
5332: in.mark(4);
5333: for (int i = 0; i < 4; i++)
5334: signature[i] = in.read();
5335: in.reset();
5336:
5337:
5338: if (equals(SIGNATURE_UCS_4_1234, signature))
5339: {
5340: in.read();
5341: in.read();
5342: in.read();
5343: in.read();
5344: setInputEncoding("UTF-32BE");
5345: encodingDetected = true;
5346: }
5347: else if (equals(SIGNATURE_UCS_4_4321, signature))
5348: {
5349: in.read();
5350: in.read();
5351: in.read();
5352: in.read();
5353: setInputEncoding("UTF-32LE");
5354: encodingDetected = true;
5355: }
5356: else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5357: equals(SIGNATURE_UCS_4_3412, signature))
5358: throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5359:
5360:
5361: else if (equals(SIGNATURE_UCS_2_12, signature))
5362: {
5363: in.read();
5364: in.read();
5365: setInputEncoding("UTF-16BE");
5366: encodingDetected = true;
5367: }
5368: else if (equals(SIGNATURE_UCS_2_21, signature))
5369: {
5370: in.read();
5371: in.read();
5372: setInputEncoding("UTF-16LE");
5373: encodingDetected = true;
5374: }
5375: else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5376: {
5377:
5378: throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5379: }
5380: else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5381: {
5382:
5383: throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5384: }
5385:
5386: else if (equals(SIGNATURE_UTF_8, signature))
5387: {
5388:
5389: }
5390: else if (equals(SIGNATURE_UTF_8_BOM, signature))
5391: {
5392: in.read();
5393: in.read();
5394: in.read();
5395: setInputEncoding("UTF-8");
5396: encodingDetected = true;
5397: }
5398: }
5399:
5400: private static boolean equals(int[] b1, int[] b2)
5401: {
5402: for (int i = 0; i < b1.length; i++)
5403: {
5404: if (b1[i] != b2[i])
5405: return false;
5406: }
5407: return true;
5408: }
5409:
5410: void setInputEncoding(String encoding)
5411: throws IOException
5412: {
5413: if (encoding.equals(inputEncoding))
5414: return;
5415: if ("UTF-16".equalsIgnoreCase(encoding) &&
5416: inputEncoding.startsWith("UTF-16"))
5417: return;
5418: if (encodingDetected)
5419: throw new UnsupportedEncodingException("document is not in its " +
5420: "declared encoding " +
5421: inputEncoding +
5422: ": " + encoding);
5423: inputEncoding = encoding;
5424: finalizeEncoding();
5425: }
5426:
5427: void finalizeEncoding()
5428: throws IOException
5429: {
5430: if (reader != null)
5431: return;
5432: reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5433: unicodeReader = new UnicodeReader(reader);
5434: mark(1);
5435: }
5436:
5437: }
5438:
5439: }