1:
37:
38:
39: package ;
40:
41: import ;
42: import ;
43: import ;
44: import ;
45: import ;
46: import ;
47: import ;
48: import ;
49:
50: import ;
51: import ;
52:
53: import ;
54: import ;
55: import ;
56: import ;
57:
58: import ;
59: import ;
60: import ;
61: import ;
62: import ;
63: import ;
64: import ;
65: import ;
66: import ;
67:
68:
91: public class Parser
92: extends ReaderTokenizer
93: implements DTDConstants
94: {
95:
/**
 * The token (or merged token span) of the construct handled most recently;
 * {@link #getCurrentLine()} reports the line on which it begins.
 */
public Token hTag = new Token();


/** The DTD in use; assigned once by the constructor. */
protected DTD dtd;


/**
 * NOTE(review): never read or written in the visible part of this class;
 * presumably a strict-parsing switch for subclasses - confirm.
 */
protected boolean strict;


/**
 * Counter of currently open preformatted elements. It is incremented when
 * such a start tag is handled and decremented (never below zero) on the end
 * tag; text is preprocessed as preformatted while it is positive.
 */
protected int preformatted = 0;
117:
118:
122: private Set documentTags =
123: new TreeSet(new Comparator()
124: {
125: public int compare(Object a, Object b)
126: {
127: return ((String) a).compareToIgnoreCase((String) b);
128: }
129: }
130: );
131:
132:
/** Scratch buffer collecting the text of the construct currently being read. */
private StringBuffer buffer = new StringBuffer();


/**
 * Text handled while a TITLE element is open; handed to
 * {@link #handleTitle(char[])} when that element is closed.
 */
private StringBuffer title = new StringBuffer();


/** The token most recently examined by the reading loops of this class. */
private Token t;


/**
 * Set when a TITLE end tag has been processed; a further TITLE start tag is
 * then reported as an error.
 */
private boolean titleHandled;


/** True between handling the TITLE start tag and handling its end tag. */
private boolean titleOpen;


/**
 * Attributes of the tag being processed; replaced by
 * {@link #readAttributes(String)} for every tag that is read.
 */
htmlAttributeSet attributes =
  htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET;


/** Validator that is informed about every tag opened, closed or met empty. */
private htmlValidator validator;


/**
 * Created from the DTD by the constructor.
 * NOTE(review): not used anywhere else in the visible source - confirm.
 */
private parameterDefaulter defaulter;


/** Converts the contents of {@link #buffer} into the text passed to handleText. */
private textPreProcessor textProcessor = new textPreProcessor();
182:
183:
191: public Parser(DTD a_dtd)
192: {
193: if (a_dtd == null)
194: dtd = gnu.javax.swing.text.html.parser.HTML_401F.getInstance();
195: else
196: dtd = a_dtd;
197:
198: defaulter = new parameterDefaulter(dtd);
199:
200: validator =
201: new htmlValidator(dtd)
202: {
203:
208: protected void s_error(String msg)
209: {
210: error(msg);
211: }
212:
213:
221: protected void handleSupposedEndTag(Element tElement)
222: {
223:
224:
225:
226: TagElement tag = makeTag(tElement, true);
227: _handleEndTag_remaining(tag);
228: }
229:
230:
239: protected void handleSupposedStartTag(Element tElement)
240: {
241: TagElement tag = makeTag(tElement, true);
242: htmlAttributeSet were = attributes;
243: attributes = htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET;
244: _handleStartTag(tag);
245: attributes = were;
246: }
247: };
248: }
249:
250:
254: public SimpleAttributeSet getAttributes()
255: {
256: return new SimpleAttributeSet(attributes);
257: }
258:
259:
263: public void error(String msg)
264: {
265: error(msg, getTokenAhead());
266: }
267:
268: public void error(String msg, Token atToken)
269: {
270: if (atToken != null)
271: handleError(atToken.where.beginLine,
272: msg + ": line " + atToken.where.beginLine +
273: ", absolute pos " + atToken.where.startPosition
274: );
275: else
276: handleError(0, msg);
277: }
278:
279:
283: public void error(String msg, String invalid)
284: {
285: error(msg + ": '" + invalid + "'");
286: }
287:
288:
292: public void error(String parm1, String parm2, String parm3)
293: {
294: error(parm1 + " " + parm2 + " " + parm3);
295: }
296:
297:
301: public void error(String parm1, String parm2, String parm3, String parm4)
302: {
303: error(parm1 + " " + parm2 + " " + parm3 + " " + parm4);
304: }
305:
/**
 * Does nothing in this class.
 * NOTE(review): no caller is visible in this file; presumably kept for API
 * compatibility - confirm.
 */
public void flushAttributes()
{
}
309:
310:
316: public synchronized void parse(Reader reader)
317: throws IOException
318: {
319: reset(reader);
320: restart();
321: try
322: {
323: parseDocument();
324: validator.closeAll();
325: }
326: catch (ParseException ex)
327: {
328: if (ex != null)
329: {
330: error("Unable to continue parsing the document", ex.getMessage());
331:
332: Throwable cause = ex.getCause();
333: if (cause instanceof IOException)
334: throw (IOException) cause;
335: }
336: }
337: }
338:
339:
/**
 * Stub: performs no parsing in this class.
 *
 * @return always null
 * @throws IOException never thrown by this implementation
 */
public String parseDTDMarkup()
  throws IOException
{
  return null;
}
349:
350:
/**
 * Stub: performs no parsing in this class. {@link #Sgml()} calls it with the
 * collected text of an SGML insertion.
 *
 * @param strBuff the text of the markup declaration; ignored here
 * @return always false
 * @throws IOException never thrown by this implementation
 */
public boolean parseMarkupDeclarations(StringBuffer strBuff)
  throws IOException
{
  return false;
}
372:
373:
376: protected int getCurrentLine()
377: {
378: return hTag.where.beginLine;
379: }
380:
381:
389: protected void CDATA(boolean clearBuffer)
390: throws ParseException
391: {
392: Token start = hTag = getTokenAhead();
393:
394: if (clearBuffer)
395: buffer.setLength(0);
396:
397:
398: if (start.kind == EOF)
399: return;
400:
401: read:
402: while (true)
403: {
404: t = getTokenAhead();
405: if (t.kind == EOF)
406: {
407: error("unexpected eof", t);
408: break read;
409: }
410: else if (t.kind == BEGIN)
411: break read;
412: else if (t.kind == Constants.ENTITY)
413: {
414: resolveAndAppendEntity(t);
415: getNextToken();
416: }
417: else
418: {
419: append(t);
420: getNextToken();
421: }
422: }
423: hTag = new Token(start, getTokenAhead(0));
424: if (buffer.length() != 0)
425: _handleText();
426: }
427:
428:
433: protected void Comment()
434: throws ParseException
435: {
436: buffer.setLength(0);
437:
438: Token start = hTag = mustBe(BEGIN);
439: optional(WS);
440: mustBe(EXCLAMATION);
441: optional(WS);
442: mustBe(DOUBLE_DASH);
443:
444: Token t;
445: Token last;
446:
447: comment:
448: while (true)
449: {
450: t = getTokenAhead();
451: if (t.kind == EOF)
452: {
453: handleEOFInComment();
454: last = t;
455: break comment;
456: }
457: else if (COMMENT_END.matches(this))
458: {
459: mustBe(DOUBLE_DASH);
460: optional(WS);
461: last = mustBe(END);
462: break comment;
463: }
464: else if (COMMENT_TRIPLEDASH_END.matches(this))
465: {
466: mustBe(DOUBLE_DASH);
467: t = mustBe(NUMTOKEN);
468: if (t.getImage().equals("-"))
469: {
470: append(t);
471: last = mustBe(END);
472: break comment;
473: }
474: else
475: {
476: buffer.append("--");
477: append(t);
478: t = getTokenAhead();
479: }
480: }
481: else
482:
483: if ((t.getImage().endsWith("--")) &&
484: (
485: getTokenAhead(1).kind == END ||
486: (getTokenAhead(1).kind == WS && getTokenAhead(2).kind == END)
487: )
488: )
489: {
490: buffer.append(t.getImage().substring(0, t.getImage().length() - 2));
491:
492:
493: last = mustBe(t.kind);
494: break comment;
495: }
496: else
497: append(t);
498: mustBe(t.kind);
499: }
500: hTag = new Token(start, last);
501:
502:
503: optional(WS);
504: handleComment();
505: }
506:
507:
511: protected void Script()
512: throws ParseException
513: {
514: Token name;
515:
516: Token start = hTag = mustBe(BEGIN);
517: optional(WS);
518:
519: name = mustBe(SCRIPT);
520:
521: optional(WS);
522:
523: restOfTag(false, name, start);
524:
525: buffer.setLength(0);
526:
527: script:
528: while (!SCRIPT_CLOSE.matches(this))
529: {
530: append(getNextToken());
531: }
532:
533: consume(SCRIPT_CLOSE);
534:
535: _handleText();
536:
537: endTag(false);
538: _handleEndTag(makeTagElement(name.getImage(), false));
539: }
540:
541:
544: protected void Sgml()
545: throws ParseException
546: {
547: if (COMMENT_OPEN.matches(this))
548: Comment();
549: else
550: {
551: Token start = hTag = mustBe(BEGIN);
552: optional(WS);
553: mustBe(EXCLAMATION);
554:
555: buffer.setLength(0);
556: read:
557: while (true)
558: {
559: t = getNextToken();
560: if (t.kind == Constants.ENTITY)
561: {
562: resolveAndAppendEntity(t);
563: }
564: else if (t.kind == EOF)
565: {
566: error("unexpected eof", t);
567: break read;
568: }
569: else if (t.kind == END)
570: break read;
571: else
572: append(t);
573: }
574:
575: try
576: {
577: parseMarkupDeclarations(buffer);
578: }
579: catch (IOException ex)
580: {
581: error("Unable to parse SGML insertion: '" + buffer + "'",
582: new Token(start, t)
583: );
584: }
585: }
586:
587: optional(WS);
588: }
589:
590:
594: protected void Style()
595: throws ParseException
596: {
597: Token name;
598:
599: Token start = hTag = mustBe(BEGIN);
600: optional(WS);
601:
602: name = mustBe(STYLE);
603:
604: optional(WS);
605:
606: restOfTag(false, name, start);
607:
608: buffer.setLength(0);
609:
610: style:
611: while (!STYLE_CLOSE.matches(this))
612: {
613: append(getNextToken());
614: }
615:
616: consume(STYLE_CLOSE);
617:
618: _handleText();
619:
620: endTag(false);
621: _handleEndTag(makeTagElement(name.getImage(), false));
622: }
623:
624:
627: protected void Tag()
628: throws ParseException
629: {
630: mark(true);
631:
632: boolean closing = false;
633: Token name;
634: Token start = hTag = mustBe(BEGIN);
635:
636: optional(WS);
637: name = getNextToken();
638: optional(WS);
639:
640: if (name.kind == SLASH)
641: {
642: closing = true;
643: name = getNextToken();
644: }
645:
646: restOfTag(closing, name, start);
647: }
648:
649:
658: protected void _handleText()
659: {
660: char[] text;
661:
662: if (preformatted > 0)
663: text = textProcessor.preprocessPreformatted(buffer);
664: else
665: text = textProcessor.preprocess(buffer);
666:
667: if (text != null && text.length > 0
668:
669:
670: && (text.length > 1 || text[0] != ' ' || ! TAG_CLOSE.matches(this)))
671: {
672: TagElement pcdata = new TagElement(dtd.getElement("#pcdata"));
673: attributes = htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET;
674: _handleEmptyTag(pcdata);
675:
676: handleText(text);
677: if (titleOpen)
678: title.append(text);
679: }
680: }
681:
682:
686: protected final void append(Token t)
687: {
688: if (t.kind != EOF)
689: t.appendTo(buffer);
690: }
691:
692:
696: protected final void consume(pattern p)
697: {
698: node n;
699: for (int i = 0; i < p.nodes.length; i++)
700: {
701: n = p.nodes [ i ];
702: if (n.optional)
703: optional(n.kind);
704: else
705: mustBe(n.kind);
706: }
707: }
708:
709:
/**
 * Hook called when an end tag is processed; does nothing in this class.
 * It is called with false for the end tags read from the input and with
 * tag.fictional() from the common end tag handling.
 *
 * @param omitted the flag supplied by the caller as described above
 */
protected void endTag(boolean omitted)
{
}
721:
722:
/**
 * Hook receiving the text of a parsed comment; does nothing in this class.
 *
 * @param comment the characters collected between the comment delimiters
 */
protected void handleComment(char[] comment)
{
}
729:
730:
736: protected void handleEOFInComment()
737: {
738: error("Unclosed comment");
739: }
740:
741:
/**
 * Hook called for a tag handled as empty, after the validator has checked
 * it; does nothing in this class. A ChangedCharSetException thrown by an
 * override is caught by the caller and reported as an error.
 *
 * @param tag the empty tag
 * @throws javax.swing.text.ChangedCharSetException never thrown by this
 * implementation
 */
protected void handleEmptyTag(TagElement tag)
  throws javax.swing.text.ChangedCharSetException
{
}
752:
753:
/**
 * Hook called when an end tag is handled; does nothing in this class.
 *
 * @param tag the tag being closed
 */
protected void handleEndTag(TagElement tag)
{
}
762:
763:
/**
 * Hook receiving every error reported through the error(...) methods; does
 * nothing in this class.
 *
 * @param line the begin line of the token the error was reported at, or 0
 * when no token was available
 * @param message the error message
 */
protected void handleError(int line, String message)
{
}
767:
768:
/**
 * Hook called when a start tag is handled, after the validator and
 * {@link #startTag(TagElement)} have seen it; does nothing in this class.
 *
 * @param tag the tag being opened
 */
protected void handleStartTag(TagElement tag)
{
}
777:
778:
/**
 * Hook receiving document text after it has been preprocessed; does nothing
 * in this class. It is only called with a non-empty array.
 *
 * @param text the preprocessed text
 */
protected void handleText(char[] text)
{
}
795:
796:
/**
 * Hook called when the TITLE element is closed; does nothing in this class.
 *
 * @param title the text collected while the TITLE element was open
 */
protected void handleTitle(char[] title)
{
}
806:
807:
812: protected TagElement makeTag(Element element)
813: {
814: return makeTag(element, false);
815: }
816:
817:
825: protected TagElement makeTag(Element element, boolean isSupposed)
826: {
827: return new TagElement(element, isSupposed);
828: }
829:
830:
/**
 * Hook called when an element name is met for the first time since the last
 * restart; does nothing in this class.
 *
 * @param element the element seen for the first time
 */
protected void markFirstTime(Element element)
{
}
838:
839:
843: protected Token mustBe(int kind)
844: {
845: if (getTokenAhead().kind == kind)
846: return getNextToken();
847: else
848: {
849: String ei = "";
850: if (kind < 1000)
851: ei = " ('" + (char) kind + "') ";
852: throw new AssertionError("The token of kind " + kind + ei +
853: " MUST be here,"
854: );
855: }
856: }
857:
858:
867: protected void noValueAttribute(String element, String attribute)
868: {
869: Object value = HTML.NULL_ATTRIBUTE_VALUE;
870:
871: Element e = (Element) dtd.elementHash.get(element.toLowerCase());
872: if (e != null)
873: {
874: AttributeList attr = e.getAttribute(attribute);
875: if (attr != null)
876: {
877: Vector values = attr.values;
878: if (values != null && values.size() == 1)
879: value = values.get(0);
880: }
881: }
882: attributes.addAttribute(attribute, value);
883: }
884:
885:
889: protected Token optional(int kind)
890: {
891: if (getTokenAhead().kind == kind)
892: return getNextToken();
893: else
894: return null;
895: }
896:
897:
898: protected void parseDocument()
899: throws ParseException
900: {
901:
902: optional(WS);
903: while (getTokenAhead().kind != EOF)
904: {
905: advanced = false;
906: if (TAG.matches(this))
907: Tag();
908: else if (COMMENT_OPEN.matches(this))
909: Comment();
910: else if (STYLE_OPEN.matches(this))
911: Style();
912: else if (SCRIPT_OPEN.matches(this))
913: Script();
914: else if (SGML.matches(this))
915: Sgml();
916: else
917: CDATA(true);
918:
919:
920: if (!advanced)
921: {
922: Token wrong = getNextToken();
923: error("unexpected '" + wrong.getImage() + "'", wrong);
924: buffer.setLength(0);
925: buffer.append(wrong.getImage());
926: _handleText();
927: }
928: }
929: }
930:
931:
/**
 * Reads the attribute list of a tag into a fresh attribute set stored in
 * the attributes field. Reading continues while the next token is a
 * NUMTOKEN (taken as the attribute name) and stops early when the token
 * after an equals sign cannot start a value.
 *
 * Note that the shared text buffer is overwritten when quoted values are
 * read.
 *
 * @param element the name of the element owning the attributes; used to
 * look up the value of attributes given without one
 */
protected void readAttributes(String element)
{
  Token name;
  Token value;
  Token next;
  String attrValue;

  attributes = new htmlAttributeSet();

  optional(WS);

  attributeReading:
  while (getTokenAhead().kind == NUMTOKEN)
    {
      name = getNextToken();
      optional(WS);

      next = getTokenAhead();
      if (next.kind == EQ)
        {
          mustBe(EQ);
          optional(WS);

          // The first token of the value decides how the value is read.
          next = getNextToken();

          switch (next.kind)
            {
              case QUOT:

                // Value in double quotes: read up to the closing quote.
                buffer.setLength(0);
                readTillTokenE(QUOT);
                attrValue = buffer.toString();
                break;

              case AP:

                // Value in apostrophes: read up to the closing apostrophe.
                buffer.setLength(0);
                readTillTokenE(AP);
                attrValue = buffer.toString();
                break;


              case NUMTOKEN:
                // Unquoted value starting with a word.
                value = next;
                optional(WS);


                next = getTokenAhead();
                if (bQUOTING.get(next.kind))
                  {
                    // A closing quote follows a value that had no opening one.
                    hTag = next;
                    error("The value without opening quote is closed with '"
                          + next.getImage() + "'");
                    attrValue = value.getImage();
                  }
                else if (next.kind == SLASH || next.kind == OTHER)
                  // The value continues: glue the following NUMTOKEN, SLASH
                  // and OTHER tokens to it.
                  {
                    StringBuffer image = new StringBuffer(value.getImage());
                    while (next.kind == NUMTOKEN || next.kind == SLASH
                           || next.kind == OTHER)
                      {
                        image.append(getNextToken().getImage());
                        next = getTokenAhead();
                      }
                    attrValue = image.toString();
                  }
                else
                  attrValue = value.getImage();
                break;

              case SLASH:
                // Unquoted value starting with a slash.
                value = next;
                optional(WS);


                next = getTokenAhead();
                if (bQUOTING.get(next.kind))
                  {
                    // A closing quote follows a value that had no opening one.
                    hTag = next;
                    error("The value without opening quote is closed with '"
                          + next.getImage() + "'");
                    attrValue = value.getImage();
                  }
                else if (next.kind == NUMTOKEN || next.kind == SLASH)
                  // The value continues: glue the following NUMTOKEN and
                  // SLASH tokens to it (OTHER is not accepted here).
                  {
                    StringBuffer image = new StringBuffer(value.getImage());
                    while (next.kind == NUMTOKEN || next.kind == SLASH)
                      {
                        image.append(getNextToken().getImage());
                        next = getTokenAhead();
                      }
                    attrValue = image.toString();
                  }
                else
                  attrValue = value.getImage();
                break;
              default:
                // Not a value: give up; the attribute read so far is dropped.
                break attributeReading;
            }
          attributes.addAttribute(name.getImage(), attrValue);
          optional(WS);
        }
      else
        // No equals sign: the attribute was given without a value.
        {
          noValueAttribute(element, name.getImage());
        }
    }
}
1054:
1055:
1059: protected String resolveNamedEntity(final String a_tag)
1060: {
1061:
1062: if (!a_tag.startsWith("&"))
1063: throw new AssertionError("Named entity " + a_tag +
1064: " must start witn '&'."
1065: );
1066:
1067: String tag = a_tag.substring(1);
1068:
1069: try
1070: {
1071: Entity entity = dtd.getEntity(tag);
1072: if (entity != null)
1073: return entity.getString();
1074:
1075: entity = dtd.getEntity(tag.toLowerCase());
1076:
1077: if (entity != null)
1078: {
1079: error("The name of this entity should be in lowercase", a_tag);
1080: return entity.getString();
1081: }
1082: }
1083: catch (IndexOutOfBoundsException ibx)
1084: {
1085:
1086: }
1087:
1088: error("Unknown named entity", a_tag);
1089: return a_tag;
1090: }
1091:
1092:
1097: protected char resolveNumericEntity(final String a_tag)
1098: {
1099:
1100: if (!a_tag.startsWith("&#"))
1101: throw new AssertionError("Numeric entity " + a_tag +
1102: " must start witn '&#'."
1103: );
1104:
1105: String tag = a_tag.substring(2);
1106:
1107: try
1108: {
1109:
1110: char cx = tag.charAt(0);
1111: if (cx == 'x' || cx == 'X')
1112:
1113: return (char) Integer.parseInt(tag.substring(1), 16);
1114:
1115: return (char) Integer.parseInt(tag);
1116: }
1117:
1118:
1119: catch (NumberFormatException nex)
1120: {
1121: }
1122: catch (IndexOutOfBoundsException ix)
1123: {
1124: }
1125:
1126: error("Invalid numeric entity", a_tag);
1127: return '?';
1128: }
1129:
1130:
1134: protected void restart()
1135: {
1136: documentTags.clear();
1137: titleHandled = false;
1138: titleOpen = false;
1139: buffer.setLength(0);
1140: title.setLength(0);
1141: validator.restart();
1142: }
1143:
1144:
/**
 * Hook called for every start tag before
 * {@link #handleStartTag(TagElement)}; does nothing in this class. A
 * ChangedCharSetException thrown by an override is caught by the caller and
 * reported as an error.
 *
 * @param tag the tag being opened
 * @throws ChangedCharSetException never thrown by this implementation
 */
protected void startTag(TagElement tag)
  throws ChangedCharSetException
{
}
1155:
1156:
1162: private void _handleCompleteElement(TagElement tag)
1163: {
1164: _handleStartTag(tag);
1165:
1166:
1167: HTML.Tag h = tag.getHTMLTag();
1168: if (h == HTML.Tag.SCRIPT || h == HTML.Tag.STYLE)
1169: {
1170: boolean tmp = titleOpen;
1171: titleOpen = false;
1172: _handleText();
1173: titleOpen = tmp;
1174: }
1175: else
1176: _handleText();
1177:
1178: _handleEndTag(tag);
1179: }
1180:
1181:
1187: private void _handleEmptyTag(TagElement tag)
1188: {
1189: try
1190: {
1191: validator.validateTag(tag, attributes);
1192: handleEmptyTag(tag);
1193: HTML.Tag h = tag.getHTMLTag();
1194:
1195:
1196:
1197:
1198: if (isBlock(h))
1199: optional(WS);
1200: }
1201: catch (ChangedCharSetException ex)
1202: {
1203: error("Changed charset exception:", ex.getMessage());
1204: }
1205: }
1206:
1207:
1213: private void _handleEndTag(TagElement tag)
1214: {
1215: if (validator.closeTag(tag))
1216: _handleEndTag_remaining(tag);
1217: }
1218:
1219:
1224: void _handleEndTag_remaining(TagElement tag)
1225: {
1226: HTML.Tag h = tag.getHTMLTag();
1227:
1228: handleEndTag(tag);
1229: endTag(tag.fictional());
1230:
1231: if (h.isPreformatted())
1232: preformatted--;
1233: if (preformatted < 0)
1234: preformatted = 0;
1235:
1236:
1237:
1238: if (isBlock(h))
1239: optional(WS);
1240:
1241: if (h == HTML.Tag.TITLE)
1242: {
1243: titleOpen = false;
1244: titleHandled = true;
1245:
1246: char[] a = new char[ title.length() ];
1247: title.getChars(0, a.length, a, 0);
1248: handleTitle(a);
1249: }
1250: }
1251:
1252:
1259: void _handleStartTag(TagElement tag)
1260: {
1261: validator.openTag(tag, attributes);
1262: startingTag(tag);
1263: handleStartTag(tag);
1264:
1265: HTML.Tag h = tag.getHTMLTag();
1266:
1267: if (isBlock(h))
1268: optional(WS);
1269:
1270: if (h.isPreformatted())
1271: preformatted++;
1272:
1273: if (h == HTML.Tag.TITLE)
1274: {
1275: if (titleHandled)
1276: error("Repetetive <TITLE> tag");
1277: titleOpen = true;
1278: titleHandled = false;
1279: }
1280: }
1281:
1282:
1286: private void forciblyCloseTheTag()
1287: throws ParseException
1288: {
1289: int closeAt = 0;
1290: buffer.setLength(0);
1291:
1292: ahead:
1293: for (int i = 1; i < 100; i++)
1294: {
1295: t = getTokenAhead(i - 1);
1296: if (t.kind == EOF || t.kind == BEGIN)
1297: break ahead;
1298: if (t.kind == END)
1299: {
1300:
1301: closeAt = i;
1302: break ahead;
1303: }
1304: }
1305: if (closeAt > 0)
1306: {
1307: buffer.append("Ignoring '");
1308: for (int i = 1; i <= closeAt; i++)
1309: {
1310: t = getNextToken();
1311: append(t);
1312: }
1313: buffer.append('\'');
1314: error(buffer.toString());
1315: }
1316: }
1317:
1318:
1322: private void handleComment()
1323: {
1324: char[] a = new char[ buffer.length() ];
1325: buffer.getChars(0, a.length, a, 0);
1326: handleComment(a);
1327: }
1328:
1329: private TagElement makeTagElement(String name, boolean isSupposed)
1330: {
1331: Element e = (Element) dtd.elementHash.get(name.toLowerCase());
1332: if (e == null)
1333: {
1334: error("Unknown tag <" + name + ">");
1335: e = dtd.getElement(name);
1336: e.name = name.toUpperCase();
1337: e.index = -1;
1338: }
1339:
1340: if (!documentTags.contains(e.name))
1341: {
1342: markFirstTime(e);
1343: documentTags.add(e.name);
1344: }
1345:
1346: return makeTag(e, isSupposed);
1347: }
1348:
1349:
1355: private void readTillTokenE(int till)
1356: throws ParseException
1357: {
1358: buffer.setLength(0);
1359: read:
1360: while (true)
1361: {
1362: t = getNextToken();
1363: if (t.kind == Constants.ENTITY)
1364: {
1365: resolveAndAppendEntity(t);
1366: }
1367: else if (t.kind == EOF)
1368: {
1369: error("unexpected eof", t);
1370: break read;
1371: }
1372: else if (t.kind == till)
1373: break read;
1374: else if (t.kind == WS)
1375: {
1376:
1377: String s = t.getImage();
1378: char c;
1379: for (int i = 0; i < s.length(); i++)
1380: {
1381: c = s.charAt(i);
1382: if (c == '\r')
1383: buffer.append(' ');
1384: else if (c == '\n')
1385: { }
1386: else if (c == '\t')
1387: buffer.append(' ');
1388: else
1389: buffer.append(c);
1390: }
1391: }
1392: else
1393: append(t);
1394: }
1395: }
1396:
1397:
1401: private void resolveAndAppendEntity(Token entity)
1402: {
1403: switch (entity.category)
1404: {
1405: case ENTITY_NAMED :
1406: buffer.append(resolveNamedEntity(entity.getImage()));
1407: break;
1408:
1409: case ENTITY_NUMERIC :
1410: buffer.append(resolveNumericEntity(entity.getImage()));
1411: break;
1412:
1413: default :
1414: throw new AssertionError("Invalid entity category " +
1415: entity.category
1416: );
1417: }
1418: }
1419:
1420:
1428: private void restOfTag(boolean closing, Token name, Token start)
1429: throws ParseException
1430: {
1431: boolean end = false;
1432: Token next;
1433:
1434: optional(WS);
1435:
1436: readAttributes(name.getImage());
1437:
1438: optional(WS);
1439:
1440: next = getTokenAhead();
1441: if (next.kind == END)
1442: {
1443: mustBe(END);
1444: end = true;
1445: }
1446:
1447: hTag = new Token(start, next);
1448:
1449: if (!end)
1450: {
1451:
1452:
1453: if (dtd.elementHash.get(name.getImage().toLowerCase()) == null &&
1454: backupMode
1455: )
1456: {
1457: error("Errors in tag body and unknown tag name. " +
1458: "Treating the tag as a text."
1459: );
1460: reset();
1461:
1462: hTag = mustBe(BEGIN);
1463: buffer.setLength(0);
1464: buffer.append(hTag.getImage());
1465: CDATA(false);
1466: return;
1467: }
1468: else
1469: {
1470: error("Forcibly closing invalid parameter list");
1471: forciblyCloseTheTag();
1472: }
1473: }
1474:
1475: if (closing)
1476: {
1477: endTag(false);
1478: _handleEndTag(makeTagElement(name.getImage(), false));
1479: }
1480: else
1481: {
1482: TagElement te = makeTagElement(name.getImage(), false);
1483: if (te.getElement().type == DTDConstants.EMPTY)
1484: _handleEmptyTag(te);
1485: else
1486: {
1487:
1488:
1489: optional(WS);
1490: _handleStartTag(te);
1491: }
1492: }
1493: }
1494:
1495:
1501: private void startingTag(TagElement tag)
1502: {
1503: try
1504: {
1505: startTag(tag);
1506: }
1507: catch (ChangedCharSetException cax)
1508: {
1509: error("Invalid change of charset");
1510: }
1511: }
1512:
1513: private void ws_error()
1514: {
1515: error("Whitespace here is not permitted");
1516: }
1517:
1518:
1528: private boolean isBlock(HTML.Tag tag)
1529: {
1530: return tag.isBlock() || tag == HTML.Tag.STYLE || tag == HTML.Tag.FRAME;
1531: }
1532: }