1:
37:
38: package ;
39:
40: import ;
41: import ;
42: import ;
43: import ;
44: import ;
45: import ;
46:
47:
51:
52:
57: public class XPathTokenizer
58: implements XPathParser.yyInput
59:
60: {
61:
62: static class XPathToken
63:
64: {
65:
66: int type;
67: String val;
68:
69: XPathToken (int type)
70: {
71: this (type, null);
72: }
73:
74: XPathToken (int type, String val)
75: {
76:
77: this.type = type;
78: this.val = val;
79: }
80:
81: public String getText ()
82: {
83: return val;
84: }
85:
86: public String toString ()
87: {
88: return val;
89: }
90:
91: }
92:
93: static final Map keywords = new TreeMap ();
94: static
95: {
96: keywords.put ("ancestor", new Integer (XPathParser.ANCESTOR));
97: keywords.put ("ancestor-or-self", new Integer (XPathParser.ANCESTOR_OR_SELF));
98: keywords.put ("attribute", new Integer (XPathParser.ATTRIBUTE));
99: keywords.put ("child", new Integer (XPathParser.CHILD));
100: keywords.put ("descendant", new Integer (XPathParser.DESCENDANT));
101: keywords.put ("descendant-or-self", new Integer (XPathParser.DESCENDANT_OR_SELF));
102: keywords.put ("following", new Integer (XPathParser.FOLLOWING));
103: keywords.put ("following-sibling", new Integer (XPathParser.FOLLOWING_SIBLING));
104: keywords.put ("namespace", new Integer (XPathParser.NAMESPACE));
105: keywords.put ("parent", new Integer (XPathParser.PARENT));
106: keywords.put ("preceding", new Integer (XPathParser.PRECEDING));
107: keywords.put ("preceding-sibling", new Integer (XPathParser.PRECEDING_SIBLING));
108: keywords.put ("self", new Integer (XPathParser.SELF));
109: keywords.put ("div", new Integer (XPathParser.DIV));
110: keywords.put ("mod", new Integer (XPathParser.MOD));
111: keywords.put ("or", new Integer (XPathParser.OR));
112: keywords.put ("and", new Integer (XPathParser.AND));
113: keywords.put ("comment", new Integer (XPathParser.COMMENT));
114: keywords.put ("processing-instruction", new Integer (XPathParser.PROCESSING_INSTRUCTION));
115: keywords.put ("text", new Integer (XPathParser.TEXT));
116: keywords.put ("node", new Integer (XPathParser.NODE));
117: }
118:
119: Reader in;
120: XPathToken token;
121: XPathToken lastToken;
122:
123: public XPathTokenizer (String expr)
124: {
125: this (new StringReader (expr));
126: }
127:
128: XPathTokenizer (Reader in)
129: {
130: this.in = in.markSupported () ? in : new BufferedReader (in);
131: }
132:
133:
154:
155: public boolean advance ()
156: throws IOException
157: {
158: lastToken = token;
159: int c = in.read ();
160: switch (c)
161: {
162: case -1:
163: return false;
164: case 0x20:
165: case 0x09:
166: case 0x0d:
167: case 0x0a:
168: return advance ();
169: case 0x22:
170: case 0x27:
171: token = consume_literal (c);
172: break;
173: case 0x28:
174: token = new XPathToken (XPathParser.LP);
175: break;
176: case 0x29:
177: token = new XPathToken (XPathParser.RP);
178: break;
179: case 0x5b:
180: token = new XPathToken (XPathParser.LB);
181: break;
182: case 0x5d:
183: token = new XPathToken (XPathParser.RB);
184: break;
185: case 0x2c:
186: token = new XPathToken (XPathParser.COMMA);
187: break;
188: case 0x7c:
189: token = new XPathToken (XPathParser.PIPE);
190: break;
191: case 0x2f:
192: in.mark (1);
193: int d1 = in.read ();
194: if (d1 == 0x2f)
195: {
196: token = new XPathToken (XPathParser.DOUBLE_SLASH);
197: }
198: else
199: {
200: in.reset ();
201: token = new XPathToken (XPathParser.SLASH);
202: }
203: break;
204: case 0x3d:
205: token = new XPathToken (XPathParser.EQ);
206: break;
207: case 0x21:
208: in.mark (1);
209: int d2 = in.read ();
210: if (d2 == 0x3d)
211: {
212: token = new XPathToken (XPathParser.NE);
213: }
214: else
215: {
216: in.reset ();
217: token = new XPathToken (XPathParser.yyErrorCode);
218: }
219: break;
220: case 0x3e:
221: in.mark (1);
222: int d3 = in.read ();
223: if (d3 == 0x3d)
224: {
225: token = new XPathToken (XPathParser.GTE);
226: }
227: else
228: {
229: in.reset ();
230: token = new XPathToken (XPathParser.GT);
231: }
232: break;
233: case 0x3c:
234: in.mark (1);
235: int d4 = in.read ();
236: if (d4 == 0x3d)
237: {
238: token = new XPathToken (XPathParser.LTE);
239: }
240: else
241: {
242: in.reset ();
243: token = new XPathToken (XPathParser.LT);
244: }
245: break;
246: case 0x2b:
247: token = new XPathToken (XPathParser.PLUS);
248: break;
249: case 0x2d:
250: token = new XPathToken (XPathParser.MINUS);
251: break;
252: case 0x40:
253: token = new XPathToken (XPathParser.AT);
254: break;
255: case 0x2a:
256: token = new XPathToken (XPathParser.STAR);
257: break;
258: case 0x24:
259: token = new XPathToken (XPathParser.DOLLAR);
260: break;
261: case 0x3a:
262: in.mark (1);
263: int d5 = in.read ();
264: if (d5 == 0x3a)
265: {
266: token = new XPathToken (XPathParser.DOUBLE_COLON);
267: }
268: else
269: {
270: in.reset ();
271: token = new XPathToken (XPathParser.COLON);
272: }
273: break;
274: case 0x2e:
275: in.mark (1);
276: int d6 = in.read ();
277: if (d6 == 0x2e)
278: {
279: token = new XPathToken (XPathParser.DOUBLE_DOT);
280: }
281: else
282: {
283: in.reset ();
284: token = new XPathToken (XPathParser.DOT);
285: }
286: break;
287: default:
288: if (c >= 0x30 && c <= 0x39)
289: {
290: token = consume_digits (c);
291: }
292: else if (c == 0x5f || Character.isLetter ((char) c))
293: {
294: token = consume_name (c);
295: }
296: else
297: {
298: token = new XPathToken (XPathParser.yyErrorCode);
299: }
300: }
301: return true;
302: }
303:
304: public int token ()
305: {
306: return token.type;
307: }
308:
309: public Object value ()
310: {
311: return token.val;
312: }
313:
314: XPathToken consume_literal (int delimiter)
315: throws IOException
316: {
317: StringBuffer buf = new StringBuffer ();
318: while (true)
319: {
320: int c = in.read ();
321: if (c == -1)
322: {
323: return new XPathToken (XPathParser.yyErrorCode);
324: }
325: else if (c == delimiter)
326: {
327: return new XPathToken (XPathParser.LITERAL, buf.toString ());
328: }
329: else
330: {
331: buf.append ((char) c);
332: }
333: }
334: }
335:
336: XPathToken consume_digits (int c)
337: throws IOException
338: {
339: StringBuffer buf = new StringBuffer ();
340: buf.append ((char) c);
341: while (true)
342: {
343: in.mark (1);
344: c = in.read ();
345: if (c >= 0x30 && c <= 0x39)
346: {
347: buf.append ((char) c);
348: }
349: else
350: {
351: in.reset ();
352: return new XPathToken (XPathParser.DIGITS, buf.toString ());
353: }
354: }
355: }
356:
357: XPathToken consume_name (int c)
358: throws IOException
359: {
360: StringBuffer buf = new StringBuffer ();
361: buf.append ((char) c);
362: while (true)
363: {
364: in.mark (1);
365: c = in.read ();
366: if (isNameChar (c))
367: {
368: buf.append ((char) c);
369: }
370: else
371: {
372: in.reset ();
373: String name = buf.toString ();
374: Integer keyword = (Integer) keywords.get (name);
375: if (keyword == null)
376: {
377: return new XPathToken (XPathParser.NAME, name);
378: }
379: else
380: {
381: int val = keyword.intValue ();
382: switch (val)
383: {
384: case XPathParser.NODE:
385: case XPathParser.COMMENT:
386: case XPathParser.TEXT:
387: case XPathParser.PROCESSING_INSTRUCTION:
388:
389: in.mark (1);
390: do
391: {
392: c = in.read ();
393: }
394: while (c == 0x20 || c == 0x09);
395: if (c != 0x28)
396: {
397: in.reset ();
398: return new XPathToken (XPathParser.NAME, name);
399: }
400: break;
401: case XPathParser.CHILD:
402: case XPathParser.PARENT:
403: case XPathParser.SELF:
404: case XPathParser.DESCENDANT:
405: case XPathParser.ANCESTOR:
406: case XPathParser.DESCENDANT_OR_SELF:
407: case XPathParser.ANCESTOR_OR_SELF:
408: case XPathParser.ATTRIBUTE:
409: case XPathParser.NAMESPACE:
410: case XPathParser.FOLLOWING:
411: case XPathParser.FOLLOWING_SIBLING:
412: case XPathParser.PRECEDING:
413: case XPathParser.PRECEDING_SIBLING:
414:
415: in.mark(1);
416: do
417: {
418: c = in.read();
419: }
420: while (c == 0x20 || c == 0x09);
421: if (c == 0x3a)
422: {
423: c = in.read();
424: if (c == 0x3a)
425: {
426: in.reset();
427: return new XPathToken(val);
428: }
429: }
430: in.reset();
431: return new XPathToken(XPathParser.NAME, name);
432: case XPathParser.DIV:
433: case XPathParser.MOD:
434:
435: if (lastToken == null)
436: {
437: return new XPathToken(XPathParser.NAME, name);
438: }
439: switch (lastToken.type)
440: {
441: case XPathParser.LP:
442: case XPathParser.LB:
443: case XPathParser.COMMA:
444: case XPathParser.PIPE:
445: case XPathParser.EQ:
446: case XPathParser.NE:
447: case XPathParser.GT:
448: case XPathParser.LT:
449: case XPathParser.GTE:
450: case XPathParser.LTE:
451: case XPathParser.PLUS:
452: case XPathParser.MINUS:
453: case XPathParser.STAR:
454: case XPathParser.AT:
455: case XPathParser.DOLLAR:
456: case XPathParser.COLON:
457: case XPathParser.DOUBLE_COLON:
458: case XPathParser.DIV:
459: case XPathParser.MOD:
460: case XPathParser.OR:
461: case XPathParser.AND:
462: case XPathParser.SLASH:
463: return new XPathToken(XPathParser.NAME, name);
464: }
465: break;
466: }
467: return new XPathToken (val);
468: }
469: }
470: }
471: }
472:
473: boolean isNameChar (int c)
474: {
475:
476: return (c == 0x5f
477: || c == 0x2d
478: || c == 0x2e
479: || (c >= 0x30 && c <= 0x39)
480:
481: || (c >= 0x0300 && c <= 0x0345)
482: || (c >= 0x0360 && c <= 0x0361)
483: || (c >= 0x0483 && c <= 0x0486)
484: || (c >= 0x0591 && c <= 0x05A1)
485: || (c >= 0x05A3 && c <= 0x05B9)
486: || (c >= 0x05BB && c <= 0x05BD)
487: || c == 0x05BF
488: || (c >= 0x05C1 && c <= 0x05C2)
489: || c == 0x05C4
490: || (c >= 0x064B && c <= 0x0652)
491: || c == 0x0670
492: || (c >= 0x06D6 && c <= 0x06DC)
493: || (c >= 0x06DD && c <= 0x06DF)
494: || (c >= 0x06E0 && c <= 0x06E4)
495: || (c >= 0x06E7 && c <= 0x06E8)
496: || (c >= 0x06EA && c <= 0x06ED)
497: || (c >= 0x0901 && c <= 0x0903)
498: || c == 0x093C
499: || (c >= 0x093E && c <= 0x094C)
500: || c == 0x094D
501: || (c >= 0x0951 && c <= 0x0954)
502: || (c >= 0x0962 && c <= 0x0963)
503: || (c >= 0x0981 && c <= 0x0983)
504: || c == 0x09BC
505: || c == 0x09BE
506: || c == 0x09BF
507: || (c >= 0x09C0 && c <= 0x09C4)
508: || (c >= 0x09C7 && c <= 0x09C8)
509: || (c >= 0x09CB && c <= 0x09CD)
510: || c == 0x09D7
511: || (c >= 0x09E2 && c <= 0x09E3)
512: || c == 0x0A02
513: || c == 0x0A3C
514: || c == 0x0A3E
515: || c == 0x0A3F
516: || (c >= 0x0A40 && c <= 0x0A42)
517: || (c >= 0x0A47 && c <= 0x0A48)
518: || (c >= 0x0A4B && c <= 0x0A4D)
519: || (c >= 0x0A70 && c <= 0x0A71)
520: || (c >= 0x0A81 && c <= 0x0A83)
521: || c == 0x0ABC
522: || (c >= 0x0ABE && c <= 0x0AC5)
523: || (c >= 0x0AC7 && c <= 0x0AC9)
524: || (c >= 0x0ACB && c <= 0x0ACD)
525: || (c >= 0x0B01 && c <= 0x0B03)
526: || c == 0x0B3C
527: || (c >= 0x0B3E && c <= 0x0B43)
528: || (c >= 0x0B47 && c <= 0x0B48)
529: || (c >= 0x0B4B && c <= 0x0B4D)
530: || (c >= 0x0B56 && c <= 0x0B57)
531: || (c >= 0x0B82 && c <= 0x0B83)
532: || (c >= 0x0BBE && c <= 0x0BC2)
533: || (c >= 0x0BC6 && c <= 0x0BC8)
534: || (c >= 0x0BCA && c <= 0x0BCD)
535: || c == 0x0BD7
536: || (c >= 0x0C01 && c <= 0x0C03)
537: || (c >= 0x0C3E && c <= 0x0C44)
538: || (c >= 0x0C46 && c <= 0x0C48)
539: || (c >= 0x0C4A && c <= 0x0C4D)
540: || (c >= 0x0C55 && c <= 0x0C56)
541: || (c >= 0x0C82 && c <= 0x0C83)
542: || (c >= 0x0CBE && c <= 0x0CC4)
543: || (c >= 0x0CC6 && c <= 0x0CC8)
544: || (c >= 0x0CCA && c <= 0x0CCD)
545: || (c >= 0x0CD5 && c <= 0x0CD6)
546: || (c >= 0x0D02 && c <= 0x0D03)
547: || (c >= 0x0D3E && c <= 0x0D43)
548: || (c >= 0x0D46 && c <= 0x0D48)
549: || (c >= 0x0D4A && c <= 0x0D4D)
550: || c == 0x0D57
551: || c == 0x0E31
552: || (c >= 0x0E34 && c <= 0x0E3A)
553: || (c >= 0x0E47 && c <= 0x0E4E)
554: || c == 0x0EB1
555: || (c >= 0x0EB4 && c <= 0x0EB9)
556: || (c >= 0x0EBB && c <= 0x0EBC)
557: || (c >= 0x0EC8 && c <= 0x0ECD)
558: || (c >= 0x0F18 && c <= 0x0F19)
559: || c == 0x0F35
560: || c == 0x0F37
561: || c == 0x0F39
562: || c == 0x0F3E
563: || c == 0x0F3F
564: || (c >= 0x0F71 && c <= 0x0F84)
565: || (c >= 0x0F86 && c <= 0x0F8B)
566: || (c >= 0x0F90 && c <= 0x0F95)
567: || c == 0x0F97
568: || (c >= 0x0F99 && c <= 0x0FAD)
569: || (c >= 0x0FB1 && c <= 0x0FB7)
570: || c == 0x0FB9
571: || (c >= 0x20D0 && c <= 0x20DC)
572: || c == 0x20E1
573: || (c >= 0x302A && c <= 0x302F)
574: || c == 0x3099
575: || c == 0x309A
576:
577: || c == 0x00B7
578: || c == 0x02D0
579: || c == 0x02D1
580: || c == 0x0387
581: || c == 0x0640
582: || c == 0x0E46
583: || c == 0x0EC6
584: || c == 0x3005
585: || (c >= 0x3031 && c <= 0x3035)
586: || (c >= 0x309D && c <= 0x309E)
587: || (c >= 0x30FC && c <= 0x30FE)
588:
589: || Character.isLetter ((char) c));
590: }
591:
592: }