1    
2    /* ====================================================================
3     * The Apache Software License, Version 1.1
4     *
5     * Copyright (c) 2002 The Apache Software Foundation.  All rights
6     * reserved.
7     *
8     * Redistribution and use in source and binary forms, with or without
9     * modification, are permitted provided that the following conditions
10    * are met:
11    *
12    * 1. Redistributions of source code must retain the above copyright
13    *    notice, this list of conditions and the following disclaimer.
14    *
15    * 2. Redistributions in binary form must reproduce the above copyright
16    *    notice, this list of conditions and the following disclaimer in
17    *    the documentation and/or other materials provided with the
18    *    distribution.
19    *
20    * 3. The end-user documentation included with the redistribution,
21    *    if any, must include the following acknowledgment:
22    *       "This product includes software developed by the
23    *        Apache Software Foundation (http://www.apache.org/)."
24    *    Alternately, this acknowledgment may appear in the software itself,
25    *    if and wherever such third-party acknowledgments normally appear.
26    *
27    * 4. The names "Apache" and "Apache Software Foundation" and
28    *    "Apache POI" must not be used to endorse or promote products
29    *    derived from this software without prior written permission. For
30    *    written permission, please contact apache@apache.org.
31    *
32    * 5. Products derived from this software may not be called "Apache",
33    *    "Apache POI", nor may "Apache" appear in their name, without
34    *    prior written permission of the Apache Software Foundation.
35    *
36    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47    * SUCH DAMAGE.
48    * ====================================================================
49    *
50    * This software consists of voluntary contributions made by many
51    * individuals on behalf of the Apache Software Foundation.  For more
52    * information on the Apache Software Foundation, please see
53    * <http://www.apache.org/>.
54    */
55   
56   package org.apache.poi.poifs.filesystem;
57   
58   import java.io.*;
59   
60   /**
61    * This class provides methods to read a DocumentEntry managed by a
62    * Filesystem instance.
63    *
64    * @author Marc Johnson (mjohnson at apache dot org)
65    */
66   
67   public class DocumentInputStream
68       extends InputStream
69   {
70   
71       // current offset into the Document
72       private int              _current_offset;
73   
74       // current marked offset into the Document (used by mark and
75       // reset)
76       private int              _marked_offset;
77   
78       // the Document's size
79       private int              _document_size;
80   
81       // have we been closed?
82       private boolean          _closed;
83   
84       // the actual Document
85       private POIFSDocument    _document;
86   
87       // buffer used to read one byte at a time
88       private byte[]           _tiny_buffer;
89   
90       // returned by read operations if we're at end of document
91       static private final int EOD = -1;
92   
93       /**
94        * Create an InputStream from the specified DocumentEntry
95        *
96        * @param document the DocumentEntry to be read
97        *
98        * @exception IOException if the DocumentEntry cannot be opened
99        *            (like, maybe it has been deleted?)
100       */
101  
102      public DocumentInputStream(final DocumentEntry document)
103          throws IOException
104      {
105          _current_offset = 0;
106          _marked_offset  = 0;
107          _document_size  = document.getSize();
108          _closed         = false;
109          _tiny_buffer    = null;
110          if (document instanceof DocumentNode)
111          {
112              _document = (( DocumentNode ) document).getDocument();
113          }
114          else
115          {
116              throw new IOException("Cannot open internal document storage");
117          }
118      }
119  
120      /**
121       * Create an InputStream from the specified Document
122       *
123       * @param document the Document to be read
124       *
125       * @exception IOException if the DocumentEntry cannot be opened
126       *            (like, maybe it has been deleted?)
127       */
128  
129      public DocumentInputStream(final POIFSDocument document)
130          throws IOException
131      {
132          _current_offset = 0;
133          _marked_offset  = 0;
134          _document_size  = document.getSize();
135          _closed         = false;
136          _tiny_buffer    = null;
137          _document       = document;
138      }
139  
140      /**
141       * Returns the number of bytes that can be read (or skipped over)
142       * from this input stream without blocking by the next caller of a
143       * method for this input stream. The next caller might be the same
144       * thread or or another thread.
145       *
146       * @return the number of bytes that can be read from this input
147       *         stream without blocking.
148       *
149       * @exception IOException on error (such as the stream has been
150       *            closed)
151       */
152  
153      public int available()
154          throws IOException
155      {
156          dieIfClosed();
157          return _document_size - _current_offset;
158      }
159  
160      /**
161       * Closes this input stream and releases any system resources
162       * associated with the stream.
163       *
164       * @exception IOException
165       */
166  
167      public void close()
168          throws IOException
169      {
170          _closed = true;
171      }
172  
173      /**
174       * Marks the current position in this input stream. A subsequent
175       * call to the reset method repositions this stream at the last
176       * marked position so that subsequent reads re-read the same
177       * bytes.
178       * <p>
179       * The readlimit arguments tells this input stream to allow that
180       * many bytes to be read before the mark position gets
181       * invalidated. This implementation, however, does not care.
182       * <p>
183       * The general contract of mark is that, if the method
184       * markSupported returns true, the stream somehow remembers all
185       * the bytes read after the call to mark and stands ready to
186       * supply those same bytes again if and whenever the method reset
187       * is called. However, the stream is not required to remember any
188       * data at all if more than readlimit bytes are read from the
189       * stream before reset is called. But this stream will.
190       *
191       * @param ignoredReadlimit the maximum limit of bytes that can be
192       *                         read before the mark position becomes
193       *                         invalid. Ignored by this
194       *                         implementation.
195       */
196  
197      public void mark(int ignoredReadlimit)
198      {
199          _marked_offset = _current_offset;
200      }
201  
202      /**
203       * Tests if this input stream supports the mark and reset methods.
204       *
205       * @return true
206       */
207  
208      public boolean markSupported()
209      {
210          return true;
211      }
212  
213      /**
214       * Reads the next byte of data from the input stream. The value
215       * byte is returned as an int in the range 0 to 255. If no byte is
216       * available because the end of the stream has been reached, the
217       * value -1 is returned. The definition of this method in
218       * java.io.InputStream allows this method to block, but it won't.
219       *
220       * @return the next byte of data, or -1 if the end of the stream
221       *         is reached.
222       *
223       * @exception IOException
224       */
225  
226      public int read()
227          throws IOException
228      {
229          dieIfClosed();
230          if (atEOD())
231          {
232              return EOD;
233          }
234          if (_tiny_buffer == null)
235          {
236              _tiny_buffer = new byte[ 1 ];
237          }
238          _document.read(_tiny_buffer, _current_offset++);
239          return ((int)_tiny_buffer[ 0 ]) & 0x000000FF;
240      }
241  
242      /**
243       * Reads some number of bytes from the input stream and stores
244       * them into the buffer array b. The number of bytes actually read
245       * is returned as an integer. The definition of this method in
246       * java.io.InputStream allows this method to block, but it won't.
247       * <p>
248       * If b is null, a NullPointerException is thrown. If the length
249       * of b is zero, then no bytes are read and 0 is returned;
250       * otherwise, there is an attempt to read at least one byte. If no
251       * byte is available because the stream is at end of file, the
252       * value -1 is returned; otherwise, at least one byte is read and
253       * stored into b.
254       * <p>
255       * The first byte read is stored into element b[0], the next one
256       * into b[1], and so on. The number of bytes read is, at most,
257       * equal to the length of b. Let k be the number of bytes actually
258       * read; these bytes will be stored in elements b[0] through
259       * b[k-1], leaving elements b[k] through b[b.length-1] unaffected.
260       * <p>
261       * If the first byte cannot be read for any reason other than end
262       * of file, then an IOException is thrown. In particular, an
263       * IOException is thrown if the input stream has been closed.
264       * <p>
265       * The read(b) method for class InputStream has the same effect as:
266       * <p>
267       * <code>read(b, 0, b.length)</code>
268       *
269       * @param b the buffer into which the data is read.
270       *
271       * @return the total number of bytes read into the buffer, or -1
272       *         if there is no more data because the end of the stream
273       *         has been reached.
274       *
275       * @exception IOException
276       * @exception NullPointerException
277       */
278  
279      public int read(final byte [] b)
280          throws IOException, NullPointerException
281      {
282          return read(b, 0, b.length);
283      }
284  
285      /**
286       * Reads up to len bytes of data from the input stream into an
287       * array of bytes. An attempt is made to read as many as len
288       * bytes, but a smaller number may be read, possibly zero. The
289       * number of bytes actually read is returned as an integer.
290       * <p>
291       * The definition of this method in java.io.InputStream allows it
292       * to block, but it won't.
293       * <p>
294       * If b is null, a NullPointerException is thrown.
295       * <p>
296       * If off is negative, or len is negative, or off+len is greater
297       * than the length of the array b, then an
298       * IndexOutOfBoundsException is thrown.
299       * <p>
300       * If len is zero, then no bytes are read and 0 is returned;
301       * otherwise, there is an attempt to read at least one byte. If no
302       * byte is available because the stream is at end of file, the
303       * value -1 is returned; otherwise, at least one byte is read and
304       * stored into b.
305       * <p>
306       * The first byte read is stored into element b[off], the next one
307       * into b[off+1], and so on. The number of bytes read is, at most,
308       * equal to len. Let k be the number of bytes actually read; these
309       * bytes will be stored in elements b[off] through b[off+k-1],
310       * leaving elements b[off+k] through b[off+len-1] unaffected.
311       * <p>
312       * In every case, elements b[0] through b[off] and elements
313       * b[off+len] through b[b.length-1] are unaffected.
314       * <p>
315       * If the first byte cannot be read for any reason other than end
316       * of file, then an IOException is thrown. In particular, an
317       * IOException is thrown if the input stream has been closed.
318       *
319       * @param b the buffer into which the data is read.
320       * @param off the start offset in array b at which the data is
321       *            written.
322       * @param len the maximum number of bytes to read.
323       *
324       * @return the total number of bytes read into the buffer, or -1
325       *         if there is no more data because the end of the stream
326       *         has been reached.
327       *
328       * @exception IOException
329       * @exception NullPointerException
330       * @exception IndexOutOfBoundsException
331       */
332  
333      public int read(final byte [] b, final int off, final int len)
334          throws IOException, NullPointerException, IndexOutOfBoundsException
335      {
336          dieIfClosed();
337          if (b == null)
338          {
339              throw new NullPointerException("buffer is null");
340          }
341          if ((off < 0) || (len < 0) || (b.length < (off + len)))
342          {
343              throw new IndexOutOfBoundsException(
344                  "can't read past buffer boundaries");
345          }
346          if (len == 0)
347          {
348              return 0;
349          }
350          if (atEOD())
351          {
352              return EOD;
353          }
354          int limit = Math.min(available(), len);
355  
356          if ((off == 0) && (limit == b.length))
357          {
358              _document.read(b, _current_offset);
359          }
360          else
361          {
362              byte[] buffer = new byte[ limit ];
363  
364              _document.read(buffer, _current_offset);
365              System.arraycopy(buffer, 0, b, off, limit);
366          }
367          _current_offset += limit;
368          return limit;
369      }
370  
371      /**
372       * Repositions this stream to the position at the time the mark
373       * method was last called on this input stream.
374       * <p>
375       * The general contract of reset is:
376       * <p>
377       * <ul>
378       *    <li>
379       *        If the method markSupported returns true, then:
380       *        <ul>
381       *            <li>
382       *                If the method mark has not been called since the
383       *                stream was created, or the number of bytes read
384       *                from the stream since mark was last called is
385       *                larger than the argument to mark at that last
386       *                call, then an IOException might be thrown.
387       *            </li>
388       *            <li>
389       *                If such an IOException is not thrown, then the
390       *                stream is reset to a state such that all the
391       *                bytes read since the most recent call to mark
392       *                (or since the start of the file, if mark has not
393       *                been called) will be resupplied to subsequent
394       *                callers of the read method, followed by any
395       *                bytes that otherwise would have been the next
396       *                input data as of the time of the call to reset.
397       *             </li>
398       *         </ul>
399       *     </li>
400       *     <li>
401       *         If the method markSupported returns false, then:
402       *         <ul>
403       *             <li>
404       *                 The call to reset may throw an IOException.
405       *             </li>
406       *             <li>
407       *                 If an IOException is not thrown, then the
408       *                 stream is reset to a fixed state that depends
409       *                 on the particular type of the input and how it
410       *                 was created. The bytes that will be supplied to
411       *                 subsequent callers of the read method depend on
412       *                 the particular type of the input stream.
413       *             </li>
414       *         </ul>
415       *     </li>
416       * </ul>
417       * <p>
418       * All well and good ... this class's markSupported method returns
419       * true and this method does not care whether you've called mark
420       * at all, or whether you've exceeded the number of bytes
421       * specified in the last call to mark. We're basically walking a
422       * byte array ... mark and reset to your heart's content.
423       */
424  
425      public void reset()
426      {
427          _current_offset = _marked_offset;
428      }
429  
430      /**
431       * Skips over and discards n bytes of data from this input
432       * stream. The skip method may, for a variety of reasons, end up
433       * skipping over some smaller number of bytes, possibly 0. This
434       * may result from any of a number of conditions; reaching end of
435       * file before n bytes have been skipped is only one
436       * possibility. The actual number of bytes skipped is returned. If
437       * n is negative, no bytes are skipped.
438       *
439       * @param n the number of bytes to be skipped.
440       *
441       * @return the actual number of bytes skipped.
442       *
443       * @exception IOException
444       */
445  
446      public long skip(final long n)
447          throws IOException
448      {
449          dieIfClosed();
450          if (n < 0)
451          {
452              return 0;
453          }
454          int new_offset = _current_offset + ( int ) n;
455  
456          if (new_offset < _current_offset)
457          {
458  
459              // wrap around in converting a VERY large long to an int
460              new_offset = _document_size;
461          }
462          else if (new_offset > _document_size)
463          {
464              new_offset = _document_size;
465          }
466          long rval = new_offset - _current_offset;
467  
468          _current_offset = new_offset;
469          return rval;
470      }
471  
472      private void dieIfClosed()
473          throws IOException
474      {
475          if (_closed)
476          {
477              throw new IOException(
478                  "cannot perform requested operation on a closed stream");
479          }
480      }
481  
482      private boolean atEOD()
483      {
484          return _current_offset == _document_size;
485      }
486  }   // end public class DocumentInputStream
487  
488