com.lowagie.text.pdf
Class PdfReader

java.lang.Object
  |
  +--com.lowagie.text.pdf.PdfReader
Direct Known Subclasses:
FdfReader

public class PdfReader
extends Object

Reads a PDF document and prepares its pages for import into an output document. This class is thread safe: a single instance can serve as many output documents as needed and can even be static.

Author:
Paulo Soares (psoares@consiste.pt)

Field Summary
protected  PRAcroForm acroForm
           
protected  PdfDictionary catalog
           
protected  PdfEncryption decrypt
           
protected  boolean encrypted
           
protected  int eofPos
           
protected  int freeXref
           
protected  int lastXref
           
protected  int objGen
           
protected  int objNum
           
protected  ArrayList pageInh
           
(package private) static PdfName[] pageInhCandidates
           
protected  PRIndirectReference[] pageRefs
           
protected  PdfDictionary[] pages
           
protected  int pagesCount
           
protected  byte[] password
           
protected  char pdfVersion
           
protected  boolean rebuilt
           
protected  boolean sharedStreams
           
protected  ArrayList strings
           
protected  boolean tampered
           
protected  PRTokeniser tokens
           
protected  PdfDictionary trailer
           
protected  int[] xref
           
protected  PdfObject[] xrefObj
           
 
Constructor Summary
PdfReader(byte[] pdfIn)
          Reads and parses a PDF document.
PdfReader(byte[] pdfIn, byte[] ownerPassword)
           
PdfReader(String filename)
          Reads and parses a PDF document.
PdfReader(String filename, byte[] ownerPassword)
           
PdfReader(URL url)
          Reads and parses a PDF document.
PdfReader(URL url, byte[] ownerPassword)
           
 
Method Summary
static byte[] ASCII85Decode(byte[] in)
           
static byte[] ASCIIHexDecode(byte[] in)
           
 void eliminateSharedStreams()
          Eliminates shared streams if they exist.
static byte[] FlateDecode(byte[] in)
           
static byte[] FlateDecode(byte[] in, boolean strict)
           
 PRAcroForm getAcroForm()
          Returns the document's acroform, if it has one.
 PdfDictionary getCatalog()
          Returns the document's catalog.
 Rectangle getCropBox(int index)
          Gets the crop box without taking rotation into account.
(package private)  PdfEncryption getDecrypt()
           
 int getEofPos()
           
 HashMap getInfo()
          Returns the content of the document information dictionary as a HashMap of String.
 int getLastXref()
           
 byte[] getMetadata()
          Gets the XML metadata.
static Rectangle getNormalizedRectangle(PdfArray box)
           
 int getNumberOfPages()
          Gets the number of pages in the document.
 byte[] getPageContent(int pageNum, RandomAccessFileOrArray file)
           
 PdfDictionary getPageN(int pageNum)
           
 PRIndirectReference getPageOrigRef(int pageNum)
           
 int getPageRotation(int index)
          Gets the page rotation.
 Rectangle getPageSize(int index)
          Gets the page size without taking rotation into account.
 Rectangle getPageSizeWithRotation(int index)
          Gets the page size, taking rotation into account.
static PdfObject getPdfObject(PdfObject obj)
           
protected  PdfReaderInstance getPdfReaderInstance(PdfWriter writer)
           
 char getPdfVersion()
           
 RandomAccessFileOrArray getSafeFile()
          Gets a new file instance of the original PDF document.
static byte[] getStreamBytes(PRStream stream, RandomAccessFileOrArray file)
           
 boolean isEncrypted()
           
 boolean isRebuilt()
          Checks if the document had errors and was rebuilt.
 boolean isTampered()
           
protected  void iteratePages(PdfDictionary page)
           
(package private) static PdfObject killIndirect(PdfObject obj)
           
protected  void killXref(PdfObject obj)
           
static byte[] LZWDecode(byte[] in)
           
protected  void popPageAttributes()
           
protected  void pushPageAttributes(PdfDictionary nodePages)
           
protected  PdfArray readArray()
           
private  void readDecryptedDocObj()
           
protected  PdfDictionary readDictionary()
           
protected  void readDocObj()
           
protected  void readPages()
           
protected  void readPdf()
           
protected  PdfObject readPRObject()
           
protected  void readXref()
           
protected  void readXrefSection()
           
protected  void rebuildXref()
           
 void setPageContent(int pageNum, byte[] content)
           
 void setTampered(boolean tampered)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

pageInhCandidates

static final PdfName[] pageInhCandidates

tokens

protected PRTokeniser tokens

xref

protected int[] xref

xrefObj

protected PdfObject[] xrefObj

trailer

protected PdfDictionary trailer

pages

protected PdfDictionary[] pages

catalog

protected PdfDictionary catalog

pageRefs

protected PRIndirectReference[] pageRefs

acroForm

protected PRAcroForm acroForm

pageInh

protected ArrayList pageInh

pagesCount

protected int pagesCount

encrypted

protected boolean encrypted

rebuilt

protected boolean rebuilt

freeXref

protected int freeXref

tampered

protected boolean tampered

lastXref

protected int lastXref

eofPos

protected int eofPos

pdfVersion

protected char pdfVersion

decrypt

protected PdfEncryption decrypt

password

protected byte[] password

objNum

protected int objNum

objGen

protected int objGen

strings

protected ArrayList strings

sharedStreams

protected boolean sharedStreams
Constructor Detail

PdfReader

public PdfReader(String filename)
          throws IOException
Reads and parses a PDF document.

Parameters:
filename - the file name of the document
Throws:
IOException - on error

PdfReader

public PdfReader(String filename,
                 byte[] ownerPassword)
          throws IOException

PdfReader

public PdfReader(byte[] pdfIn)
          throws IOException
Reads and parses a PDF document.

Parameters:
pdfIn - the byte array with the document
Throws:
IOException - on error

PdfReader

public PdfReader(byte[] pdfIn,
                 byte[] ownerPassword)
          throws IOException

PdfReader

public PdfReader(URL url)
          throws IOException
Reads and parses a PDF document.

Parameters:
url - the URL of the document
Throws:
IOException - on error

PdfReader

public PdfReader(URL url,
                 byte[] ownerPassword)
          throws IOException
Method Detail

getSafeFile

public RandomAccessFileOrArray getSafeFile()
Gets a new file instance of the original PDF document.

Returns:
a new file instance of the original PDF document

getPdfReaderInstance

protected PdfReaderInstance getPdfReaderInstance(PdfWriter writer)

getNumberOfPages

public int getNumberOfPages()
Gets the number of pages in the document.

Returns:
the number of pages in the document

getCatalog

public PdfDictionary getCatalog()
Returns the document's catalog. This dictionary is not a copy; any changes made to it will be reflected in the catalog.

Returns:
the document's catalog

getAcroForm

public PRAcroForm getAcroForm()
Returns the document's acroform, if it has one.

Returns:
the document's acroform

getPageRotation

public int getPageRotation(int index)
Gets the page rotation. This value can be 0, 90, 180 or 270.

Parameters:
index - the page number. The first page is 1
Returns:
the page rotation

getPageSizeWithRotation

public Rectangle getPageSizeWithRotation(int index)
Gets the page size, taking rotation into account. This is a Rectangle with the values of the /MediaBox and /Rotate keys.

Parameters:
index - the page number. The first page is 1
Returns:
a Rectangle

getPageSize

public Rectangle getPageSize(int index)
Gets the page size without taking rotation into account. This is the value of the /MediaBox key.

Parameters:
index - the page number. The first page is 1
Returns:
the page size

getCropBox

public Rectangle getCropBox(int index)
Gets the crop box without taking rotation into account. This is the value of the /CropBox key. The crop box is the part of the document to be displayed or printed. It is usually the same as the media box but may be smaller.

Parameters:
index - the page number. The first page is 1
Returns:
the crop box

getInfo

public HashMap getInfo()
Returns the content of the document information dictionary as a HashMap of String.

Returns:
content of the document information dictionary

getNormalizedRectangle

public static Rectangle getNormalizedRectangle(PdfArray box)

readPdf

protected void readPdf()
                throws IOException
Throws:
IOException

readDecryptedDocObj

private void readDecryptedDocObj()
                          throws IOException
Throws:
IOException

getPdfObject

public static PdfObject getPdfObject(PdfObject obj)

pushPageAttributes

protected void pushPageAttributes(PdfDictionary nodePages)

popPageAttributes

protected void popPageAttributes()

iteratePages

protected void iteratePages(PdfDictionary page)
                     throws IOException
Throws:
IOException

readPages

protected void readPages()
                  throws IOException
Throws:
IOException

readDocObj

protected void readDocObj()
                   throws IOException
Throws:
IOException

killIndirect

static PdfObject killIndirect(PdfObject obj)

readXref

protected void readXref()
                 throws IOException
Throws:
IOException

readXrefSection

protected void readXrefSection()
                        throws IOException
Throws:
IOException

rebuildXref

protected void rebuildXref()
                    throws IOException
Throws:
IOException

readDictionary

protected PdfDictionary readDictionary()
                                throws IOException
Throws:
IOException

readArray

protected PdfArray readArray()
                      throws IOException
Throws:
IOException

readPRObject

protected PdfObject readPRObject()
                          throws IOException
Throws:
IOException

FlateDecode

public static byte[] FlateDecode(byte[] in)

FlateDecode

public static byte[] FlateDecode(byte[] in,
                                 boolean strict)

ASCIIHexDecode

public static byte[] ASCIIHexDecode(byte[] in)

ASCII85Decode

public static byte[] ASCII85Decode(byte[] in)

LZWDecode

public static byte[] LZWDecode(byte[] in)

isRebuilt

public boolean isRebuilt()
Checks if the document had errors and was rebuilt.

Returns:
true if rebuilt.

getPageN

public PdfDictionary getPageN(int pageNum)

getPageOrigRef

public PRIndirectReference getPageOrigRef(int pageNum)

getPageContent

public byte[] getPageContent(int pageNum,
                             RandomAccessFileOrArray file)
                      throws IOException
Throws:
IOException

killXref

protected void killXref(PdfObject obj)

setPageContent

public void setPageContent(int pageNum,
                           byte[] content)
                    throws IOException
Throws:
IOException

getStreamBytes

public static byte[] getStreamBytes(PRStream stream,
                                    RandomAccessFileOrArray file)
                             throws IOException
Throws:
IOException

eliminateSharedStreams

public void eliminateSharedStreams()
Eliminates shared streams if they exist.


isTampered

public boolean isTampered()

setTampered

public void setTampered(boolean tampered)

getMetadata

public byte[] getMetadata()
                   throws IOException
Gets the XML metadata.

Returns:
the XML metadata
Throws:
IOException - on error

getLastXref

public int getLastXref()

getEofPos

public int getEofPos()

getPdfVersion

public char getPdfVersion()

isEncrypted

public boolean isEncrypted()

getDecrypt

PdfEncryption getDecrypt()