1    /* ====================================================================
2     * The Apache Software License, Version 1.1
3     *
4     * Copyright (c) 2002 The Apache Software Foundation.  All rights
5     * reserved.
6     *
7     * Redistribution and use in source and binary forms, with or without
8     * modification, are permitted provided that the following conditions
9     * are met:
10    *
11    * 1. Redistributions of source code must retain the above copyright
12    *    notice, this list of conditions and the following disclaimer.
13    *
14    * 2. Redistributions in binary form must reproduce the above copyright
15    *    notice, this list of conditions and the following disclaimer in
16    *    the documentation and/or other materials provided with the
17    *    distribution.
18    *
19    * 3. The end-user documentation included with the redistribution,
20    *    if any, must include the following acknowledgment:
21    *       "This product includes software developed by the
22    *        Apache Software Foundation (http://www.apache.org/)."
23    *    Alternately, this acknowledgment may appear in the software itself,
24    *    if and wherever such third-party acknowledgments normally appear.
25    *
26    * 4. The names "Apache" and "Apache Software Foundation" and
27    *    "Apache POI" must not be used to endorse or promote products
28    *    derived from this software without prior written permission. For
29    *    written permission, please contact apache@apache.org.
30    *
31    * 5. Products derived from this software may not be called "Apache",
32    *    "Apache POI", nor may "Apache" appear in their name, without
33    *    prior written permission of the Apache Software Foundation.
34    *
35    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46    * SUCH DAMAGE.
47    * ====================================================================
48    *
49    * This software consists of voluntary contributions made by many
50    * individuals on behalf of the Apache Software Foundation.  For more
51    * information on the Apache Software Foundation, please see
52    * <http://www.apache.org/>.
53    */
54   
55   package org.apache.poi.hssf.record;
56   
57   import org.apache.poi.util.BinaryTree;
58   import org.apache.poi.util.LittleEndian;
59   import org.apache.poi.util.LittleEndianConsts;
60   
61   import java.util.Iterator;
62   import java.util.List;
63   import java.util.ArrayList;
64   
65   /**
66    * Title:        Static String Table Record
67    * <P>
68    * Description:  This holds all the strings for LabelSSTRecords.
69    * <P>
70    * REFERENCE:    PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
71    *               1-57231-498-2)
72    * <P>
73    * @author Andrew C. Oliver (acoliver at apache dot org)
74    * @author Marc Johnson (mjohnson at apache dot org)
75    * @author Glen Stampoultzis (glens at apache.org)
76    * @version 2.0-pre
77    * @see org.apache.poi.hssf.record.LabelSSTRecord
78    * @see org.apache.poi.hssf.record.ContinueRecord
79    */
80   
81   public class SSTRecord
82           extends Record
83   {
84   
85       /** how big can an SST record be? As big as any record can be: 8228 bytes */
86       static final int MAX_RECORD_SIZE = 8228;
87   
88       /** standard record overhead: two shorts (record id plus data space size)*/
89       static final int STD_RECORD_OVERHEAD =
90               2 * LittleEndianConsts.SHORT_SIZE;
91   
92       /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
93       static final int SST_RECORD_OVERHEAD =
94               ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
95   
96       /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
97       static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
98   
99       /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
100      static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
101  
102      public static final short sid = 0xfc;
103  
104      /** union of strings in the SST and EXTSST */
105      private int field_1_num_strings;
106  
107      /** according to docs ONLY SST */
108      private int field_2_num_unique_strings;
109      private BinaryTree field_3_strings;
110  
111      /** Record lengths for initial SST record and all continue records */
112      private List _record_lengths = null;
113      private SSTDeserializer deserializer;
114  
115      /**
116       * default constructor
117       */
118  
119      public SSTRecord()
120      {
121          field_1_num_strings = 0;
122          field_2_num_unique_strings = 0;
123          field_3_strings = new BinaryTree();
124          deserializer = new SSTDeserializer(field_3_strings);
125      }
126  
127      /**
128       * Constructs an SST record and sets its fields appropriately.
129       *
130       * @param id must be 0xfc or an exception will be throw upon
131       *           validation
132       * @param size the size of the data area of the record
133       * @param data of the record (should not contain sid/len)
134       */
135  
136      public SSTRecord( final short id, final short size, final byte[] data )
137      {
138          super( id, size, data );
139      }
140  
141      /**
142       * Constructs an SST record and sets its fields appropriately.
143       *
144       * @param id must be 0xfc or an exception will be throw upon
145       *           validation
146       * @param size the size of the data area of the record
147       * @param data of the record (should not contain sid/len)
148       * @param offset of the record
149       */
150  
151      public SSTRecord( final short id, final short size, final byte[] data,
152                        int offset )
153      {
154          super( id, size, data, offset );
155      }
156  
157      /**
158       * Add a string. Determines whether 8-bit encoding can be used, or
159       * whether 16-bit encoding must be used.
160       * <p>
161       * THIS IS THE PREFERRED METHOD OF ADDING A STRING. IF YOU USE THE
162       * OTHER ,code>addString</code> METHOD AND FORCE 8-BIT ENCODING ON
163       * A STRING THAT SHOULD USE 16-BIT ENCODING, YOU WILL CORRUPT THE
164       * STRING; IF YOU USE THAT METHOD AND FORCE 16-BIT ENCODING, YOU
165       * ARE WASTING SPACE WHEN THE WORKBOOK IS WRITTEN OUT.
166       *
167       * @param string string to be added
168       *
169       * @return the index of that string in the table
170       */
171  
172      public int addString( final String string )
173      {
174          int rval;
175  
176          if ( string == null )
177          {
178              rval = addString( "", false );
179          }
180          else
181          {
182  
183              // scan for characters greater than 255 ... if any are
184              // present, we have to use 16-bit encoding. Otherwise, we
185              // can use 8-bit encoding
186              boolean useUTF16 = false;
187              int strlen = string.length();
188  
189              for ( int j = 0; j < strlen; j++ )
190              {
191                  if ( string.charAt( j ) > 255 )
192                  {
193                      useUTF16 = true;
194                      break;
195                  }
196              }
197              rval = addString( string, useUTF16 );
198          }
199          return rval;
200      }
201  
202      /**
203       * Add a string and assert the encoding (8-bit or 16-bit) to be
204       * used.
205       * <P>
206       * USE THIS METHOD AT YOUR OWN RISK. IF YOU FORCE 8-BIT ENCODING,
207       * YOU MAY CORRUPT YOUR STRING. IF YOU FORCE 16-BIT ENCODING AND
208       * IT ISN'T NECESSARY, YOU WILL WASTE SPACE WHEN THIS RECORD IS
209       * WRITTEN OUT.
210       *
211       * @param string string to be added
212       * @param useUTF16 if true, forces 16-bit encoding. If false,
213       *                 forces 8-bit encoding
214       *
215       * @return the index of that string in the table
216       */
217  
218      public int addString( final String string, final boolean useUTF16 )
219      {
220          field_1_num_strings++;
221          String str = ( string == null ) ? ""
222                  : string;
223          int rval = -1;
224          UnicodeString ucs = new UnicodeString();
225  
226          ucs.setString( str );
227          ucs.setCharCount( (short) str.length() );
228          ucs.setOptionFlags( (byte) ( useUTF16 ? 1
229                  : 0 ) );
230          Integer integer = (Integer) field_3_strings.getKeyForValue( ucs );
231  
232          if ( integer != null )
233          {
234              rval = integer.intValue();
235          }
236          else
237          {
238  
239              // This is a new string -- we didn't see it among the
240              // strings we've already collected
241              rval = field_3_strings.size();
242              field_2_num_unique_strings++;
243              integer = new Integer( rval );
244              SSTDeserializer.addToStringTable( field_3_strings, integer, ucs );
245  //            field_3_strings.put( integer, ucs );
246          }
247          return rval;
248      }
249  
250      /**
251       * @return number of strings
252       */
253  
254      public int getNumStrings()
255      {
256          return field_1_num_strings;
257      }
258  
259      /**
260       * @return number of unique strings
261       */
262  
263      public int getNumUniqueStrings()
264      {
265          return field_2_num_unique_strings;
266      }
267  
268      /**
269       * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
270       * METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
271       * ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
272       * WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
273       * WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
274       *
275       * @param count  number of strings
276       *
277       */
278  
279      public void setNumStrings( final int count )
280      {
281          field_1_num_strings = count;
282      }
283  
284      /**
285       * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
286       * METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
287       * EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
288       * IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
289       * THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
290       * THE RECORD
291       *
292       * @param count  number of strings
293       */
294  
295      public void setNumUniqueStrings( final int count )
296      {
297          field_2_num_unique_strings = count;
298      }
299  
300      /**
301       * Get a particular string by its index
302       *
303       * @param id index into the array of strings
304       *
305       * @return the desired string
306       */
307  
308      public String getString( final int id )
309      {
310          return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) ).getString();
311      }
312  
313      public boolean isString16bit( final int id )
314      {
315          UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
316          return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
317      }
318  
319      /**
320       * Return a debugging string representation
321       *
322       * @return string representation
323       */
324  
325      public String toString()
326      {
327          StringBuffer buffer = new StringBuffer();
328  
329          buffer.append( "[SST]\n" );
330          buffer.append( "    .numstrings     = " )
331                  .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
332          buffer.append( "    .uniquestrings  = " )
333                  .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
334          for ( int k = 0; k < field_3_strings.size(); k++ )
335          {
336              buffer.append( "    .string_" + k + "      = " )
337                      .getend( ( (UnicodeString) field_3_strings
338                      .get( new Integer( k ) ) ).toString() ).append( "\n" );
339          }
340          buffer.append( "[/SST]\n" );
341          return buffer.toString();
342      }
343  
344      /**
345       * @return sid
346       */
347      public short getSid()
348      {
349          return sid;
350      }
351  
352      /**
353       * @return hashcode
354       */
355      public int hashCode()
356      {
357          return field_2_num_unique_strings;
358      }
359  
360      public boolean equals( Object o )
361      {
362          if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
363          {
364              return false;
365          }
366          SSTRecord other = (SSTRecord) o;
367  
368          return ( field_1_num_stringsfield_1_num_strings         .field_1_num_strings ) && ( field_2_num_unique_strings == other
369                  .field_2_num_unique_strings ) && field_3_strings
370                  .equals( other.field_3_strings ) );
371      }
372  
373      /**
374       * validate SID
375       *
376       * @param id the alleged SID
377       *
378       * @exception RecordFormatException if validation fails
379       */
380  
381      protected void validateSid( final short id )
382              throws RecordFormatException
383      {
384          if ( id != sid )
385          {
386              throw new RecordFormatException( "NOT An SST RECORD" );
387          }
388      }
389  
390      /**
391       * Fill the fields from the data
392       * <P>
393       * The data consists of sets of string data. This string data is
394       * arranged as follows:
395       * <P>
396       * <CODE>
397       * short  string_length;   // length of string data
398       * byte   string_flag;     // flag specifying special string
399       *                         // handling
400       * short  run_count;       // optional count of formatting runs
401       * int    extend_length;   // optional extension length
402       * char[] string_data;     // string data, can be byte[] or
403       *                         // short[] (length of array is
404       *                         // string_length)
405       * int[]  formatting_runs; // optional formatting runs (length of
406       *                         // array is run_count)
407       * byte[] extension;       // optional extension (length of array
408       *                         // is extend_length)
409       * </CODE>
410       * <P>
411       * The string_flag is bit mapped as follows:
412       * <P>
413       * <TABLE>
414       *   <TR>
415       *      <TH>Bit number</TH>
416       *      <TH>Meaning if 0</TH>
417       *      <TH>Meaning if 1</TH>
418       *   <TR>
419       *   <TR>
420       *      <TD>0</TD>
421       *      <TD>string_data is byte[]</TD>
422       *      <TD>string_data is short[]</TH>
423       *   <TR>
424       *   <TR>
425       *      <TD>1</TD>
426       *      <TD>Should always be 0</TD>
427       *      <TD>string_flag is defective</TH>
428       *   <TR>
429       *   <TR>
430       *      <TD>2</TD>
431       *      <TD>extension is not included</TD>
432       *      <TD>extension is included</TH>
433       *   <TR>
434       *   <TR>
435       *      <TD>3</TD>
436       *      <TD>formatting run data is not included</TD>
437       *      <TD>formatting run data is included</TH>
438       *   <TR>
439       *   <TR>
440       *      <TD>4</TD>
441       *      <TD>Should always be 0</TD>
442       *      <TD>string_flag is defective</TH>
443       *   <TR>
444       *   <TR>
445       *      <TD>5</TD>
446       *      <TD>Should always be 0</TD>
447       *      <TD>string_flag is defective</TH>
448       *   <TR>
449       *   <TR>
450       *      <TD>6</TD>
451       *      <TD>Should always be 0</TD>
452       *      <TD>string_flag is defective</TH>
453       *   <TR>
454       *   <TR>
455       *      <TD>7</TD>
456       *      <TD>Should always be 0</TD>
457       *      <TD>string_flag is defective</TH>
458       *   <TR>
459       * </TABLE>
460       * <P>
461       * We can handle eating the overhead associated with bits 2 or 3
462       * (or both) being set, but we have no idea what to do with the
463       * associated data. The UnicodeString class can handle the byte[]
464       * vs short[] nature of the actual string data
465       *
466       * @param data raw data
467       * @param size size of the raw data
468       */
469  
470      protected void fillFields( final byte[] data, final short size,
471                                 int offset )
472      {
473  
474          // this method is ALWAYS called after construction -- using
475          // the nontrivial constructor, of course -- so this is where
476          // we initialize our fields
477          field_1_num_strings = LittleEndian.getInt( data, 0 + offset );
478          field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
479          field_3_strings = new BinaryTree();
480          deserializer = new SSTDeserializer(field_3_strings);
481          deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
482      }
483  
484  
485      /**
486       * @return an iterator of the strings we hold. All instances are
487       *         UnicodeStrings
488       */
489  
490      Iterator getStrings()
491      {
492          return field_3_strings.values().iterator();
493      }
494  
495      /**
496       * @return count of the strings we hold.
497       */
498  
499      int countStrings()
500      {
501          return field_3_strings.size();
502      }
503  
504      /**
505       * called by the class that is responsible for writing this sucker.
506       * Subclasses should implement this so that their data is passed back in a
507       * byte array.
508       *
509       * @return byte array containing instance data
510       */
511  
512      public int serialize( int offset, byte[] data )
513      {
514          SSTSerializer serializer = new SSTSerializer(
515                  _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
516          return serializer.serialize( getRecordSize(), offset, data );
517      }
518  
519  
520      public int getRecordSize()
521      {
522          SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(field_3_strings);
523          int recordSize = calculator.getRecordSize();
524          _record_lengths = calculator.getRecordLengths();
525          return recordSize;
526      }
527  
528      SSTDeserializer getDeserializer()
529      {
530          return deserializer;
531      }
532  
533      /**
534       * Strange to handle continue records this way.  Is it a smell?
535       */
536      public void processContinueRecord( byte[] record )
537      {
538          deserializer.processContinueRecord( record );
539      }
540  }
541  
542  
543  ???????????????????????????????????????????other??????????????????field_2_num_unique_strings?????????????????????????????????????????????field_2_num_unique_strings???????????????????????????????????????????????????????????????????????????other??????????????????????????????????????????????????field_3_strings??????????????????????????other????????????????????????????????field_3_strings???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????validateSid????????????????????RecordFormatException??????????????id????????????????????sid?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????fillFields??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????field_1_num_strings???????????????????????????????LittleEndian????????????????????????????????????????????getInt????????????????????????????????????????????????????data??????????????????????????????????????????????????????????????offset?????????field_2_num_unique_strings??????????????????????????????????????LittleEndian???????????????????????????????????????????????????getInt???????????????????????????????????????????????????????????data?????????????????????????????????????????????????????????????????????offset?????????field_3_strings???????????????????????????????BinaryTree?????????deserializer????????????????????????????????????????????field_3_strings?????????deserializer??????????????????????manufactureStrings??????????????????????????????????????????data????????????????????????????????????????????????????offset????????????????????????????????????????????????????????????????????size?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????getStrings????????????????field_3_strings????????????????????????????????values??????????????????????????????????????????????????????????????????????countStrings????????????????field_3_strings????????????????????????????????size????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????serialize?????????SSTSerializer?????????????????_record_lengths??????????????????????????????????field_3_strings???????????????????????????????????????????????????getNumStrings????????????????????????????????????????????????????????????????????getNumUniqueStrings????????????????serializer???????????????????????????serialize??????????????????????????????????????getRecordSize???????????????????????????????????????????????????????offset???????????????????????????????????????????????????????????????data????????????????getRecordSize?????????SSTRecordSizeCalculator??????????????????????????????????????????????????????????????????????????field_3_strings??????????????????????????calculator?????????????????????????????????????getRecordSize?????????_record_lengths???????????????????????????calculator??????????????????????????????????????getRecordLengths????????????????recordSize?????SSTDeserializer?????????????????????getDeserializer????????????????deserializer?????????????????????????????????????????????????????????????????????????????????????????????????????processContinueRecord?????????deserializer??????????????????????processContinueRecord?????????????????????????????????????????????record