Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

wvspeex.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*-
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  *
00005  * Provides a WvEncoder abstraction for the Speex audio packet format,
00006  * which is suitable for encoding voice at low bitrates.
00007  *
00008  * Only monaural audio is supported for now.
00009  */
00010 #ifndef __WVSPEEX_H
00011 #define __WVSPEEX_H
00012 
00013 #include "wvaudioencoder.h"
00014 
00015 struct SpeexMode;
00016 struct SpeexBits;
00017 
00018 namespace WvSpeex
00019 {
00020     /** Sentinel that requests the encoder's default complexity level. */
00021     static const int DEFAULT_COMPLEXITY = -1;
00022     
00023     /**
00024      * Describes an encoding algorithm used by the Speex codec.
00025      * Might also take on values not listed in the enum at this
00026      * time due to future codec enhancements.
00027      */
00028     enum CodecMode
00029     {
00030         DEFAULT_MODE = -1,      /*!< Chosen based on the sampling rate */
00031         NARROWBAND_MODE = 0,    /*!< Narrowband ~8 kHz, 20 ms frames */
00032         WIDEBAND_MODE = 1,      /*!< Wideband ~16 kHz, ? ms frames */
00033         ULTRAWIDEBAND_MODE = 2  /*!< Ultrawideband ~32 kHz, ? ms frames */
00034     };
00035 
00036     /**
00037      * Bitrate specification.
00038      * 
00039      * Identifies a particular bitrate control mechanism.
00040      * Use one of the subclasses to initialize a suitable BitrateSpec.
00041      * Each subclass sets "mode" and only the one field that mode uses.
00042      */
00043     class BitrateSpec
00044     {
00045     public:
00046         // TODO: check whether VBR_BITRATE is a valid mode
00047         enum Mode { VBR_QUALITY, CBR_QUALITY, CBR_BITRATE };
00048         Mode mode;
00049         float quality_index;
00050         int nominal_bitrate;
00051         
00052     protected:
00053         BitrateSpec(Mode mode) : mode(mode) { } // for subclasses; sets only "mode"
00054 
00055     public:
00056         // allows creation of an uninitialized object (no member is set) for later assignment
00057         BitrateSpec() { }
00058     };
00059     
00060     /**
00061      * Specifies a variable bitrate based on a quality index ranging
00062      * from 0.0 (low quality) to 1.0 (high quality).
00063      */
00064     class VBRQuality : public BitrateSpec
00065     {
00066     public:
00067         /**
00068          * Creates a variable bitrate specification.
00069          * "quality" is the quality index; nominal_bitrate is left unset
00070          */
00071         VBRQuality(float quality) : BitrateSpec(VBR_QUALITY)
00072         {
00073             quality_index = quality;
00074         }
00075     };
00076     
00077     /**
00078      * Specifies a constant bitrate in bits per second.
00079      *
00080      * The encoder may adjust the bitrate according to internal
00081      * constraints, but guarantees that it will not select a
00082      * bitrate larger than that specified here.
00083      */
00084     class CBRBitrate : public BitrateSpec
00085     {
00086     public:
00087         /**
00088          * Creates a constant bitrate specification.
00089          * "nominal" is the nominal bitrate; quality_index is left unset
00090          */
00091         CBRBitrate(int nominal) : BitrateSpec(CBR_BITRATE)
00092         {
00093             nominal_bitrate = nominal;
00094         }
00095     };
00096     
00097     /**
00098      * Specifies a constant bitrate based on a quality index ranging
00099      * from 0.0 (low quality) to 1.0 (high quality).
00100      */
00101     class CBRQuality : public BitrateSpec
00102     {
00103     public:
00104         /**
00105          * Creates a constant bitrate specification.
00106          * "quality" is the quality index; nominal_bitrate is left unset
00107          */
00108         CBRQuality(float quality) : BitrateSpec(CBR_QUALITY)
00109         {
00110             quality_index = quality;
00111         }
00112     };
00113 }; // namespace WvSpeex
00114 
00115 
00116 
00117 /**
00118  * Encodes PCM audio using the Speex audio packet format.
00119  * 
00120  * Input buffer must contain a sequence of signed 'float' type
00121  * values in machine order representing unnormalized PCM
00122  * audio data.
00123  * 
00124  * Output buffer will contain a sequence of Speex packets.  Each
00125  * invocation of encode() with flush == false will generate
00126  * precisely one Speex packet suitable for use with unreliable
00127  * datagram transmission protocols that guarantee serial packet
00128  * order on reception.  Each packet contains one frame of exactly
00129  * 20 ms of encoded audio in narrowband mode (sampling rate
00130  * <= 12.5 kHz).
00131  * 
00132  * Warning: Never invoke encode() with flush == true unless
00133  * the input buffer contains exactly zero or one frame of audio.
00134  * Speex packets do not contain any delimiters; therefore it is not
00135  * possible to locate the boundary between adjacent packets unless
00136  * they are encapsulated as individual datagrams in some fashion.
00137  * With flush == true, multiple adjacent generated packets will run
00138  * together to form one large undecodable lump.
00139  * 
00140  * For archival purposes or for streaming, consider using
00141  * WvOggSpeexEncoder.
00142  * 
00143  * For encoding music or other non-speech audio, consider using
00144  * WvOggVorbisEncoder.
00145  * 
00146  */
00147 class WvSpeexEncoder : public WvAudioEncoder
00148 {
00149     void *spxstate;
00150     SpeexBits *spxbits;
00151     SpeexMode *spxmode;
00152     unsigned int _channels;
00153     size_t _samplesperframe;
00154     
00155 public:
00156 
00157     /**
00158      * Creates a Speex Encoder.
00159      *
00160      * "bitratespec" is the bitrate specification
00161      * "samplingrate" is the number of samples per second,
00162      *        preferably one of 8000, 16000, or 32000
00163      * "channels" is the number of channels (must be 1 for now),
00164      *        defaults to 1
00165      * "mode" is the Speex codec mode to use or
00166      *        WvSpeex::DEFAULT_MODE to select one automatically
00167      *        based on the sampling rate; this is the default
00168      * "complexity" is a measure of the amount of CPU
00169      *        resources that should be allocated to the encoder,
00170      *        ranges from 0 to 10, or WvSpeex::DEFAULT_COMPLEXITY for
00171      *        the encoder default; this is the default
00172      */
00173     WvSpeexEncoder(const WvSpeex::BitrateSpec &bitratespec,
00174         int samplingrate, unsigned int channels = 1,
00175         WvSpeex::CodecMode mode = WvSpeex::DEFAULT_MODE,
00176         int complexity = WvSpeex::DEFAULT_COMPLEXITY);
00177         
00178     virtual ~WvSpeexEncoder();
00179     
00180     /**
00181      * Returns the sampling rate.
00182      * Returns: the sampling rate in samples per second
00183      */
00184     int samplingrate() const;
00185     
00186     /**
00187      * Returns the number of channels.
00188      * Returns: the number of channels
00189      */
00190     virtual unsigned int channels() const
00191         { return _channels; }
00192 
00193     /**
00194      * Returns the number of samples per frame.
00195      * Returns: the frame size in samples
00196      */
00197     virtual size_t samplesperframe() const
00198         { return _samplesperframe; }
00199 
00200     /**
00201      * Returns the current encoding mode.
00202      * Returns: the encoding mode
00203      */
00204     WvSpeex::CodecMode mode() const;
00205 
00206     /**
00207      * Returns true if variable bitrate support has been enabled.
00208      * Returns: true if it is enabled
00209      */
00210     bool vbr() const;
00211 
00212     /**
00213      * Returns the nominal bitrate.
00214      * Returns: the bitrate, or -1 if not specified or not meaningful
00215      */
00216     int nominalbitrate() const;
00217 
00218 protected:
00219     virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf,
00220         bool flush);
00221     virtual bool _typedfinish(OBuffer &outbuf);
00222 
00223 private:
00224     bool flushspxbits(OBuffer &outbuf);
00225 };
00226 
00227 
00228 
00229 /**
00230  * Decodes Speex audio packets into PCM audio.
00231  * 
00232  * Input buffer must contain a sequence of Speex packets.
00233  * 
00234  * Output buffer will contain a sequence of signed 'float' type
00235  * values in machine order representing unnormalized PCM
00236  * audio data.
00237  * 
00238  * Missing audio due to lost or damaged packets may be filled in
00239  * by making predictions (guesses) based on residual energy
00240  * information from previous ones.  The number of lost or damaged
00241  * packets must be known in order to calculate how much new audio
00242  * must be synthesized.  This technique works well to conceal
00243  * occasional dropouts but not long strings of lost packets.
00244  * Still, speech is surprisingly recognizable with average
00245  * packet losses of up to 25% to 50%!
00246  * 
00247  * Warning: Never invoke encode() unless the input buffer
00248  * contains exactly zero or one Speex packets. Speex packets
00249  * do not contain any delimiters; therefore it is not possible to
00250  * locate the boundary between adjacent packets unless they are
00251  * encapsulated as individual datagrams in some fashion.
00252  * Multiple adjacent packets cannot be decoded at once.
00253  * 
00254  * For archival purposes or for streaming, consider using
00255  * WvOggSpeexDecoder.
00256  * 
00257  * For decoding music or other non-speech audio, consider using
00258  * WvOggVorbisDecoder.
00259  */
00260 class WvSpeexDecoder : public WvAudioDecoder
00261 {
00262     int _samplingrate;
00263     unsigned int _channels;
00264     
00265     void *spxstate;
00266     SpeexBits *spxbits;
00267     SpeexMode *spxmode;
00268     size_t _samplesperframe;
00269 
00270 public:
00271     /**
00272      * Creates a Speex Decoder.
00273      * 
00274      * For now, if the input bitstream is stereo, outputs the left
00275      * channel only.  This behaviour may change later on.
00276      * 
00277      * "samplingrate" is the number of samples per second,
00278      *        preferably one of 8000, 16000, or 32000
00279      * "channels" is the number of channels (must be 1 for now),
00280      *        defaults to 1
00281      * "mode" is the Speex codec mode to use or
00282      *        WvSpeex::DEFAULT_MODE to select one automatically
00283      *        based on the sampling rate; this is the default
00284      */
00285     WvSpeexDecoder(int samplingrate, unsigned int channels = 1,
00286         WvSpeex::CodecMode mode = WvSpeex::DEFAULT_MODE);
00287 
00288     virtual ~WvSpeexDecoder();
00289     
00290     /**
00291      * Synthesizes one audio frame to compensate for a missing packet.
00292      * "outbuf" is the output buffer
00293      * Returns: true on success
00294      * @see encode
00295      */
00296     virtual bool missing(OBuffer &outbuf);
00297 
00298     /**
00299      * Returns the number of channels in the stream.
00300      * Returns: the number of channels, non-negative
00301      */
00302     virtual unsigned int channels() const
00303         { return _channels; }
00304 
00305     /**
00306      * Returns the sampling rate of the stream.
00307      * Returns: the sampling rate in samples per second
00308      */
00309     int samplingrate() const
00310         { return _samplingrate; }
00311 
00312     /**
00313      * Returns the number of samples per frame.
00314      * Returns: the frame size in samples
00315      */
00316     virtual size_t samplesperframe() const
00317         { return _samplesperframe; }
00318 
00319     /**
00320      * Returns the current encoding mode.
00321      * Returns: the encoding mode
00322      */
00323     WvSpeex::CodecMode mode() const;
00324     
00325     /**
00326      * Determines if the perceptual enhancement post-filter is enabled.
00327      * Returns: true if it is enabled
00328      */
00329     bool postfilter() const;
00330 
00331     /**
00332      * Enables or disables the perceptual enhancement post-filter.
00333      * "enable" is true to enable the post-filter, false to disable it
00334      */
00335     void setpostfilter(bool enable);
00336 
00337 protected:
00338     virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf,
00339         bool flush);
00340     virtual bool _typedfinish(OBuffer &outbuf);
00341 };
00342 
00343 #endif // __WVSPEEX_H

Generated on Sat Mar 13 14:55:54 2004 for WvStreams by doxygen 1.3.6-20040222