Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

wvoggspeex.cc

Go to the documentation of this file.
00001 /*
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  *
00005  * Provides a WvEncoder abstraction for Ogg Speex audio streams
00006  * suitable for encoding voice at low bitrates.
00007  *
00008  * Only monaural audio is supported for now.
00009  */
#include "wvoggspeex.h"
#include <ogg/ogg.h>
#include <speex.h>
#include <speex_header.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
00015 
00016 #define OGG_SPEEX_DECODER_BUF_SIZE 16384 // at most 16k at once
00017 
00018 /** Extracts a little endian integer from a buffer. */
00019 static unsigned long int getint_le(WvBuf &inbuf)
00020 {
00021     // FIXME: a little sloppy
00022     return inbuf.getch() | (inbuf.getch() << 8) |
00023         (inbuf.getch() << 16) | (inbuf.getch() << 24);
00024 }
00025 
00026 
00027 /** Appends a little endian integer to a buffer. */
00028 static void putint_le(WvBuf &outbuf, unsigned long int value)
00029 {
00030     // FIXME: a little sloppy
00031     outbuf.putch(value & 255);
00032     outbuf.putch((value >> 8) & 255);
00033     outbuf.putch((value >> 16) & 255);
00034     outbuf.putch(value >> 24);
00035 }
00036 
00037 
00038 /***** WvOggSpeexEncoder *****/
00039 
00040 WvOggSpeexEncoder::WvOggSpeexEncoder(
00041     const WvSpeex::BitrateSpec &bitratespec, int samplingrate,
00042     int channels, WvSpeex::CodecMode mode, int complexity,
00043     long serialno) :
00044     speexenc(NULL), packetno(0),
00045     _vendor("Encoded with Speex"),
00046     oggstream(NULL), wrote_headers(false),
00047     framebuf(MAX_BYTES_PER_FRAME)
00048 {
00049     // pick a serial number
00050     if (serialno == RANDOM_SERIALNO)
00051     {
00052         serialno = rand();
00053     }
00054 
00055     // init ogg bitstream layer
00056     int retval;
00057     oggstream = new ogg_stream_state;
00058     if ((retval = ogg_stream_init(oggstream, serialno)) != 0)
00059     {
00060         seterror("error %s during ogg_stream_init", retval);
00061         return;
00062     }
00063 
00064     // init speex encoder
00065     speexenc = new WvSpeexEncoder(bitratespec, samplingrate, channels,
00066         mode, complexity);
00067 }
00068 
00069 
00070 WvOggSpeexEncoder::~WvOggSpeexEncoder()
00071 {
00072     // destroy speex encoder
00073     delete speexenc;
00074 
00075     // destroy ogg bitstream layer
00076     if (oggstream)
00077     {
00078         ogg_stream_clear(oggstream);
00079         delete oggstream;
00080     }
00081 }
00082 
00083 
00084 bool WvOggSpeexEncoder::_isok() const
00085 {
00086     return speexenc ? speexenc->isok() : true;
00087 }
00088 
00089 
00090 WvString WvOggSpeexEncoder::_geterror() const
00091 {
00092     return speexenc ? speexenc->geterror() : WvString(WvString::null);
00093 }
00094 
00095 
00096 void WvOggSpeexEncoder::add_comment(WvStringParm comment)
00097 {
00098     _comments.append(new WvString(comment), true);
00099 }
00100 
00101 
00102 void WvOggSpeexEncoder::add_tag(WvStringParm tag, WvStringParm value)
00103 {
00104     _comments.append(new WvString("%s=%s", tag, value), true);
00105 }
00106 
00107 
00108 bool WvOggSpeexEncoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf,
00109     bool flush)
00110 {
00111     // write header pages if needed
00112     if (! wrote_headers)
00113     {
00114         if (! write_headers(outbuf))
00115             return false;
00116         wrote_headers = true;
00117     }
00118 
00119     // write compressed audio pages
00120     for (;;)
00121     {
00122         // read in more data
00123         size_t samples = inbuf.used();
00124         if (samples == 0)
00125         {
00126             // no more data
00127             if (flush)
00128                 if (! write_stream(outbuf, true))
00129                     return false;
00130             return true;
00131         }
00132 
00133         framebuf.zap();
00134         if (! speexenc->encode(inbuf, framebuf))
00135             return false;
00136         size_t bytes = framebuf.used();
00137         if (bytes == 0)
00138             return false; // not enough data
00139 
00140         // write out a packet
00141         ogg_packet oggpacket;
00142         oggpacket.packet = framebuf.ptr();
00143         oggpacket.bytes = bytes;
00144         oggpacket.b_o_s = 0;
00145         oggpacket.e_o_s = 0;
00146         oggpacket.granulepos = 0;
00147         oggpacket.packetno = packetno++;
00148         ogg_stream_packetin(oggstream, &oggpacket); // always succeeds
00149         if (! write_stream(outbuf, false))
00150             return false;
00151     }
00152 }
00153 
00154 
00155 bool WvOggSpeexEncoder::_typedfinish(OBuffer &outbuf)
00156 {
00157     // write header pages if needed
00158     if (! wrote_headers)
00159     {
00160         if (! write_headers(outbuf))
00161             return false;
00162         wrote_headers = true;
00163     }
00164     return write_eof(outbuf);
00165 }
00166 
00167 
00168 bool WvOggSpeexEncoder::write_headers(OBuffer &outbuf)
00169 {
00170     // generate stream header
00171     ogg_packet header;
00172     SpeexHeader spxheader;
00173     SpeexMode *spxmode = speex_mode_list[mode()];
00174     speex_init_header(&spxheader, samplingrate(), channels(), spxmode);
00175     spxheader.vbr = vbr();
00176     spxheader.bitrate = nominalbitrate();
00177     spxheader.frames_per_packet = 1;
00178     
00179     int size;
00180     header.packet = (unsigned char*)speex_header_to_packet(
00181         &spxheader, &size);
00182     header.bytes = size;
00183     header.b_o_s = 1;
00184     header.e_o_s = 0;
00185     header.granulepos = 0;
00186     header.packetno = packetno++;
00187     ogg_stream_packetin(oggstream, &header);
00188 
00189     // generate comment header
00190     WvDynBuf cbuf;
00191     putint_le(cbuf, _vendor.len());
00192     cbuf.putstr(_vendor);
00193     putint_le(cbuf, _comments.count());
00194     WvStringList::Iter it(_comments);
00195     for (it.rewind(); it.next(); )
00196     {
00197         putint_le(cbuf, it->len());
00198         cbuf.putstr(*it);
00199     }
00200     header.bytes = cbuf.used();
00201     header.packet = const_cast<unsigned char *>(cbuf.get(header.bytes));
00202     header.b_o_s = 0;
00203     header.e_o_s = 0;
00204     header.granulepos = 0;
00205     header.packetno = packetno++;
00206     ogg_stream_packetin(oggstream, &header);
00207         
00208     // flush to ensure next data packet is in its own page
00209     return write_stream(outbuf, true /*flush*/);
00210 }
00211 
00212 
00213 bool WvOggSpeexEncoder::write_eof(OBuffer &outbuf)
00214 {
00215     ogg_packet oggpacket;
00216     oggpacket.packet = (unsigned char*)"";
00217     oggpacket.bytes = 0;
00218     oggpacket.b_o_s = 0;
00219     oggpacket.e_o_s = 1;
00220     oggpacket.granulepos = 0;
00221     oggpacket.packetno = packetno++;
00222     ogg_stream_packetin(oggstream, &oggpacket);
00223     return write_stream(outbuf, true /*flush*/);
00224 }
00225 
00226 
00227 bool WvOggSpeexEncoder::write_stream(OBuffer &outbuf, bool flush)
00228 {
00229     ogg_page oggpage;
00230     for (;;)
00231     {
00232         if (flush)
00233         {
00234             int retval = ogg_stream_flush(oggstream, & oggpage);
00235             if (retval == 0)
00236                 break; // no remaining data
00237             else if (retval < 0)
00238             {
00239                 seterror("error %s during ogg_stream_flush", retval);
00240                 return false;
00241             }
00242         }
00243         else
00244         {
00245             int retval = ogg_stream_pageout(oggstream, & oggpage);
00246             if (retval == 0)
00247                 break; // not enough data
00248             else if (retval < 0)
00249             {
00250                 seterror("error %s during ogg_stream_pageout", retval);
00251                 return false;
00252             }
00253         }
00254         outbuf.put(oggpage.header, oggpage.header_len);
00255         outbuf.put(oggpage.body, oggpage.body_len);
00256     }
00257     return true;
00258 }
00259 
00260 
00261 
00262 /***** WvOggSpeexDecoder *****/
00263 
00264 WvOggSpeexDecoder::WvOggSpeexDecoder() :
00265     speexdec(NULL), forcepostfilter(false),
00266     _vbr(false), _nominalbitrate(-1),
00267     oggsync(NULL), oggstream(NULL),
00268     need_serialno(true), need_headers(2)
00269 {
00270     int retval;
00271     
00272     // init ogg sync layer
00273     oggsync = new ogg_sync_state;
00274     if ((retval = ogg_sync_init(oggsync)) != 0)
00275     {
00276         seterror("error %s during ogg_sync_init", retval);
00277         return;
00278     }
00279     oggpage = new ogg_page;
00280 }
00281 
00282 
00283 WvOggSpeexDecoder::~WvOggSpeexDecoder()
00284 {
00285     // destroy speex decoder
00286     delete speexdec;
00287 
00288     // destroy ogg bitstream layer
00289     if (oggstream)
00290     {
00291         ogg_stream_clear(oggstream);
00292         delete oggstream;
00293     }
00294     
00295     // destroy ogg sync layer
00296     delete oggpage;
00297     ogg_sync_clear(oggsync);
00298     delete oggsync;
00299 }
00300 
00301 
00302 bool WvOggSpeexDecoder::_isok() const
00303 {
00304     return speexdec ? speexdec->isok() : true;
00305 }
00306 
00307 
00308 WvString WvOggSpeexDecoder::_geterror() const
00309 {
00310     return speexdec ? speexdec->geterror() : WvString(WvString::null);
00311 }
00312 
00313 
00314 bool WvOggSpeexDecoder::isheaderok() const
00315 {
00316     return need_headers == 0;
00317 }
00318 
00319 
00320 bool WvOggSpeexDecoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf,
00321     bool flush)
00322 {
00323     bool checkheaderok = ! isheaderok() && ! flush;
00324     for (;;)
00325     {
00326         // extract packets from the bitstream
00327         if (oggstream)
00328         {
00329             ogg_packet oggpacket;
00330             while (ogg_stream_packetout(oggstream, & oggpacket) > 0)
00331             {
00332                 if (! process_packet(& oggpacket, outbuf))
00333                     return false;
00334             }
00335 
00336             // detect end of stream
00337             if (oggstream->e_o_s)
00338             {
00339                 setfinished();
00340                 return true;
00341             }
00342         }
00343 
00344         // get more pages
00345         while (ogg_sync_pageseek(oggsync, oggpage) <= 0)
00346         {
00347             // read in more data
00348             size_t oggbufsize = inbuf.used();
00349             if (oggbufsize == 0)
00350             {
00351                 // no more data
00352                 if (flush && oggsync->fill != 0)
00353                     return false;
00354                 return true;
00355             }
00356             if (oggbufsize > OGG_SPEEX_DECODER_BUF_SIZE)
00357                 oggbufsize = OGG_SPEEX_DECODER_BUF_SIZE;
00358                 
00359             char *oggbuf = ogg_sync_buffer(oggsync, oggbufsize);
00360             if (oggbuf == NULL)
00361             {
00362                 seterror("error allocating ogg sync buffer");
00363                 return false;
00364             }
00365             inbuf.move(oggbuf, oggbufsize);
00366             ogg_sync_wrote(oggsync, oggbufsize);
00367         }
00368         // we got a page!
00369         if (! process_page(oggpage, outbuf))
00370             return false;
00371         
00372         // return immediately after we see the header if not flushing
00373         // guarantee no data has been decoded yet since Ogg Speex
00374         // spec says that the audio data must begin on a fresh page
00375         // following the headers
00376         if (checkheaderok && isheaderok())
00377             return true;
00378     }
00379 }
00380 
00381 
00382 bool WvOggSpeexDecoder::_typedfinish(OBuffer &outbuf)
00383 {
00384     if (! isheaderok())
00385     {
00386         seterror("failed to detect an Ogg Speex stream");
00387         return false;
00388     }
00389     return true;
00390 }
00391 
00392 
00393 bool WvOggSpeexDecoder::process_page(ogg_page *oggpage,
00394     OBuffer &outbuf)
00395 {       
00396     if (need_serialno)
00397     {
00398         // attach to the first bitstream we find
00399         long serialno = ogg_page_serialno(oggpage);
00400         if (! prepare_stream(serialno))
00401             return false;
00402         need_serialno = false;
00403     }
00404     // submit the page to the bitstream
00405     if (ogg_stream_pagein(oggstream, oggpage) != 0)
00406     {
00407         // this page was bad, or did not match the stream's
00408         // serial number exactly, skip it
00409         return true;
00410     }
00411     return true;
00412 }
00413 
00414 
00415 bool WvOggSpeexDecoder::process_packet(ogg_packet *oggpacket,
00416     OBuffer &outbuf)
00417 {
00418     if (need_headers > 0)
00419     {
00420         // output headers
00421         bool success = need_headers == 2 ?
00422             process_speex_header(oggpacket) :
00423             process_comment_header(oggpacket);
00424         if (! success)
00425             return false;
00426         need_headers -= 1;
00427         return true;
00428     }
00429 
00430     // decode audio
00431     WvConstInPlaceBuf buf(oggpacket->packet, oggpacket->bytes);
00432     return speexdec->flush(buf, outbuf);
00433 }
00434 
00435 
00436 bool WvOggSpeexDecoder::process_speex_header(ogg_packet *header)
00437 {
00438     if (! header->b_o_s)
00439     {
00440         seterror("missing speex header at beginning of stream");
00441         return false;
00442     }
00443     SpeexHeader *spxheader = speex_packet_to_header(
00444         (char*)header->packet, header->bytes);
00445     if (! spxheader)
00446     {
00447         seterror("invalid speex header");
00448         return false;
00449     }
00450     if (spxheader->mode < 0 || spxheader->mode >= SPEEX_NB_MODES)
00451     {
00452         seterror("header contains an unrecognized or invalid codec mode");
00453         return false;
00454     }
00455     _vbr = spxheader->vbr;
00456     _nominalbitrate = spxheader->bitrate;
00457     
00458     // create the decoder
00459     speexdec = new WvSpeexDecoder(spxheader->rate, spxheader->nb_channels,
00460         WvSpeex::CodecMode(spxheader->mode));
00461     return true;
00462 }
00463 
00464 
00465 bool WvOggSpeexDecoder::process_comment_header(ogg_packet *header)
00466 {
00467     if (! header->b_o_s && header->bytes >= 8)
00468     {
00469         WvConstInPlaceBuf cbuf(header->packet, header->bytes);
00470         unsigned long int length = getint_le(cbuf);
00471         if (length <= cbuf.used() - 4)
00472         {
00473             _vendor = WvString(reinterpret_cast<const char*>(
00474                 cbuf.get(length))).unique();
00475             unsigned long int count = getint_le(cbuf);
00476             while (count * 4 < cbuf.used())
00477             {
00478                 length = getint_le(cbuf);
00479                 if (length > cbuf.used())
00480                     break;
00481                 WvString comment(reinterpret_cast<const char*>(
00482                     cbuf.get(length)));
00483                 _comments.append(new WvString(comment.unique()), true);
00484                 count -= 1;
00485             }
00486             if (count == 0)
00487                 return true;
00488         }
00489     }
00490     seterror("invalid comment header");
00491     return false;
00492 }
00493 
00494 
00495 bool WvOggSpeexDecoder::prepare_stream(long serialno)
00496 {
00497     // init ogg bitstream layer
00498     oggstream = new ogg_stream_state;
00499     int retval;
00500     if ((retval = ogg_stream_init(oggstream, serialno)) != 0)
00501     {
00502         seterror("error %s during ogg_stream_init", retval);
00503         return false;
00504     }
00505     return true;
00506 }
00507 
00508 
00509 int WvOggSpeexDecoder::channels() const
00510 {
00511     return speexdec ? speexdec->channels() : 0;
00512 }
00513 
00514 
00515 int WvOggSpeexDecoder::samplingrate() const
00516 {
00517     return speexdec ? speexdec->samplingrate() : 0;
00518 }
00519  
00520 
00521 int WvOggSpeexDecoder::samplesperframe() const
00522 {
00523     return speexdec ? speexdec->samplesperframe() : 0;
00524 }
00525 
00526 
00527 WvSpeex::CodecMode WvOggSpeexDecoder::mode() const
00528 {
00529     return speexdec ? speexdec->mode() : WvSpeex::NARROWBAND_MODE;
00530 }
00531 
00532 
00533 bool WvOggSpeexDecoder::vbr() const
00534 {
00535     return _vbr;
00536 }
00537 
00538 
00539 int WvOggSpeexDecoder::nominalbitrate() const
00540 {
00541     return _nominalbitrate;
00542 }
00543 
00544 
00545 bool WvOggSpeexDecoder::postfilter() const
00546 {
00547     return speexdec ? speexdec->postfilter() : forcepostfilter;
00548 }
00549 
00550 
00551 void WvOggSpeexDecoder::setpostfilter(bool enable)
00552 {
00553     forcepostfilter = enable;
00554     if (speexdec)
00555         speexdec->setpostfilter(enable);
00556 }

Generated on Sat Feb 21 21:05:30 2004 for WvStreams by doxygen 1.3.5