codecvt_specializations.h

00001 // Locale support (codecvt) -*- C++ -*- 00002 00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 2, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // You should have received a copy of the GNU General Public License along 00017 // with this library; see the file COPYING. If not, write to the Free 00018 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, 00019 // USA. 00020 00021 // As a special exception, you may use this file as part of a free software 00022 // library without restriction. Specifically, if other files instantiate 00023 // templates or use macros or inline functions from this file, or you compile 00024 // this file and link it with other files to produce an executable, this 00025 // file does not by itself cause the resulting executable to be covered by 00026 // the GNU General Public License. This exception does not however 00027 // invalidate any other reasons why the executable file might be covered by 00028 // the GNU General Public License. 00029 00030 // 00031 // ISO C++ 14882: 22.2.1.5 Template class codecvt 00032 // 00033 00034 // Warning: this file is not meant for user inclusion. Use <locale>. 00035 00036 // Written by Benjamin Kosnik <bkoz@cygnus.com> 00037 00038 // XXX 00039 // Define this here so codecvt.cc can have _S_max_size definition. 00040 #define _GLIBCXX_USE___ENC_TRAITS 1 00041 00042 // Extension to use icov for dealing with character encodings, 00043 // including conversions and comparisons between various character 00044 // sets. This object encapsulates data that may need to be shared between 00045 // char_traits, codecvt and ctype. 00046 class __enc_traits 00047 { 00048 public: 00049 // Types: 00050 // NB: A conversion descriptor subsumes and enhances the 00051 // functionality of a simple state type such as mbstate_t. 00052 typedef iconv_t __desc_type; 00053 00054 protected: 00055 // Data Members: 00056 // Max size of charset encoding name 00057 static const int _S_max_size = 32; 00058 // Name of internal character set encoding. 00059 char _M_int_enc[_S_max_size]; 00060 // Name of external character set encoding. 00061 char _M_ext_enc[_S_max_size]; 00062 00063 // Conversion descriptor between external encoding to internal encoding. 00064 __desc_type _M_in_desc; 00065 // Conversion descriptor between internal encoding to external encoding. 00066 __desc_type _M_out_desc; 00067 00068 // Details the byte-order marker for the external encoding, if necessary. 00069 int _M_ext_bom; 00070 00071 // Details the byte-order marker for the internal encoding, if necessary. 00072 int _M_int_bom; 00073 00074 public: 00075 explicit __enc_traits() 00076 : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) 00077 { 00078 memset(_M_int_enc, 0, _S_max_size); 00079 memset(_M_ext_enc, 0, _S_max_size); 00080 } 00081 00082 explicit __enc_traits(const char* __int, const char* __ext, 00083 int __ibom = 0, int __ebom = 0) 00084 : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(__ebom), _M_int_bom(__ibom) 00085 { 00086 strncpy(_M_int_enc, __int, _S_max_size); 00087 strncpy(_M_ext_enc, __ext, _S_max_size); 00088 _M_init(); 00089 } 00090 00091 // 21.1.2 traits typedefs 00092 // p4 00093 // typedef STATE_T state_type 00094 // requires: state_type shall meet the requirements of 00095 // CopyConstructible types (20.1.3) 00096 // NB: This does not preseve the actual state of the conversion 00097 // descriptor member, but it does duplicate the encoding 00098 // information. 00099 __enc_traits(const __enc_traits& __obj): _M_in_desc(0), _M_out_desc(0) 00100 { 00101 strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); 00102 strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); 00103 _M_ext_bom = __obj._M_ext_bom; 00104 _M_int_bom = __obj._M_int_bom; 00105 _M_destroy(); 00106 _M_init(); 00107 } 00108 00109 // Need assignment operator as well. 00110 __enc_traits& 00111 operator=(const __enc_traits& __obj) 00112 { 00113 strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); 00114 strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); 00115 _M_ext_bom = __obj._M_ext_bom; 00116 _M_int_bom = __obj._M_int_bom; 00117 _M_destroy(); 00118 _M_init(); 00119 return *this; 00120 } 00121 00122 ~__enc_traits() 00123 { _M_destroy(); } 00124 00125 void 00126 _M_init() 00127 { 00128 const __desc_type __err = reinterpret_cast<iconv_t>(-1); 00129 if (!_M_in_desc) 00130 { 00131 _M_in_desc = iconv_open(_M_int_enc, _M_ext_enc); 00132 if (_M_in_desc == __err) 00133 __throw_runtime_error(__N("__enc_traits::_M_init " 00134 "creating iconv input descriptor failed")); 00135 } 00136 if (!_M_out_desc) 00137 { 00138 _M_out_desc = iconv_open(_M_ext_enc, _M_int_enc); 00139 if (_M_out_desc == __err) 00140 __throw_runtime_error(__N("__enc_traits::_M_init " 00141 "creating iconv output descriptor failed")); 00142 } 00143 } 00144 00145 void 00146 _M_destroy() 00147 { 00148 const __desc_type __err = reinterpret_cast<iconv_t>(-1); 00149 if (_M_in_desc && _M_in_desc != __err) 00150 { 00151 iconv_close(_M_in_desc); 00152 _M_in_desc = 0; 00153 } 00154 if (_M_out_desc && _M_out_desc != __err) 00155 { 00156 iconv_close(_M_out_desc); 00157 _M_out_desc = 0; 00158 } 00159 } 00160 00161 bool 00162 _M_good() 00163 { 00164 const __desc_type __err = reinterpret_cast<iconv_t>(-1); 00165 bool __test = _M_in_desc && _M_in_desc != __err; 00166 __test &= _M_out_desc && _M_out_desc != __err; 00167 return __test; 00168 } 00169 00170 const __desc_type* 00171 _M_get_in_descriptor() 00172 { return &_M_in_desc; } 00173 00174 const __desc_type* 00175 _M_get_out_descriptor() 00176 { return &_M_out_desc; } 00177 00178 int 00179 _M_get_external_bom() 00180 { return _M_ext_bom; } 00181 00182 int 00183 _M_get_internal_bom() 00184 { return _M_int_bom; } 00185 00186 const char* 00187 _M_get_internal_enc() 00188 { return _M_int_enc; } 00189 00190 const char* 00191 _M_get_external_enc() 00192 { return _M_ext_enc; } 00193 }; 00194 00195 // Partial specialization 00196 // This specialization takes advantage of iconv to provide code 00197 // conversions between a large number of character encodings. 00198 template<typename _InternT, typename _ExternT> 00199 class codecvt<_InternT, _ExternT, __enc_traits> 00200 : public __codecvt_abstract_base<_InternT, _ExternT, __enc_traits> 00201 { 00202 public: 00203 // Types: 00204 typedef codecvt_base::result result; 00205 typedef _InternT intern_type; 00206 typedef _ExternT extern_type; 00207 typedef __enc_traits state_type; 00208 typedef __enc_traits::__desc_type __desc_type; 00209 typedef __enc_traits __enc_type; 00210 00211 // Data Members: 00212 static locale::id id; 00213 00214 explicit 00215 codecvt(size_t __refs = 0) 00216 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00217 { } 00218 00219 explicit 00220 codecvt(__enc_type* __enc, size_t __refs = 0) 00221 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00222 { } 00223 00224 protected: 00225 virtual 00226 ~codecvt() { } 00227 00228 virtual result 00229 do_out(state_type& __state, const intern_type* __from, 00230 const intern_type* __from_end, const intern_type*& __from_next, 00231 extern_type* __to, extern_type* __to_end, 00232 extern_type*& __to_next) const; 00233 00234 virtual result 00235 do_unshift(state_type& __state, extern_type* __to, 00236 extern_type* __to_end, extern_type*& __to_next) const; 00237 00238 virtual result 00239 do_in(state_type& __state, const extern_type* __from, 00240 const extern_type* __from_end, const extern_type*& __from_next, 00241 intern_type* __to, intern_type* __to_end, 00242 intern_type*& __to_next) const; 00243 00244 virtual int 00245 do_encoding() const throw(); 00246 00247 virtual bool 00248 do_always_noconv() const throw(); 00249 00250 virtual int 00251 do_length(state_type&, const extern_type* __from, 00252 const extern_type* __end, size_t __max) const; 00253 00254 virtual int 00255 do_max_length() const throw(); 00256 }; 00257 00258 template<typename _InternT, typename _ExternT> 00259 locale::id 00260 codecvt<_InternT, _ExternT, __enc_traits>::id; 00261 00262 // This adaptor works around the signature problems of the second 00263 // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 00264 // uses 'char**', which matches the POSIX 1003.1-2001 standard. 00265 // Using this adaptor, g++ will do the work for us. 00266 template<typename _T> 00267 inline size_t 00268 __iconv_adaptor(size_t(*__func)(iconv_t, _T, size_t*, char**, size_t*), 00269 iconv_t __cd, char** __inbuf, size_t* __inbytes, 00270 char** __outbuf, size_t* __outbytes) 00271 { return __func(__cd, (_T)__inbuf, __inbytes, __outbuf, __outbytes); } 00272 00273 template<typename _InternT, typename _ExternT> 00274 codecvt_base::result 00275 codecvt<_InternT, _ExternT, __enc_traits>:: 00276 do_out(state_type& __state, const intern_type* __from, 00277 const intern_type* __from_end, const intern_type*& __from_next, 00278 extern_type* __to, extern_type* __to_end, 00279 extern_type*& __to_next) const 00280 { 00281 result __ret = codecvt_base::error; 00282 if (__state._M_good()) 00283 { 00284 typedef state_type::__desc_type __desc_type; 00285 const __desc_type* __desc = __state._M_get_out_descriptor(); 00286 const size_t __fmultiple = sizeof(intern_type); 00287 size_t __fbytes = __fmultiple * (__from_end - __from); 00288 const size_t __tmultiple = sizeof(extern_type); 00289 size_t __tbytes = __tmultiple * (__to_end - __to); 00290 00291 // Argument list for iconv specifies a byte sequence. Thus, 00292 // all to/from arrays must be brutally casted to char*. 00293 char* __cto = reinterpret_cast<char*>(__to); 00294 char* __cfrom; 00295 size_t __conv; 00296 00297 // Some encodings need a byte order marker as the first item 00298 // in the byte stream, to designate endian-ness. The default 00299 // value for the byte order marker is NULL, so if this is 00300 // the case, it's not necessary and we can just go on our 00301 // merry way. 00302 int __int_bom = __state._M_get_internal_bom(); 00303 if (__int_bom) 00304 { 00305 size_t __size = __from_end - __from; 00306 intern_type* __cfixed = static_cast<intern_type*>(__builtin_alloca(sizeof(intern_type) * (__size + 1))); 00307 __cfixed[0] = static_cast<intern_type>(__int_bom); 00308 char_traits<intern_type>::copy(__cfixed + 1, __from, __size); 00309 __cfrom = reinterpret_cast<char*>(__cfixed); 00310 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, 00311 &__fbytes, &__cto, &__tbytes); 00312 } 00313 else 00314 { 00315 intern_type* __cfixed = const_cast<intern_type*>(__from); 00316 __cfrom = reinterpret_cast<char*>(__cfixed); 00317 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, &__fbytes, 00318 &__cto, &__tbytes); 00319 } 00320 00321 if (__conv != size_t(-1)) 00322 { 00323 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00324 __to_next = reinterpret_cast<extern_type*>(__cto); 00325 __ret = codecvt_base::ok; 00326 } 00327 else 00328 { 00329 if (__fbytes < __fmultiple * (__from_end - __from)) 00330 { 00331 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00332 __to_next = reinterpret_cast<extern_type*>(__cto); 00333 __ret = codecvt_base::partial; 00334 } 00335 else 00336 __ret = codecvt_base::error; 00337 } 00338 } 00339 return __ret; 00340 } 00341 00342 template<typename _InternT, typename _ExternT> 00343 codecvt_base::result 00344 codecvt<_InternT, _ExternT, __enc_traits>:: 00345 do_unshift(state_type& __state, extern_type* __to, 00346 extern_type* __to_end, extern_type*& __to_next) const 00347 { 00348 result __ret = codecvt_base::error; 00349 if (__state._M_good()) 00350 { 00351 typedef state_type::__desc_type __desc_type; 00352 const __desc_type* __desc = __state._M_get_in_descriptor(); 00353 const size_t __tmultiple = sizeof(intern_type); 00354 size_t __tlen = __tmultiple * (__to_end - __to); 00355 00356 // Argument list for iconv specifies a byte sequence. Thus, 00357 // all to/from arrays must be brutally casted to char*. 00358 char* __cto = reinterpret_cast<char*>(__to); 00359 size_t __conv = __iconv_adaptor(iconv,*__desc, NULL, NULL, 00360 &__cto, &__tlen); 00361 00362 if (__conv != size_t(-1)) 00363 { 00364 __to_next = reinterpret_cast<extern_type*>(__cto); 00365 if (__tlen == __tmultiple * (__to_end - __to)) 00366 __ret = codecvt_base::noconv; 00367 else if (__tlen == 0) 00368 __ret = codecvt_base::ok; 00369 else 00370 __ret = codecvt_base::partial; 00371 } 00372 else 00373 __ret = codecvt_base::error; 00374 } 00375 return __ret; 00376 } 00377 00378 template<typename _InternT, typename _ExternT> 00379 codecvt_base::result 00380 codecvt<_InternT, _ExternT, __enc_traits>:: 00381 do_in(state_type& __state, const extern_type* __from, 00382 const extern_type* __from_end, const extern_type*& __from_next, 00383 intern_type* __to, intern_type* __to_end, 00384 intern_type*& __to_next) const 00385 { 00386 result __ret = codecvt_base::error; 00387 if (__state._M_good()) 00388 { 00389 typedef state_type::__desc_type __desc_type; 00390 const __desc_type* __desc = __state._M_get_in_descriptor(); 00391 const size_t __fmultiple = sizeof(extern_type); 00392 size_t __flen = __fmultiple * (__from_end - __from); 00393 const size_t __tmultiple = sizeof(intern_type); 00394 size_t __tlen = __tmultiple * (__to_end - __to); 00395 00396 // Argument list for iconv specifies a byte sequence. Thus, 00397 // all to/from arrays must be brutally casted to char*. 00398 char* __cto = reinterpret_cast<char*>(__to); 00399 char* __cfrom; 00400 size_t __conv; 00401 00402 // Some encodings need a byte order marker as the first item 00403 // in the byte stream, to designate endian-ness. The default 00404 // value for the byte order marker is NULL, so if this is 00405 // the case, it's not necessary and we can just go on our 00406 // merry way. 00407 int __ext_bom = __state._M_get_external_bom(); 00408 if (__ext_bom) 00409 { 00410 size_t __size = __from_end - __from; 00411 extern_type* __cfixed = static_cast<extern_type*>(__builtin_alloca(sizeof(extern_type) * (__size + 1))); 00412 __cfixed[0] = static_cast<extern_type>(__ext_bom); 00413 char_traits<extern_type>::copy(__cfixed + 1, __from, __size); 00414 __cfrom = reinterpret_cast<char*>(__cfixed); 00415 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, 00416 &__flen, &__cto, &__tlen); 00417 } 00418 else 00419 { 00420 extern_type* __cfixed = const_cast<extern_type*>(__from); 00421 __cfrom = reinterpret_cast<char*>(__cfixed); 00422 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, 00423 &__flen, &__cto, &__tlen); 00424 } 00425 00426 00427 if (__conv != size_t(-1)) 00428 { 00429 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00430 __to_next = reinterpret_cast<intern_type*>(__cto); 00431 __ret = codecvt_base::ok; 00432 } 00433 else 00434 { 00435 if (__flen < static_cast<size_t>(__from_end - __from)) 00436 { 00437 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00438 __to_next = reinterpret_cast<intern_type*>(__cto); 00439 __ret = codecvt_base::partial; 00440 } 00441 else 00442 __ret = codecvt_base::error; 00443 } 00444 } 00445 return __ret; 00446 } 00447 00448 template<typename _InternT, typename _ExternT> 00449 int 00450 codecvt<_InternT, _ExternT, __enc_traits>:: 00451 do_encoding() const throw() 00452 { 00453 int __ret = 0; 00454 if (sizeof(_ExternT) <= sizeof(_InternT)) 00455 __ret = sizeof(_InternT)/sizeof(_ExternT); 00456 return __ret; 00457 } 00458 00459 template<typename _InternT, typename _ExternT> 00460 bool 00461 codecvt<_InternT, _ExternT, __enc_traits>:: 00462 do_always_noconv() const throw() 00463 { return false; } 00464 00465 template<typename _InternT, typename _ExternT> 00466 int 00467 codecvt<_InternT, _ExternT, __enc_traits>:: 00468 do_length(state_type&, const extern_type* __from, 00469 const extern_type* __end, size_t __max) const 00470 { return std::min(__max, static_cast<size_t>(__end - __from)); } 00471 00472 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00473 // 74. Garbled text for codecvt::do_max_length 00474 template<typename _InternT, typename _ExternT> 00475 int 00476 codecvt<_InternT, _ExternT, __enc_traits>:: 00477 do_max_length() const throw() 00478 { return 1; } 00479

Generated on Wed Sep 8 10:19:28 2004 for libstdc++-v3 Source by doxygen 1.3.8