codecvt_specializations.h

00001 // Locale support (codecvt) -*- C++ -*- 00002 00003 // Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 2, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // You should have received a copy of the GNU General Public License along 00017 // with this library; see the file COPYING. If not, write to the Free 00018 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, 00019 // USA. 00020 00021 // As a special exception, you may use this file as part of a free software 00022 // library without restriction. Specifically, if other files instantiate 00023 // templates or use macros or inline functions from this file, or you compile 00024 // this file and link it with other files to produce an executable, this 00025 // file does not by itself cause the resulting executable to be covered by 00026 // the GNU General Public License. This exception does not however 00027 // invalidate any other reasons why the executable file might be covered by 00028 // the GNU General Public License. 00029 00030 // 00031 // ISO C++ 14882: 22.2.1.5 Template class codecvt 00032 // 00033 00034 // Warning: this file is not meant for user inclusion. Use <locale>. 00035 00036 // Written by Benjamin Kosnik <bkoz@cygnus.com> 00037 00038 // XXX 00039 // Define this here to codecvt.cc can have _S_max_size definition. 00040 #define _GLIBCPP_USE___ENC_TRAITS 1 00041 00042 // Extension to use icov for dealing with character encodings, 00043 // including conversions and comparisons between various character 00044 // sets. This object encapsulates data that may need to be shared between 00045 // char_traits, codecvt and ctype. 00046 class __enc_traits 00047 { 00048 public: 00049 // Types: 00050 // NB: A conversion descriptor subsumes and enhances the 00051 // functionality of a simple state type such as mbstate_t. 00052 typedef iconv_t __desc_type; 00053 00054 protected: 00055 // Data Members: 00056 // Max size of charset encoding name 00057 static const int _S_max_size = 32; 00058 // Name of internal character set encoding. 00059 char _M_int_enc[_S_max_size]; 00060 // Name of external character set encoding. 00061 char _M_ext_enc[_S_max_size]; 00062 00063 // Conversion descriptor between external encoding to internal encoding. 00064 __desc_type _M_in_desc; 00065 // Conversion descriptor between internal encoding to external encoding. 00066 __desc_type _M_out_desc; 00067 00068 // Details the byte-order marker for the external encoding, if necessary. 00069 int _M_ext_bom; 00070 00071 // Details the byte-order marker for the internal encoding, if necessary. 00072 int _M_int_bom; 00073 00074 public: 00075 explicit __enc_traits() 00076 : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) 00077 { 00078 memset(_M_int_enc, 0, _S_max_size); 00079 memset(_M_ext_enc, 0, _S_max_size); 00080 } 00081 00082 explicit __enc_traits(const char* __int, const char* __ext, 00083 int __ibom = 0, int __ebom = 0) 00084 : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) 00085 { 00086 strncpy(_M_int_enc, __int, _S_max_size); 00087 strncpy(_M_ext_enc, __ext, _S_max_size); 00088 } 00089 00090 // 21.1.2 traits typedefs 00091 // p4 00092 // typedef STATE_T state_type 00093 // requires: state_type shall meet the requirements of 00094 // CopyConstructible types (20.1.3) 00095 __enc_traits(const __enc_traits& __obj): _M_in_desc(0), _M_out_desc(0) 00096 { 00097 strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); 00098 strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); 00099 _M_ext_bom = __obj._M_ext_bom; 00100 _M_int_bom = __obj._M_int_bom; 00101 } 00102 00103 // Need assignment operator as well. 00104 __enc_traits& 00105 operator=(const __enc_traits& __obj) 00106 { 00107 strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); 00108 strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); 00109 _M_in_desc = 0; 00110 _M_out_desc = 0; 00111 _M_ext_bom = __obj._M_ext_bom; 00112 _M_int_bom = __obj._M_int_bom; 00113 return *this; 00114 } 00115 00116 ~__enc_traits() 00117 { 00118 __desc_type __err = reinterpret_cast<iconv_t>(-1); 00119 if (_M_in_desc && _M_in_desc != __err) 00120 iconv_close(_M_in_desc); 00121 if (_M_out_desc && _M_out_desc != __err) 00122 iconv_close(_M_out_desc); 00123 } 00124 00125 void 00126 _M_init() 00127 { 00128 const __desc_type __err = reinterpret_cast<iconv_t>(-1); 00129 if (!_M_in_desc) 00130 { 00131 _M_in_desc = iconv_open(_M_int_enc, _M_ext_enc); 00132 if (_M_in_desc == __err) 00133 __throw_runtime_error("creating iconv input descriptor failed."); 00134 } 00135 if (!_M_out_desc) 00136 { 00137 _M_out_desc = iconv_open(_M_ext_enc, _M_int_enc); 00138 if (_M_out_desc == __err) 00139 __throw_runtime_error("creating iconv output descriptor failed."); 00140 } 00141 } 00142 00143 bool 00144 _M_good() 00145 { 00146 const __desc_type __err = reinterpret_cast<iconv_t>(-1); 00147 bool __test = _M_in_desc && _M_in_desc != __err; 00148 __test &= _M_out_desc && _M_out_desc != __err; 00149 return __test; 00150 } 00151 00152 const __desc_type* 00153 _M_get_in_descriptor() 00154 { return &_M_in_desc; } 00155 00156 const __desc_type* 00157 _M_get_out_descriptor() 00158 { return &_M_out_desc; } 00159 00160 int 00161 _M_get_external_bom() 00162 { return _M_ext_bom; } 00163 00164 int 00165 _M_get_internal_bom() 00166 { return _M_int_bom; } 00167 00168 const char* 00169 _M_get_internal_enc() 00170 { return _M_int_enc; } 00171 00172 const char* 00173 _M_get_external_enc() 00174 { return _M_ext_enc; } 00175 }; 00176 00177 // Partial specialization 00178 // This specialization takes advantage of iconv to provide code 00179 // conversions between a large number of character encodings. 00180 template<typename _InternT, typename _ExternT> 00181 class codecvt<_InternT, _ExternT, __enc_traits> 00182 : public __codecvt_abstract_base<_InternT, _ExternT, __enc_traits> 00183 { 00184 public: 00185 // Types: 00186 typedef codecvt_base::result result; 00187 typedef _InternT intern_type; 00188 typedef _ExternT extern_type; 00189 typedef __enc_traits state_type; 00190 typedef __enc_traits::__desc_type __desc_type; 00191 typedef __enc_traits __enc_type; 00192 00193 // Data Members: 00194 static locale::id id; 00195 00196 explicit 00197 codecvt(size_t __refs = 0) 00198 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00199 { } 00200 00201 explicit 00202 codecvt(__enc_type* __enc, size_t __refs = 0) 00203 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00204 { } 00205 00206 protected: 00207 virtual 00208 ~codecvt() { } 00209 00210 virtual result 00211 do_out(state_type& __state, const intern_type* __from, 00212 const intern_type* __from_end, const intern_type*& __from_next, 00213 extern_type* __to, extern_type* __to_end, 00214 extern_type*& __to_next) const; 00215 00216 virtual result 00217 do_unshift(state_type& __state, extern_type* __to, 00218 extern_type* __to_end, extern_type*& __to_next) const; 00219 00220 virtual result 00221 do_in(state_type& __state, const extern_type* __from, 00222 const extern_type* __from_end, const extern_type*& __from_next, 00223 intern_type* __to, intern_type* __to_end, 00224 intern_type*& __to_next) const; 00225 00226 virtual int 00227 do_encoding() const throw(); 00228 00229 virtual bool 00230 do_always_noconv() const throw(); 00231 00232 virtual int 00233 do_length(const state_type&, const extern_type* __from, 00234 const extern_type* __end, size_t __max) const; 00235 00236 virtual int 00237 do_max_length() const throw(); 00238 }; 00239 00240 template<typename _InternT, typename _ExternT> 00241 locale::id 00242 codecvt<_InternT, _ExternT, __enc_traits>::id; 00243 00244 // This adaptor works around the signature problems of the second 00245 // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 00246 // uses 'char**', which matches the POSIX 1003.1-2001 standard. 00247 // Using this adaptor, g++ will do the work for us. 00248 template<typename _T> 00249 inline size_t 00250 __iconv_adaptor(size_t(*__func)(iconv_t, _T, size_t*, char**, size_t*), 00251 iconv_t __cd, char** __inbuf, size_t* __inbytes, 00252 char** __outbuf, size_t* __outbytes) 00253 { return __func(__cd, (_T)__inbuf, __inbytes, __outbuf, __outbytes); } 00254 00255 template<typename _InternT, typename _ExternT> 00256 codecvt_base::result 00257 codecvt<_InternT, _ExternT, __enc_traits>:: 00258 do_out(state_type& __state, const intern_type* __from, 00259 const intern_type* __from_end, const intern_type*& __from_next, 00260 extern_type* __to, extern_type* __to_end, 00261 extern_type*& __to_next) const 00262 { 00263 result __ret = codecvt_base::error; 00264 if (__state._M_good()) 00265 { 00266 typedef state_type::__desc_type __desc_type; 00267 const __desc_type* __desc = __state._M_get_out_descriptor(); 00268 const size_t __fmultiple = sizeof(intern_type); 00269 size_t __fbytes = __fmultiple * (__from_end - __from); 00270 const size_t __tmultiple = sizeof(extern_type); 00271 size_t __tbytes = __tmultiple * (__to_end - __to); 00272 00273 // Argument list for iconv specifies a byte sequence. Thus, 00274 // all to/from arrays must be brutally casted to char*. 00275 char* __cto = reinterpret_cast<char*>(__to); 00276 char* __cfrom; 00277 size_t __conv; 00278 00279 // Some encodings need a byte order marker as the first item 00280 // in the byte stream, to designate endian-ness. The default 00281 // value for the byte order marker is NULL, so if this is 00282 // the case, it's not necessary and we can just go on our 00283 // merry way. 00284 int __int_bom = __state._M_get_internal_bom(); 00285 if (__int_bom) 00286 { 00287 size_t __size = __from_end - __from; 00288 intern_type* __cfixed = static_cast<intern_type*>(__builtin_alloca(sizeof(intern_type) * (__size + 1))); 00289 __cfixed[0] = static_cast<intern_type>(__int_bom); 00290 char_traits<intern_type>::copy(__cfixed + 1, __from, __size); 00291 __cfrom = reinterpret_cast<char*>(__cfixed); 00292 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, 00293 &__fbytes, &__cto, &__tbytes); 00294 } 00295 else 00296 { 00297 intern_type* __cfixed = const_cast<intern_type*>(__from); 00298 __cfrom = reinterpret_cast<char*>(__cfixed); 00299 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, &__fbytes, 00300 &__cto, &__tbytes); 00301 } 00302 00303 if (__conv != size_t(-1)) 00304 { 00305 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00306 __to_next = reinterpret_cast<extern_type*>(__cto); 00307 __ret = codecvt_base::ok; 00308 } 00309 else 00310 { 00311 if (__fbytes < __fmultiple * (__from_end - __from)) 00312 { 00313 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00314 __to_next = reinterpret_cast<extern_type*>(__cto); 00315 __ret = codecvt_base::partial; 00316 } 00317 else 00318 __ret = codecvt_base::error; 00319 } 00320 } 00321 return __ret; 00322 } 00323 00324 template<typename _InternT, typename _ExternT> 00325 codecvt_base::result 00326 codecvt<_InternT, _ExternT, __enc_traits>:: 00327 do_unshift(state_type& __state, extern_type* __to, 00328 extern_type* __to_end, extern_type*& __to_next) const 00329 { 00330 result __ret = codecvt_base::error; 00331 if (__state._M_good()) 00332 { 00333 typedef state_type::__desc_type __desc_type; 00334 const __desc_type* __desc = __state._M_get_in_descriptor(); 00335 const size_t __tmultiple = sizeof(intern_type); 00336 size_t __tlen = __tmultiple * (__to_end - __to); 00337 00338 // Argument list for iconv specifies a byte sequence. Thus, 00339 // all to/from arrays must be brutally casted to char*. 00340 char* __cto = reinterpret_cast<char*>(__to); 00341 size_t __conv = __iconv_adaptor(iconv,*__desc, NULL, NULL, 00342 &__cto, &__tlen); 00343 00344 if (__conv != size_t(-1)) 00345 { 00346 __to_next = reinterpret_cast<extern_type*>(__cto); 00347 if (__tlen == __tmultiple * (__to_end - __to)) 00348 __ret = codecvt_base::noconv; 00349 else if (__tlen == 0) 00350 __ret = codecvt_base::ok; 00351 else 00352 __ret = codecvt_base::partial; 00353 } 00354 else 00355 __ret = codecvt_base::error; 00356 } 00357 return __ret; 00358 } 00359 00360 template<typename _InternT, typename _ExternT> 00361 codecvt_base::result 00362 codecvt<_InternT, _ExternT, __enc_traits>:: 00363 do_in(state_type& __state, const extern_type* __from, 00364 const extern_type* __from_end, const extern_type*& __from_next, 00365 intern_type* __to, intern_type* __to_end, 00366 intern_type*& __to_next) const 00367 { 00368 result __ret = codecvt_base::error; 00369 if (__state._M_good()) 00370 { 00371 typedef state_type::__desc_type __desc_type; 00372 const __desc_type* __desc = __state._M_get_in_descriptor(); 00373 const size_t __fmultiple = sizeof(extern_type); 00374 size_t __flen = __fmultiple * (__from_end - __from); 00375 const size_t __tmultiple = sizeof(intern_type); 00376 size_t __tlen = __tmultiple * (__to_end - __to); 00377 00378 // Argument list for iconv specifies a byte sequence. Thus, 00379 // all to/from arrays must be brutally casted to char*. 00380 char* __cto = reinterpret_cast<char*>(__to); 00381 char* __cfrom; 00382 size_t __conv; 00383 00384 // Some encodings need a byte order marker as the first item 00385 // in the byte stream, to designate endian-ness. The default 00386 // value for the byte order marker is NULL, so if this is 00387 // the case, it's not necessary and we can just go on our 00388 // merry way. 00389 int __ext_bom = __state._M_get_external_bom(); 00390 if (__ext_bom) 00391 { 00392 size_t __size = __from_end - __from; 00393 extern_type* __cfixed = static_cast<extern_type*>(__builtin_alloca(sizeof(extern_type) * (__size + 1))); 00394 __cfixed[0] = static_cast<extern_type>(__ext_bom); 00395 char_traits<extern_type>::copy(__cfixed + 1, __from, __size); 00396 __cfrom = reinterpret_cast<char*>(__cfixed); 00397 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, 00398 &__flen, &__cto, &__tlen); 00399 } 00400 else 00401 { 00402 extern_type* __cfixed = const_cast<extern_type*>(__from); 00403 __cfrom = reinterpret_cast<char*>(__cfixed); 00404 __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, 00405 &__flen, &__cto, &__tlen); 00406 } 00407 00408 00409 if (__conv != size_t(-1)) 00410 { 00411 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00412 __to_next = reinterpret_cast<intern_type*>(__cto); 00413 __ret = codecvt_base::ok; 00414 } 00415 else 00416 { 00417 if (__flen < static_cast<size_t>(__from_end - __from)) 00418 { 00419 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00420 __to_next = reinterpret_cast<intern_type*>(__cto); 00421 __ret = codecvt_base::partial; 00422 } 00423 else 00424 __ret = codecvt_base::error; 00425 } 00426 } 00427 return __ret; 00428 } 00429 00430 template<typename _InternT, typename _ExternT> 00431 int 00432 codecvt<_InternT, _ExternT, __enc_traits>:: 00433 do_encoding() const throw() 00434 { 00435 int __ret = 0; 00436 if (sizeof(_ExternT) <= sizeof(_InternT)) 00437 __ret = sizeof(_InternT)/sizeof(_ExternT); 00438 return __ret; 00439 } 00440 00441 template<typename _InternT, typename _ExternT> 00442 bool 00443 codecvt<_InternT, _ExternT, __enc_traits>:: 00444 do_always_noconv() const throw() 00445 { return false; } 00446 00447 template<typename _InternT, typename _ExternT> 00448 int 00449 codecvt<_InternT, _ExternT, __enc_traits>:: 00450 do_length(const state_type&, const extern_type* __from, 00451 const extern_type* __end, size_t __max) const 00452 { return min(__max, static_cast<size_t>(__end - __from)); } 00453 00454 #ifdef _GLIBCPP_RESOLVE_LIB_DEFECTS 00455 // 74. Garbled text for codecvt::do_max_length 00456 template<typename _InternT, typename _ExternT> 00457 int 00458 codecvt<_InternT, _ExternT, __enc_traits>:: 00459 do_max_length() const throw() 00460 { return 1; } 00461 #endif

Generated on Wed Aug 4 21:43:09 2004 for libstdc++-v3 Source by doxygen 1.3.8