00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_utf.h 00019 * @brief UTF-8 conversion routines 00020 */ 00021 00022 00023 00024 #ifndef SVN_UTF_H 00025 #define SVN_UTF_H 00026 00027 #include <apr_xlate.h> 00028 00029 #include "svn_error.h" 00030 #include "svn_string.h" 00031 00032 #ifdef __cplusplus 00033 extern "C" { 00034 #endif /* __cplusplus */ 00035 00036 00037 #ifndef AS400 00038 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET 00039 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET 00040 #else 00041 /* APR_LOCALE_CHARSET and APR_DEFAULT_CHARSET are defined as ints on 00042 * OS400. */ 00043 #define SVN_APR_LOCALE_CHARSET (const char*)APR_LOCALE_CHARSET 00044 #define SVN_APR_DEFAULT_CHARSET (const char*)APR_DEFAULT_CHARSET 00045 #endif 00046 00047 /** 00048 * Initialize the UTF-8 encoding/decoding routines. 00049 * Allocate cached translation handles in a subpool of @a pool. 00050 * 00051 * @note It is optional to call this function, but if it is used, no other 00052 * svn function may be in use in other threads during the call of this 00053 * function or when @a pool is cleared or destroyed. 00054 * Initializing the UTF-8 routines will improve performance. 00055 * 00056 * @since New in 1.1. 00057 */ 00058 void svn_utf_initialize(apr_pool_t *pool); 00059 00060 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00061 * allocate @a *dest in @a pool. 00062 */ 00063 svn_error_t *svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, 00064 const svn_stringbuf_t *src, 00065 apr_pool_t *pool); 00066 00067 00068 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00069 * @a *dest in @a pool. 00070 */ 00071 svn_error_t *svn_utf_string_to_utf8(const svn_string_t **dest, 00072 const svn_string_t *src, 00073 apr_pool_t *pool); 00074 00075 00076 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00077 * allocate @a *dest in @a pool. 00078 */ 00079 svn_error_t *svn_utf_cstring_to_utf8(const char **dest, 00080 const char *src, 00081 apr_pool_t *pool); 00082 00083 00084 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C 00085 * string @a src; allocate @a *dest in @a pool. 00086 * 00087 * @since New in 1.4. 00088 */ 00089 svn_error_t *svn_utf_cstring_to_utf8_ex2(const char **dest, 00090 const char *src, 00091 const char *frompage, 00092 apr_pool_t *pool); 00093 00094 00095 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is 00096 * ignored. 00097 * 00098 * @deprecated Provided for backward compatibility with the 1.3 API. 00099 */ 00100 svn_error_t *svn_utf_cstring_to_utf8_ex(const char **dest, 00101 const char *src, 00102 const char *frompage, 00103 const char *convset_key, 00104 apr_pool_t *pool); 00105 00106 00107 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00108 * allocate @a *dest in @a pool. 00109 */ 00110 svn_error_t *svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, 00111 const svn_stringbuf_t *src, 00112 apr_pool_t *pool); 00113 00114 00115 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00116 * allocate @a *dest in @a pool. 00117 */ 00118 svn_error_t *svn_utf_string_from_utf8(const svn_string_t **dest, 00119 const svn_string_t *src, 00120 apr_pool_t *pool); 00121 00122 00123 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00124 * allocate @a *dest in @a pool. 00125 */ 00126 svn_error_t *svn_utf_cstring_from_utf8(const char **dest, 00127 const char *src, 00128 apr_pool_t *pool); 00129 00130 00131 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string 00132 * @a src; allocate @a *dest in @a pool. 00133 * 00134 * @since New in 1.4. 00135 */ 00136 svn_error_t *svn_utf_cstring_from_utf8_ex2(const char **dest, 00137 const char *src, 00138 const char *topage, 00139 apr_pool_t *pool); 00140 00141 00142 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is 00143 * ignored. 00144 * 00145 * @deprecated Provided for backward compatibility with the 1.3 API. 00146 */ 00147 svn_error_t *svn_utf_cstring_from_utf8_ex(const char **dest, 00148 const char *src, 00149 const char *topage, 00150 const char *convset_key, 00151 apr_pool_t *pool); 00152 00153 00154 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00155 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00156 * characters the same, and substitutes "?\\XXX" for others, where XXX 00157 * is the unsigned decimal code for that character. 00158 * 00159 * This function cannot error; it is guaranteed to return something. 00160 * First it will recode as described above and then attempt to convert 00161 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00162 * will return the raw fuzzily recoded string, which may or may not be 00163 * meaningful in the client's locale, but is (presumably) better than 00164 * nothing. 00165 * 00166 * ### Notes: 00167 * 00168 * Improvement is possible, even imminent. The original problem was 00169 * that if you converted a UTF-8 string (say, a log message) into a 00170 * locale that couldn't represent all the characters, you'd just get a 00171 * static placeholder saying "[unconvertible log message]". Then 00172 * Justin Erenkrantz pointed out how on platforms that didn't support 00173 * conversion at all, "svn log" would still fail completely when it 00174 * encountered unconvertible data. 00175 * 00176 * Now for both cases, the caller can at least fall back on this 00177 * function, which converts the message as best it can, substituting 00178 * "?\\XXX" escape codes for the non-ascii characters. 00179 * 00180 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00181 * so when we can detect that at configure time, things will change. 00182 * Also, this should (?) be moved to apr/apu eventually. 00183 * 00184 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00185 * details. 00186 */ 00187 const char *svn_utf_cstring_from_utf8_fuzzy(const char *src, 00188 apr_pool_t *pool); 00189 00190 00191 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00192 * allocate @a *dest in @a pool. 00193 */ 00194 svn_error_t *svn_utf_cstring_from_utf8_stringbuf(const char **dest, 00195 const svn_stringbuf_t *src, 00196 apr_pool_t *pool); 00197 00198 00199 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00200 * allocate @a *dest in @a pool. 00201 */ 00202 svn_error_t *svn_utf_cstring_from_utf8_string(const char **dest, 00203 const svn_string_t *src, 00204 apr_pool_t *pool); 00205 00206 #ifdef __cplusplus 00207 } 00208 #endif /* __cplusplus */ 00209 00210 #endif /* SVN_UTF_H */