00001 /* -*- Mode: C++ -*- 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2002 Net Integration Technologies, Inc. 00004 * 00005 * Functions to handle "tcl-style" strings and lists. 00006 * 00007 * Using wvtcl_encode(), you can encode _any_ list of strings into a single 00008 * string, then reliably split the single string back into the list using 00009 * wvtcl_decode(). 00010 * 00011 * You can create recursive lists of lists by simply running wvtcl_encode() 00012 * on a list of strings returned from wvtcl_encode(). 00013 * 00014 * Example list encodings (all of the following lists have exactly 3 elements): 00015 * foo blah weasels 00016 * e1 elem2 {element 3} 00017 * x1 {} "element 3" 00018 * w x y\ z 00019 * 00020 * Example list of lists: 00021 * foo\ blah\ weasels {e1 elem2 {element 3}} {w x y\ z} 00022 * 00023 * FIXME: 00024 * It would be possible to represent arbitrary binary blobs using this 00025 * technique, but we'd have to avoid using null-terminated strings in a few 00026 * places, particularly in the input to wvtcl_escape(). 00027 * 00028 * We could even make encoded binary blobs printable (although that's not 00029 * _strictly_ necessary in all cases) by encoding non-printable characters 00030 * using \x## notation, if wvtcl_escape() or wvtcl_unescape() supported it. 00031 */ 00032 #ifndef __WVTCLSTRING_H 00033 #define __WVTCLSTRING_H 00034 00035 #include "wvbuf.h" 00036 00037 // the default set of "nasties", ie. characters that need to be escaped if 00038 // they occur somewhere in a string. 00039 #define WVTCL_NASTIES " \t\n\r" 00040 00041 00042 // {, }, \, and " are always considered "nasty." 00043 #define WVTCL_ALWAYS_NASTY "{}\\\"" 00044 00045 00046 // the default set of split characters, ie. characters that separate elements 00047 // in a list. If these characters appear unescaped and not between {} or "" 00048 // in a list, they signify the end of the current element. 00049 #define WVTCL_SPLITCHARS " \t\n\r" 00050 00051 00052 // tcl-escape a string. There are three ways to do this: 00053 // 1) Strings that need no escaping are unchanged. 00054 // 2) Strings containing characters in 'nasties' are usually encoded just 00055 // by enclosing the unmodified string in braces. 00056 // (For example, "foo blah" becomes "{foo blah}") 00057 // 3) Strings containing nasties _and_ unmatched braces are encoded using 00058 // backslash notation. (For example, " foo} " becomes "\ foo\}\ " 00059 WvString wvtcl_escape(WvStringParm s, const char *nasties = WVTCL_NASTIES); 00060 00061 00062 // tcl-unescape a string. This is generally the reverse of wvtcl_escape, 00063 // except we can reverse any backslashified or embraced string, even if it 00064 // doesn't follow the "simplest encoding" rules used by wvtcl_escape. We 00065 // can also handle strings in double-quotes, ie. '"foo"' becomes 'foo'. 00066 WvString wvtcl_unescape(WvStringParm s); 00067 00068 00069 // encode a tcl-style list. This is easily done by tcl-escaping each 00070 // string in 'l', then appending the escaped strings together, separated by 00071 // the first char in splitchars. 00072 WvString wvtcl_encode(WvList<WvString> &l, const char *nasties = WVTCL_NASTIES, 00073 const char *splitchars = WVTCL_SPLITCHARS); 00074 00075 // Get a single tcl word from an input buffer, and return the rest of the 00076 // buffer untouched. If no word can be created from the buffer, return 00077 // a null string and reset the buffer to its original state. 00078 WvString wvtcl_getword(WvBuf &buf, const char *splitchars = WVTCL_SPLITCHARS, 00079 bool do_unescape = true); 00080 00081 // split a tcl-style list. There are some special "convenience" features 00082 // here, which allow users to create lists more flexibly than wvtcl_encode 00083 // would do. 00084 // 00085 // Elements of the list are separated by any number of any characters from 00086 // the 'splitchars' list. 00087 // 00088 // Quotes are allowed around elements: '"foo"' becomes 'foo'. These work 00089 // mostly like braces, except the string is assumed to be backslashified. 00090 // That is, '"\ "' becomes ' ', whereas '{\ }' becomes '\ ' (ie. the backslash 00091 // wouldn't be removed). 00092 // 00093 // Zero-length elements must be represented by {} 00094 // 00095 void wvtcl_decode(WvList<WvString> &l, WvStringParm _s, 00096 const char *splitchars = WVTCL_SPLITCHARS, 00097 bool do_unescape = true); 00098 00099 #endif // __WVTCLSTRING_H