00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 #include <ctype.h>
00060
00061 #include <iomanip>
00062 #include <string>
00063 #include <sstream>
00064
00065 #include "GNURegex.h"
00066 #include "Error.h"
00067 #include "InternalErr.h"
00068
00069 #include "debug.h"
00070
00071 using namespace std;
00072
00073
00074
00075
00076
/**
 * Format a byte as a two-digit hexadecimal string.
 *
 * The value is zero-padded on the left to a width of two and uses the
 * stream's default (lowercase) hex digits, e.g. 10 becomes "0a".
 *
 * @param val The byte to format.
 * @return The two-character hexadecimal representation of @p val.
 */
std::string
hexstring(unsigned char val)
{
    std::ostringstream formatter;

    // Configure the stream explicitly instead of chaining manipulators.
    formatter.setf(std::ios_base::hex, std::ios_base::basefield);
    formatter.fill('0');
    formatter.width(2);

    // Widen first so the byte is printed as a number, not as a character.
    const unsigned int number = static_cast<unsigned int>(val);
    formatter << number;

    return formatter.str();
}
00086
/**
 * Convert a string of hexadecimal digits into the single character whose
 * code it spells, e.g. "41" becomes "A".
 *
 * The value is parsed with formatted stream extraction, so leading
 * whitespace is skipped and parsing stops at the first non-hex character.
 * The parsed value is narrowed to a char.
 *
 * @param s Text holding the hexadecimal digits.
 * @return A one-character string, or an empty string when @p s holds no
 * parsable hexadecimal number or when the decoded character is NUL.
 */
std::string
unhexstring(std::string s)
{
    // Initialized so the value is well defined even on library
    // implementations that leave the target untouched when extraction fails.
    int val = 0;

    std::istringstream ss(s);
    if (!(ss >> std::hex >> val))
        return std::string();

    const char decoded = static_cast<char>(val);

    // A NUL result has always produced an empty string (the old code built
    // the result from a C string); keep that behavior for callers.
    if (decoded == '\0')
        return std::string();

    return std::string(1, decoded);
}
00098
/**
 * Format a byte as a three-digit octal string.
 *
 * The value is zero-padded on the left to a width of three, e.g. 10
 * becomes "012".
 *
 * @param val The byte to format.
 * @return The three-character octal representation of @p val.
 */
std::string
octstring(unsigned char val)
{
    std::ostringstream formatter;

    // Configure the stream explicitly instead of chaining manipulators.
    formatter.setf(std::ios_base::oct, std::ios_base::basefield);
    formatter.fill('0');
    formatter.width(3);

    // Widen first so the byte is printed as a number, not as a character.
    const unsigned int number = static_cast<unsigned int>(val);
    formatter << number;

    return formatter.str();
}
00108
00109 string
00110 unoctstring(string s)
00111 {
00112 int val;
00113
00114 istringstream ss(s);
00115 ss >> oct >> val;
00116
00117 DBG(cerr << "unoctstring: " << val << endl);
00118
00119 char tmp_str[2];
00120 tmp_str[0] = static_cast<char>(val);
00121 tmp_str[1] = '\0';
00122 return string(tmp_str);
00123 }
00124
00149 string
00150 id2www(string in, const string &allowable)
00151 {
00152 string::size_type i = 0;
00153
00154 while ((i = in.find_first_not_of(allowable, i)) != string::npos) {
00155 in.replace(i, 1, "%" + hexstring(in[i]));
00156 i++;
00157 }
00158
00159 return in;
00160 }
00161
00172 string
00173 id2www_ce(string in, const string &allowable)
00174 {
00175 return id2www(in, allowable);
00176 }
00177
00206 string
00207 www2id(const string &in, const string &escape, const string &except)
00208 {
00209 string::size_type i = 0;
00210 string res = in;
00211 while ((i = res.find_first_of(escape, i)) != string::npos) {
00212 if (res.substr(i, 3) == except) {
00213 i += 3;
00214 continue;
00215 }
00216 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00217 }
00218
00219 return res;
00220 }
00221
00222 static string
00223 entity(char c)
00224 {
00225 switch (c) {
00226 case '>': return ">";
00227 case '<': return "<";
00228 case '&': return "&";
00229 case '\'': return "'";
00230 case '\"': return """;
00231 default:
00232 throw InternalErr(__FILE__, __LINE__, "Unrecognized character.");
00233 }
00234 }
00235
00242 string
00243 id2xml(string in, const string ¬_allowed)
00244 {
00245 string::size_type i = 0;
00246
00247 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
00248 in.replace(i, 1, entity(in[i]));
00249 i++;
00250 }
00251
00252 return in;
00253 }
00254
/**
 * Decode the five predefined XML entity references (&gt; &lt; &amp;
 * &apos; &quot;) in @p in back to the characters they stand for.
 *
 * The text is decoded in a single left-to-right pass, so the output of one
 * replacement is never decoded a second time: "&amp;lt;" becomes "&lt;",
 * not "<". An '&' that does not start one of the five references is copied
 * through unchanged.
 *
 * @param in The text to decode (taken by value; the copy is modified).
 * @return The decoded text.
 */
std::string
xml2id(std::string in)
{
    struct Entity {
        const char *text;               // the entity reference
        std::string::size_type length;  // number of characters in 'text'
        char decoded;                   // the character it represents
    };
    static const Entity entities[] = {
        { "&gt;",   4, '>'  },
        { "&lt;",   4, '<'  },
        { "&amp;",  5, '&'  },
        { "&apos;", 6, '\'' },
        { "&quot;", 6, '"'  }
    };
    const unsigned int entity_count = sizeof(entities) / sizeof(entities[0]);

    std::string::size_type i = 0;
    while ((i = in.find('&', i)) != std::string::npos) {
        for (unsigned int k = 0; k < entity_count; ++k) {
            const Entity &candidate = entities[k];
            if (in.compare(i, candidate.length, candidate.text) == 0) {
                in.replace(i, candidate.length, 1, candidate.decoded);
                break;
            }
        }

        // Move past either the decoded character or the unmatched '&'.
        ++i;
    }

    return in;
}
00286
/**
 * Replace each '%' and the (up to) two characters that follow it with a
 * single underscore.
 *
 * The two following characters are not checked for being hexadecimal
 * digits; fewer than two are consumed only at the end of the string.
 *
 * @param s The text to process (taken by value; the copy is modified).
 * @return The text with every '%'-introduced sequence replaced by '_'.
 */
std::string
esc2underscore(std::string s)
{
    std::string::size_type pos = 0;

    // Resume each search just after the underscore that was written rather
    // than rescanning the already-processed prefix from the beginning.
    while ((pos = s.find('%', pos)) != std::string::npos) {
        s.replace(pos, 3, "_");
        ++pos;
    }

    return s;
}
00301
00302
00306 string
00307 escattr(string s)
00308 {
00309 const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\"";
00310 const string ESC = "\\";
00311 const string DOUBLE_ESC = ESC + ESC;
00312 const string QUOTE = "\"";
00313 const string ESCQUOTE = ESC + QUOTE;
00314
00315
00316 string::size_type ind = 0;
00317 while ((ind = s.find_first_not_of(printable, ind)) != s.npos)
00318 s.replace(ind, 1, ESC + octstring(s[ind]));
00319
00320
00321 ind = 0;
00322 while ((ind = s.find(ESC, ind)) != s.npos) {
00323 s.replace(ind, 1, DOUBLE_ESC);
00324 ind += DOUBLE_ESC.length();
00325 }
00326
00327
00328 ind = 0;
00329 while ((ind = s.find(QUOTE, ind)) != s.npos) {
00330 s.replace(ind, 1, ESCQUOTE);
00331 ind += ESCQUOTE.length();
00332 }
00333
00334 return s;
00335 }
00336
00345 string
00346 unescattr(string s)
00347 {
00348 Regex octal("\\\\[0-3][0-7][0-7]");
00349 Regex esc_quote("\\\\\"");
00350 Regex esc_esc("\\\\\\\\");
00351 const string ESC = "\\";
00352 const string QUOTE = "\"";
00353 int matchlen;
00354 unsigned int index;
00355
00356 DBG(cerr << "0XX" << s << "XXX" << endl);
00357
00358 index = 0;
00359 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00360 while (index < s.length()) {
00361 DBG(cerr << "1aXX" << s << "XXX index: " << index << endl);
00362 s.replace(index, 2, ESC);
00363 DBG(cerr << "1bXX" << s << "XXX index: " << index << endl);
00364 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00365 }
00366
00367
00368 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00369 while (index < s.length()) {
00370 s.replace(index, 2, QUOTE);
00371 DBG(cerr << "2XX" << s << "XXX index: " << index << endl);
00372 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00373 }
00374
00375
00376 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00377 while (index < s.length()) {
00378 s.replace(index, 4, unoctstring(s.substr(index + 1, 3)));
00379 DBG(cerr << "3XX" << s << "XXX index: " << index << endl);
00380 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00381 }
00382
00383 DBG(cerr << "4XX" << s << "XXX" << endl);
00384 return s;
00385 }
00386
/**
 * Wrap a message in double quotes and backslash-escape any unescaped
 * double quotes inside it.
 *
 * A leading quote is added unless the message already starts with one, and
 * a trailing quote is added unless it already ends with one. An empty
 * message, or one consisting of a single quote character, yields a pair of
 * quotes. Interior quotes that are not already preceded by a backslash get
 * one inserted before them.
 *
 * @param msg The message text (taken by value; the copy is modified).
 * @return The quoted and escaped message.
 */
std::string
munge_error_message(std::string msg)
{
    // Check for emptiness first: the old code dereferenced begin() and
    // end() - 1 unconditionally, which is undefined for an empty string.
    if (msg.empty() || msg[0] != '"')
        msg.insert(msg.begin(), '"');

    // A lone '"' is an opening quote only, so it still needs closing.
    // Without this the interior scan below started past its own end bound.
    if (msg.length() < 2 || msg[msg.length() - 1] != '"')
        msg += '"';

    // Escape interior quotes; the first and last characters are the
    // delimiters and are skipped.
    for (std::string::size_type i = 1; i + 1 < msg.length(); ++i) {
        if (msg[i] == '"' && msg[i - 1] != '\\') {
            msg.insert(i, 1, '\\');
            ++i;    // now on the quote; the loop increment moves past it
        }
    }

    return msg;
}
00404