00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 #include <ctype.h>
00060
00061 #include <iomanip>
00062 #include <string>
00063 #include <sstream>
00064
00065 #include "GNURegex.h"
00066 #include "Error.h"
00067 #include "InternalErr.h"
00068
00069 #include "debug.h"
00070
00071 using namespace std;
00072
00073 namespace libdap {
00074
00075
00076
00077
00078
/**
 * Format a byte as exactly two hexadecimal digits.
 *
 * The value is widened to unsigned int before insertion so the stream prints
 * a number rather than a character; single-digit values are padded with '0'.
 *
 * @param val The byte to format.
 * @return A two-character string, "00" through "ff".
 */
std::string
hexstring(unsigned char val)
{
    std::ostringstream formatted;
    formatted << std::setfill('0') << std::hex << std::setw(2)
              << static_cast<unsigned int>(val);

    return formatted.str();
}
00088
/**
 * Convert a string of hexadecimal digits into the single character that has
 * that numeric value.
 *
 * @param s Text holding a hexadecimal number (e.g. "41").
 * @return A one-character string. The result is empty when the parsed value
 * is zero, because the character is handed to std::string through a
 * NUL-terminated buffer.
 */
std::string
unhexstring(std::string s)
{
    std::istringstream parser(s);
    int code = 0;
    parser >> std::hex >> code;

    // Deliberately built from a C string: a zero value yields "".
    const char decoded[2] = { static_cast<char>(code), '\0' };
    return std::string(decoded);
}
00100
/**
 * Format a byte as exactly three octal digits.
 *
 * The value is widened to unsigned int before insertion so the stream prints
 * a number rather than a character; short values are padded with '0'.
 *
 * @param val The byte to format.
 * @return A three-character string, "000" through "377".
 */
std::string
octstring(unsigned char val)
{
    std::ostringstream formatted;
    formatted << std::setfill('0') << std::oct << std::setw(3)
              << static_cast<unsigned int>(val);

    return formatted.str();
}
00110
00111 string
00112 unoctstring(string s)
00113 {
00114 int val;
00115
00116 istringstream ss(s);
00117 ss >> oct >> val;
00118
00119 DBG(cerr << "unoctstring: " << val << endl);
00120
00121 char tmp_str[2];
00122 tmp_str[0] = static_cast<char>(val);
00123 tmp_str[1] = '\0';
00124 return string(tmp_str);
00125 }
00126
00151 string
00152 id2www(string in, const string &allowable)
00153 {
00154 string::size_type i = 0;
00155
00156 while ((i = in.find_first_not_of(allowable, i)) != string::npos) {
00157 in.replace(i, 1, "%" + hexstring(in[i]));
00158 i++;
00159 }
00160
00161 return in;
00162 }
00163
00174 string
00175 id2www_ce(string in, const string &allowable)
00176 {
00177 return id2www(in, allowable);
00178 }
00179
00212 string
00213 www2id(const string &in, const string &escape, const string &except)
00214 {
00215 string::size_type i = 0;
00216 string res = in;
00217 while ((i = res.find_first_of(escape, i)) != string::npos) {
00218 if (except.find(res.substr(i, 3)) != string::npos) {
00219 i += 3;
00220 continue;
00221 }
00222 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00223 }
00224
00225 return res;
00226 }
00227
00228 static string
00229 entity(char c)
00230 {
00231 switch (c) {
00232 case '>': return ">";
00233 case '<': return "<";
00234 case '&': return "&";
00235 case '\'': return "'";
00236 case '\"': return """;
00237 default:
00238 throw InternalErr(__FILE__, __LINE__, "Unrecognized character.");
00239 }
00240 }
00241
00242
00243
/**
 * Re-express a number written in octal digits as hexadecimal digits.
 *
 * @param octal_digits Text holding an octal number (e.g. "101").
 * @return The same value in lowercase hexadecimal, zero-padded to at least
 * two digits (e.g. "41").
 */
std::string
octal_to_hex(const std::string &octal_digits)
{
    std::istringstream parser(octal_digits);
    int value = 0;
    parser >> std::oct >> value;

    std::ostringstream formatted;
    formatted << std::setfill('0') << std::hex << std::setw(2) << value;
    return formatted.str();
}
00256
00263 string
00264 id2xml(string in, const string ¬_allowed)
00265 {
00266 string::size_type i = 0;
00267
00268 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
00269 in.replace(i, 1, entity(in[i]));
00270 ++i;
00271 }
00272 #if 0
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284 string octal_escape = "\\\\";
00285 i = 0;
00286 string::size_type length = in.length();
00287 while ((i = in.find(octal_escape, i)) != string::npos) {
00288
00289 string::size_type j = i + 2;
00290 if (j + 1 >= length)
00291 break;
00292 string octal_digits = in.substr(j, 3);
00293
00294 string hex_escape = string("&#x");
00295 hex_escape.append(octal_to_hex(octal_digits));
00296 hex_escape.append(string(";"));
00297
00298
00299 in.replace(i, 5, hex_escape);
00300
00301
00302 i += 6;
00303 }
00304 #endif
00305 return in;
00306 }
00307
/**
 * Replace the five predefined XML entity references (&gt; &lt; &amp; &apos;
 * &quot;) with the characters they stand for.
 *
 * The text is decoded in a single left-to-right pass and decoded output is
 * never examined again, so "&amp;quot;" becomes the text "&quot;" rather than
 * a double quote. An ampersand that does not start one of the five
 * references is left untouched.
 *
 * @param in The XML-encoded text (taken by value and modified in place).
 * @return The decoded text.
 */
std::string
xml2id(std::string in)
{
    struct EntityRef {
        const char *text;
        std::string::size_type length;
        char value;
    };
    static const EntityRef refs[] = {
        { "&gt;", 4, '>' },
        { "&lt;", 4, '<' },
        { "&amp;", 5, '&' },
        { "&apos;", 6, '\'' },
        { "&quot;", 6, '"' }
    };
    static const std::string::size_type ref_count
        = sizeof(refs) / sizeof(refs[0]);

    std::string::size_type i = 0;
    while ((i = in.find('&', i)) != std::string::npos) {
        for (std::string::size_type k = 0; k < ref_count; ++k) {
            // compare() clamps the span to the end of the string, so a
            // truncated reference at the tail simply fails to match.
            if (in.compare(i, refs[k].length, refs[k].text) == 0) {
                in.replace(i, refs[k].length, 1, refs[k].value);
                break;
            }
        }
        // Step over either the decoded character or a literal '&'.
        ++i;
    }

    return in;
}
00339
/**
 * Replace each '%' together with the two characters that follow it by a
 * single underscore.
 *
 * When fewer than two characters follow the '%', everything from the '%' to
 * the end of the string is replaced.
 *
 * @param s The text to rewrite (taken by value and modified in place).
 * @return The rewritten text.
 */
std::string
esc2underscore(std::string s)
{
    std::string::size_type percent = s.find('%');
    while (percent != std::string::npos) {
        s.replace(percent, 3, "_");
        // Nothing before this point can contain '%', so resume just after
        // the underscore.
        percent = s.find('%', percent + 1);
    }

    return s;
}
00354
00355
00359 string
00360 escattr(string s)
00361 {
00362 const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\"";
00363 const string ESC = "\\";
00364 const string DOUBLE_ESC = ESC + ESC;
00365 const string QUOTE = "\"";
00366 const string ESCQUOTE = ESC + QUOTE;
00367
00368
00369 string::size_type ind = 0;
00370 while ((ind = s.find_first_not_of(printable, ind)) != s.npos)
00371 s.replace(ind, 1, ESC + octstring(s[ind]));
00372
00373
00374 ind = 0;
00375 while ((ind = s.find(ESC, ind)) != s.npos) {
00376 s.replace(ind, 1, DOUBLE_ESC);
00377 ind += DOUBLE_ESC.length();
00378 }
00379
00380
00381 ind = 0;
00382 while ((ind = s.find(QUOTE, ind)) != s.npos) {
00383 s.replace(ind, 1, ESCQUOTE);
00384 ind += ESCQUOTE.length();
00385 }
00386
00387 return s;
00388 }
00389
00398 string
00399 unescattr(string s)
00400 {
00401 Regex octal("\\\\[0-3][0-7][0-7]");
00402 Regex esc_quote("\\\\\"");
00403 Regex esc_esc("\\\\\\\\");
00404 const string ESC = "\\";
00405 const string QUOTE = "\"";
00406 int matchlen;
00407 unsigned int index;
00408
00409 DBG(cerr << "0XX" << s << "XXX" << endl);
00410
00411 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00412 while (index < s.length()) {
00413 DBG(cerr << "1aXX" << s << "XXX index: " << index << endl);
00414 s.replace(index, 2, ESC);
00415 DBG(cerr << "1bXX" << s << "XXX index: " << index << endl);
00416 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00417 }
00418
00419
00420 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00421 while (index < s.length()) {
00422 s.replace(index, 2, QUOTE);
00423 DBG(cerr << "2XX" << s << "XXX index: " << index << endl);
00424 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00425 }
00426
00427
00428 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00429 while (index < s.length()) {
00430 s.replace(index, 4, unoctstring(s.substr(index + 1, 3)));
00431 DBG(cerr << "3XX" << s << "XXX index: " << index << endl);
00432 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00433 }
00434
00435 DBG(cerr << "4XX" << s << "XXX" << endl);
00436 return s;
00437 }
00438
/**
 * Make an error message safe to embed as a double-quoted string.
 *
 * The message is wrapped in double quotes unless it already starts and ends
 * with one, and every interior double quote that is not already preceded by a
 * backslash gets one. An empty message, or one that consists of a single
 * double quote, yields an empty quoted string.
 *
 * @param msg The message text (taken by value and modified in place).
 * @return The quoted message.
 */
std::string
munge_error_message(std::string msg)
{
    // Ensure an opening quote; testing empty() first avoids reading msg[0]
    // of an empty string.
    if (msg.empty() || msg[0] != '"')
        msg.insert(msg.begin(), '"');

    // Ensure a closing quote. A lone '"' is only the opening quote, so the
    // length check makes sure a separate closing quote is appended.
    if (msg.length() < 2 || msg[msg.length() - 1] != '"')
        msg += '"';

    // Escape interior quotes, leaving the first and last characters alone.
    for (std::string::size_type i = 1; i + 1 < msg.length(); ++i) {
        if (msg[i] == '"' && msg[i - 1] != '\\') {
            msg.insert(i, 1, '\\');
            ++i;  // Now at the quote just escaped; the loop steps past it.
        }
    }

    return msg;
}
00456
/**
 * Put a backslash in front of every double quote in @p source.
 *
 * Quotes are escaped unconditionally; a quote that is already preceded by a
 * backslash receives another one.
 *
 * @param source The text to escape.
 * @return The escaped text.
 */
std::string
escape_double_quotes(std::string source)
{
    std::string escaped;
    escaped.reserve(source.length());

    for (std::string::size_type pos = 0; pos < source.length(); ++pos) {
        if (source[pos] == '\"')
            escaped += "\\\"";
        else
            escaped += source[pos];
    }

    return escaped;
}
00472
/**
 * Replace every backslash-quote pair in @p source with a bare double quote.
 *
 * Pairs are matched left to right without overlap; any other character,
 * including a backslash not followed by a quote, is copied unchanged.
 *
 * @param source The text to unescape.
 * @return The unescaped text.
 */
std::string
unescape_double_quotes(std::string source)
{
    std::string unescaped;
    unescaped.reserve(source.length());

    std::string::size_type pos = 0;
    while (pos < source.length()) {
        const bool escaped_quote = source[pos] == '\\'
                                   && pos + 1 < source.length()
                                   && source[pos + 1] == '\"';
        if (escaped_quote) {
            unescaped += "\"";
            pos += 2;
        }
        else {
            unescaped += source[pos];
            ++pos;
        }
    }

    return unescaped;
}
00489
00490 }
00491