00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "config.h"
00028
00029 static char rcsid[] not_used =
00030 { "$Id: HTTPConnect.cc 16759 2007-06-26 19:37:30Z jimg $"
00031 };
00032
00033 #include <stdio.h>
00034
00035 #ifdef WIN32
00036 #include <io.h>
00037 #else
00038 #include <unistd.h>
00039 #endif
00040
00041 #include <string>
00042 #include <vector>
00043 #include <functional>
00044 #include <algorithm>
00045 #include <sstream>
00046 #include <iterator>
00047
00048
00049
00050
00051 #include "debug.h"
00052 #include "GNURegex.h"
00053 #include "HTTPCache.h"
00054 #include "HTTPConnect.h"
00055 #include "RCReader.h"
00056 #include "HTTPResponse.h"
00057 #include "HTTPCacheResponse.h"
00058
00059 using namespace std;
00060
00061
00062
00063
00064
00065
00066
/** When non-zero, www_lib_init() prints the libcurl version string and
    enables libcurl's verbose mode with curl_debug() as the trace callback. */
int www_trace = 0;

/** Flag with external linkage, initially zero. It is not read anywhere in
    this file. NOTE(review): the name suggests it controls whether temporary
    files are kept - confirm against the code that consults it. */
int dods_keep_temps = 0;
00071
// Inclusive range of HTTP client-error status codes that have a message in
// http_client_errors.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417

// Messages for the client-error codes, indexed by (status - CLIENT_ERR_MIN).
// The elements point at string literals, so they are `const char *`;
// converting a literal to plain `char *` is not valid in C++11 and later.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
{
    "Bad Request:",
    "Unauthorized: Contact the server administrator.",
    "Payment Required.",
    "Forbidden: Contact the server administrator.",
    "Not Found: The data source or server could not be found.\n"
    "Often this means that the OPeNDAP server is missing or needs attention;\n"
    "Please contact the server administrator.",
    "Method Not Allowed.",
    "Not Acceptable.",
    "Proxy Authentication Required.",
    "Request Time-out.",
    "Conflict.",
    "Gone:.",
    "Length Required.",
    "Precondition Failed.",
    "Request Entity Too Large.",
    "Request URI Too Large.",
    "Unsupported Media Type.",
    "Requested Range Not Satisfiable.",
    "Expectation Failed."
};

// Inclusive range of HTTP server-error status codes that have a message in
// http_server_errors.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505

// Messages for the server-error codes, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
{
    "Internal Server Error.",
    "Not Implemented.",
    "Bad Gateway.",
    "Service Unavailable.",
    "Gateway Time-out.",
    "HTTP Version Not Supported."
};

/** Translate an HTTP status code into a human-readable message.

    @param status The HTTP status code.
    @return The table entry for codes in CLIENT_ERR_MIN..CLIENT_ERR_MAX or
    SERVER_ERR_MIN..SERVER_ERR_MAX; for every other value, a generic
    "Unknown Error" message. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
00122
00127 class ParseHeader : public unary_function<const string &, void>
00128 {
00129 ObjectType type;
00130 string server;
00131 string protocol;
00132
00133 public:
00134 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
00135 { }
00136
00137 void operator()(const string &header)
00138 {
00139 std::istringstream line(header);
00140
00141 string name;
00142 line >> name;
00143 downcase(name);
00144 if (name == "content-description:") {
00145 string value;
00146 line >> value;
00147 downcase(value);
00148 DBG2(cout << name << ": " << value << endl);
00149 type = get_type(value);
00150 }
00151
00152
00153
00154 else if (name == "xdods-server:" && server == "dods/0.0") {
00155 string value;
00156 line >> value;
00157 downcase(value);
00158 DBG2(cout << name << ": " << value << endl);
00159 server = value;
00160 }
00161 else if (name == "xopendap-server:") {
00162 string value;
00163 line >> value;
00164 downcase(value);
00165 DBG2(cout << name << ": " << value << endl);
00166 server = value;
00167 }
00168 else if (name == "xdap:") {
00169 string value;
00170 line >> value;
00171 downcase(value);
00172 DBG2(cout << name << ": " << value << endl);
00173 protocol = value;
00174 }
00175 else if (server == "dods/0.0" && name == "server:") {
00176 string value;
00177 line >> value;
00178 downcase(value);
00179 DBG2(cout << name << ": " << value << endl);
00180 server = value;
00181 }
00182 else if (type == unknown_type && name == "content-type:"
00183 && line.str().find("text/html") != string::npos) {
00184 DBG2(cout << name << ": text/html..." << endl);
00185 type = web_error;
00186 }
00187 }
00188
00189 ObjectType get_object_type()
00190 {
00191 return type;
00192 }
00193
00194 string get_server()
00195 {
00196 return server;
00197 }
00198
00199 string get_protocol()
00200 {
00201 return protocol;
00202 }
00203 };
00204
/** Header callback registered with libcurl (CURLOPT_HEADERFUNCTION). Each
    call is handed one block of header data; the block is stored, without its
    line terminator, in the vector passed through `resp_hdrs`.

    Blank lines and the HTTP status line are not stored. The status line is
    recognised by its "HTTP/" prefix; headers that merely contain the text
    "HTTP" somewhere (for example in a Server value) are kept.

    @param ptr Start of the header data; not NUL-terminated.
    @param size Size of one data item.
    @param nmemb Number of data items; the data is size * nmemb bytes long.
    @param resp_hdrs A `vector<string> *` that receives the header.
    @return size * nmemb, the number of bytes consumed. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    std::vector<std::string> *hdrs
        = static_cast<std::vector<std::string> *>(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Strip a trailing LF and then a trailing CR, but only when they are
    // really there; an empty block must not underflow the length.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    std::string complete_line;
    if (length > 0)
        complete_line.assign(data, length);

    // Store all non-empty headers that are not the HTTP status line.
    if (hdrs && !complete_line.empty()
        && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
00242
00244 static int
00245 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
00246 {
00247 string message(msg, size);
00248
00249 switch (info) {
00250 case CURLINFO_TEXT:
00251 cerr << "Text: " << message; break;
00252 case CURLINFO_HEADER_IN:
00253 cerr << "Header in: " << message; break;
00254 case CURLINFO_HEADER_OUT:
00255 cerr << "Header out: " << message; break;
00256 case CURLINFO_DATA_IN:
00257 cerr << "Data in: " << message; break;
00258 case CURLINFO_DATA_OUT:
00259 cerr << "Data out: " << message; break;
00260 case CURLINFO_END:
00261 cerr << "End: " << message; break;
00262 #ifdef CURLINFO_SSL_DATA_IN
00263 case CURLINFO_SSL_DATA_IN:
00264 cerr << "SSL Data in: " << message; break;
00265 #endif
00266 #ifdef CURLINFO_SSL_DATA_OUT
00267 case CURLINFO_SSL_DATA_OUT:
00268 cerr << "SSL Data out: " << message; break;
00269 #endif
00270 default:
00271 cerr << "Curl info: " << message; break;
00272 }
00273 return 0;
00274 }
00275
00279 void
00280 HTTPConnect::www_lib_init()
00281 {
00282 d_curl = curl_easy_init();
00283 if (!d_curl)
00284 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
00285
00286
00287
00288
00289
00290 if (!d_rcr->get_proxy_server_host().empty()) {
00291 DBG(cerr << "Setting up a proxy server." << endl);
00292 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
00293 << endl);
00294 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
00295 << endl);
00296 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
00297 << endl);
00298 curl_easy_setopt(d_curl, CURLOPT_PROXY,
00299 d_rcr->get_proxy_server_host().c_str());
00300 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
00301 d_rcr->get_proxy_server_port());
00302
00303 if (!d_rcr->get_proxy_server_userpw().empty())
00304 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
00305 d_rcr->get_proxy_server_userpw().c_str());
00306 }
00307
00308 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
00309
00310
00311 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
00312
00313
00314
00315
00316 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00317
00318 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
00319 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
00320 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
00321
00322
00323
00324
00325 if (!d_rcr->get_validate_ssl() == 0) {
00326 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
00327 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
00328 }
00329
00330 if (www_trace) {
00331 cerr << "Curl version: " << curl_version() << endl;
00332 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
00333 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
00334 }
00335 }
00336
00340 class BuildHeaders : public unary_function<const string &, void>
00341 {
00342 struct curl_slist *d_cl;
00343
00344 public:
00345 BuildHeaders() : d_cl(0)
00346 {}
00347
00348 void operator()(const string &header)
00349 {
00350 DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
00351 << endl);
00352 d_cl = curl_slist_append(d_cl, header.c_str());
00353 }
00354
00355 struct curl_slist *get_headers()
00356 {
00357 return d_cl;
00358 }
00359 };
00360
00375 long
00376 HTTPConnect::read_url(const string &url, FILE *stream,
00377 vector<string> *resp_hdrs,
00378 const vector<string> *headers)
00379 {
00380 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
00381
00382 #ifdef WIN32
00383
00384
00385
00386
00387
00388
00389
00390 curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00391 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
00392 #else
00393 curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00394 #endif
00395
00396 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00397 ostream_iterator<string>(cerr, "\n")));
00398
00399 BuildHeaders req_hdrs;
00400 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
00401 req_hdrs);
00402 if (headers)
00403 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
00404 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
00405
00406 if (d_accept_deflate)
00407 curl_easy_setopt(d_curl, CURLOPT_ENCODING, "deflate");
00408
00409
00410 bool temporary_proxy = false;
00411 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
00412 DBG(cerr << "Suppress proxy for url: " << url << endl);
00413 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
00414 }
00415
00416 string::size_type at_sign = url.find('@');
00417
00418
00419
00420 if (at_sign != url.npos)
00421 d_upstring = url.substr(7, at_sign - 7);
00422
00423 if (!d_upstring.empty())
00424 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
00425
00426
00427
00428
00429 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
00430
00431 CURLcode res = curl_easy_perform(d_curl);
00432
00433
00434 curl_slist_free_all(req_hdrs.get_headers());
00435 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
00436
00437
00438 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
00439 curl_easy_setopt(d_curl, CURLOPT_PROXY,
00440 d_rcr->get_proxy_server_host().c_str());
00441
00442 if (res != 0)
00443 throw Error(d_error_buffer);
00444
00445 long status;
00446 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
00447 if (res != 0)
00448 throw Error(d_error_buffer);
00449
00450 return status;
00451 }
00452
00456 bool
00457 HTTPConnect::url_uses_proxy_for(const string &url) throw()
00458 {
00459 if (d_rcr->is_proxy_for_used()) {
00460 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
00461 int index = 0, matchlen;
00462 return host_regex.search(url.c_str(), url.size(), matchlen, index)
00463 != -1;
00464 }
00465
00466 return false;
00467 }
00468
00472 bool
00473 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
00474 {
00475 return d_rcr->is_no_proxy_for_used()
00476 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
00477 }
00478
00479
00480
00487 HTTPConnect::HTTPConnect(RCReader *rcr) throw(Error, InternalErr)
00488 : d_username(""), d_password("")
00489 {
00490 d_accept_deflate = rcr->get_deflate();
00491 d_rcr = rcr;
00492
00493
00494
00495
00496
00497 d_request_headers.push_back(string("Pragma:"));
00498 string user_agent = string("User-Agent: ") + string(CNAME)
00499 + string("/") + string(CVER);
00500 d_request_headers.push_back(user_agent);
00501 if (d_accept_deflate)
00502 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00503
00504
00505 if (d_rcr->get_use_cache())
00506 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
00507 false);
00508 else
00509 d_http_cache = 0;
00510
00511 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
00512 << ")" << endl);
00513
00514 if (d_http_cache) {
00515 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
00516 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
00517 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
00518 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
00519 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
00520 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
00521 }
00522
00523 www_lib_init();
00524 }
00525
00526 HTTPConnect::~HTTPConnect()
00527 {
00528 DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
00529
00530 curl_easy_cleanup(d_curl);
00531
00532 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
00533 }
00534
00547 HTTPResponse *
00548 HTTPConnect::fetch_url(const string &url)
00549 {
00550 HTTPResponse *stream;
00551
00552 if (d_http_cache && d_http_cache->is_cache_enabled()) {
00553 stream = caching_fetch_url(url);
00554 }
00555 else {
00556 stream = plain_fetch_url(url);
00557 }
00558
00559 ParseHeader parser;
00560
00561 parser = for_each(stream->get_headers()->begin(),
00562 stream->get_headers()->end(), ParseHeader());
00563
00564 stream->set_type(parser.get_object_type());
00565 stream->set_version(parser.get_server());
00566 stream->set_protocol(parser.get_protocol());
00567
00568 return stream;
00569 }
00570
00589 string
00590 get_temp_file(FILE *&stream) throw(InternalErr)
00591 {
00592
00593 char *dods_temp = get_tempfile_template("dodsXXXXXX");
00594
00595
00596 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
00597 stream = fopen(_mktemp(dods_temp), "w+b");
00598 #else
00599 stream = fdopen(mkstemp(dods_temp), "w+");
00600 #endif
00601
00602 if (!stream)
00603 throw InternalErr("I/O Error: Failed to open a temporary file for the data values.");
00604
00605 string dods_temp_s = dods_temp;
00606 delete[] dods_temp; dods_temp = 0;
00607
00608 return dods_temp_s;
00609 }
00610
/** Close a temporary file and delete it from the file system.

    @param s The open stream; it is closed.
    @param name The file's name; the file is unlinked whether or not the
    close succeeded. */
void
close_temp(FILE *s, const std::string &name)
{
    // A failed close is only reported in debug builds.
    if (fclose(s) != 0)
        DBG(std::cerr << "Failed to close " << (void *)s << std::endl);

    unlink(name.c_str());
}
00621
00643 HTTPResponse *
00644 HTTPConnect::caching_fetch_url(const string &url)
00645 {
00646 DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
00647
00648 if (d_http_cache->is_url_in_cache(url)) {
00649 DBGN(cerr << "yes... ");
00650
00651 if (d_http_cache->is_url_valid(url)) {
00652 DBGN(cerr << "and it's valid; using cached response." << endl);
00653
00654 vector<string> *headers = new vector<string>;;
00655 FILE *s = d_http_cache->get_cached_response(url, *headers);
00656 HTTPCacheResponse *crs = new HTTPCacheResponse(s, headers, d_http_cache);
00657
00658 return crs;
00659 }
00660 else {
00661 DBGN(cerr << "but it's not valid; validating... ");
00662
00663
00664 vector<string> *resp_hdrs = new vector<string>;
00665 vector<string> cond_hdrs
00666 = d_http_cache->get_conditional_request_headers(url);
00667 FILE *body = 0;
00668 string dods_temp = get_temp_file(body);
00669 time_t now = time(0);
00670 long http_status;
00671
00672 try {
00673 http_status = read_url(url, body, resp_hdrs, &cond_hdrs);
00674 rewind(body);
00675 }
00676 catch (Error &e) {
00677 close_temp(body, dods_temp);
00678 throw;
00679 }
00680
00681 switch (http_status) {
00682 case 200: {
00683 DBGN(cerr << "read a new response; caching." << endl);
00684
00685 d_http_cache->cache_response(url, now, *resp_hdrs, body);
00686 HTTPResponse *rs = new HTTPResponse(body, resp_hdrs,
00687 dods_temp);
00688
00689 return rs;
00690 }
00691 break;
00692
00693 case 304: {
00694 DBGN(cerr << "cached response valid; updating." << endl);
00695
00696 close_temp(body, dods_temp);
00697 d_http_cache->update_response(url, now, *resp_hdrs);
00698
00699 vector<string> *headers = new vector<string>;;
00700 FILE *s = d_http_cache->get_cached_response(url, *headers);
00701 HTTPCacheResponse *crs
00702 = new HTTPCacheResponse(s, headers, d_http_cache);
00703 return crs;
00704 }
00705 break;
00706
00707 default: {
00708 close_temp(body, dods_temp);
00709 if (http_status >= 400) {
00710 string msg = "Error while reading the URL: ";
00711 msg += url;
00712 msg += ".\nThe OPeNDAP server returned the following message:\n";
00713 msg += http_status_to_string(http_status);
00714 throw Error(msg);
00715 }
00716 else
00717 throw InternalErr(__FILE__, __LINE__,
00718 "Bad response from the HTTP server: " + long_to_string(http_status));
00719 }
00720 break;
00721 }
00722 }
00723 }
00724 else {
00725 DBGN(cerr << "no; getting response and caching." << endl);
00726 time_t now = time(0);
00727 HTTPResponse *rs = plain_fetch_url(url);
00728 d_http_cache->cache_response(url, now, *(rs->get_headers()),
00729 rs->get_stream());
00730
00731 return rs;
00732 }
00733
00734 throw InternalErr(__FILE__, __LINE__, "Unexpected cache response.");
00735 }
00736
00737
00749 HTTPResponse *
00750 HTTPConnect::plain_fetch_url(const string &url)
00751 {
00752 DBG(cerr << "Getting URL: " << url << endl);
00753 FILE *stream = 0;
00754 string dods_temp = get_temp_file(stream);
00755 vector<string> *resp_hdrs = new vector<string>;
00756
00757 try {
00758 int status = read_url(url, stream, resp_hdrs);
00759 if (status >= 400) {
00760 string msg = "Error while reading the URL: ";
00761 msg += url;
00762 msg += ".\nThe OPeNDAP server returned the following message:\n";
00763 msg += http_status_to_string(status);
00764 throw Error(msg);
00765 }
00766 }
00767
00768 catch (Error &e) {
00769 close_temp(stream, dods_temp);
00770 throw e;
00771 }
00772
00773 rewind(stream);
00774
00775 return new HTTPResponse(stream, resp_hdrs, dods_temp);
00776 }
00777
00789 void
00790 HTTPConnect::set_accept_deflate(bool deflate)
00791 {
00792 d_accept_deflate = deflate;
00793
00794 if (d_accept_deflate) {
00795 if (find(d_request_headers.begin(), d_request_headers.end(),
00796 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
00797 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00798 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00799 ostream_iterator<string>(cerr, "\n")));
00800 }
00801 else {
00802 vector<string>::iterator i;
00803 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
00804 bind2nd(equal_to<string>(),
00805 string("Accept-Encoding: deflate, gzip, compress")));
00806 d_request_headers.erase(i, d_request_headers.end());
00807 }
00808 }
00809
00825 void
00826 HTTPConnect::set_credentials(const string &u, const string &p)
00827 {
00828 if (u.empty())
00829 return;
00830
00831
00832 d_username = u;
00833 d_password = p;
00834
00835 d_upstring = u + ":" + p;
00836 }