00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "config.h"
00028
00029 static char rcsid[] not_used =
00030 { "$Id: HTTPConnect.cc 16943 2007-08-15 20:15:39Z jimg $"
00031 };
00032
00033 #include <stdio.h>
00034
00035 #ifdef WIN32
00036 #include <io.h>
00037 #else
00038 #include <unistd.h>
00039 #endif
00040
00041 #include <string>
00042 #include <vector>
00043 #include <functional>
00044 #include <algorithm>
00045 #include <sstream>
00046 #include <iterator>
00047
00048
00049
00050
00051 #include "debug.h"
00052 #include "GNURegex.h"
00053 #include "HTTPCache.h"
00054 #include "HTTPConnect.h"
00055 #include "RCReader.h"
00056 #include "HTTPResponse.h"
00057 #include "HTTPCacheResponse.h"
00058
00059 using namespace std;
00060
00061
00062
00063
00064
00065
00066
// Trace flag. When non-zero, HTTPConnect::www_lib_init() prints the libcurl
// version to stderr, enables CURLOPT_VERBOSE and installs curl_debug() as the
// libcurl debug callback.
int www_trace = 0;


// NOTE(review): not read anywhere in this file; presumably tells other code
// to keep temporary files instead of deleting them -- confirm with its users.
int dods_keep_temps = 0;
00071
// Range of HTTP client-error status codes covered by http_client_errors.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for status codes CLIENT_ERR_MIN..CLIENT_ERR_MAX, indexed by
// (status - CLIENT_ERR_MIN). The elements point at string literals, so they
// must be pointers to const char.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention;\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone:.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

// Range of HTTP server-error status codes covered by http_server_errors.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for status codes SERVER_ERR_MIN..SERVER_ERR_MAX, indexed by
// (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/**
 * Translate an HTTP error status code into a human readable message.
 *
 * @param status The HTTP status code.
 * @return The matching entry of http_client_errors (400-417) or
 * http_server_errors (500-505); for any other value, a generic
 * "Unknown Error" message.
 */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
00122
00127 class ParseHeader : public unary_function<const string &, void>
00128 {
00129 ObjectType type;
00130 string server;
00131 string protocol;
00132 string location;
00133
00134 public:
00135 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
00136 { }
00137
00138 void operator()(const string &header)
00139 {
00140 std::istringstream line(header);
00141
00142 string name;
00143 line >> name;
00144 downcase(name);
00145 if (name == "content-description:") {
00146 string value;
00147 line >> value;
00148 downcase(value);
00149 DBG2(cout << name << ": " << value << endl);
00150 type = get_type(value);
00151 }
00152
00153
00154
00155 else if (name == "xdods-server:" && server == "dods/0.0") {
00156 string value;
00157 line >> value;
00158 downcase(value);
00159 DBG2(cout << name << ": " << value << endl);
00160 server = value;
00161 }
00162 else if (name == "xopendap-server:") {
00163 string value;
00164 line >> value;
00165 downcase(value);
00166 DBG2(cout << name << ": " << value << endl);
00167 server = value;
00168 }
00169 else if (name == "xdap:") {
00170 string value;
00171 line >> value;
00172 downcase(value);
00173 DBG2(cout << name << ": " << value << endl);
00174 protocol = value;
00175 }
00176 else if (server == "dods/0.0" && name == "server:") {
00177 string value;
00178 line >> value;
00179 downcase(value);
00180 DBG2(cout << name << ": " << value << endl);
00181 server = value;
00182 }
00183 else if (name == "location:") {
00184 string value;
00185 line >> value;
00186 DBG2(cout << name << ": " << value << endl);
00187 location = value;
00188 }
00189 else if (type == unknown_type && name == "content-type:"
00190 && line.str().find("text/html") != string::npos) {
00191 DBG2(cout << name << ": text/html..." << endl);
00192 type = web_error;
00193 }
00194 }
00195
00196 ObjectType get_object_type()
00197 {
00198 return type;
00199 }
00200
00201 string get_server()
00202 {
00203 return server;
00204 }
00205
00206 string get_protocol()
00207 {
00208 return protocol;
00209 }
00210
00211 string get_location() {
00212 return location;
00213 }
00214 };
00215
/**
 * libcurl header callback (installed with CURLOPT_HEADERFUNCTION). Appends
 * one response header line, without its line terminator, to a vector of
 * strings.
 *
 * Empty lines and lines containing "HTTP" are not stored.
 * NOTE(review): the "HTTP" test is presumably meant to skip the status line,
 * but it also drops any other header whose text contains "HTTP".
 *
 * @param ptr Start of the header data; not null-terminated.
 * @param size Size of one data element.
 * @param nmemb Number of data elements.
 * @param resp_hdrs Pointer to the vector<string> that receives the line.
 * @return size * nmemb, i.e. all the data was consumed.
 */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    std::vector<std::string> *hdrs
        = static_cast<std::vector<std::string> *>(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Strip a trailing LF and then a trailing CR, but only when they are
    // really there. Working from the total byte count avoids the unsigned
    // underflow that (nmemb - 1) produces for a zero-length call.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    if (complete_line != "" && complete_line.find("HTTP") == std::string::npos) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
00253
00255 static int
00256 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
00257 {
00258 string message(msg, size);
00259
00260 switch (info) {
00261 case CURLINFO_TEXT:
00262 cerr << "Text: " << message; break;
00263 case CURLINFO_HEADER_IN:
00264 cerr << "Header in: " << message; break;
00265 case CURLINFO_HEADER_OUT:
00266 cerr << "Header out: " << message; break;
00267 case CURLINFO_DATA_IN:
00268 cerr << "Data in: " << message; break;
00269 case CURLINFO_DATA_OUT:
00270 cerr << "Data out: " << message; break;
00271 case CURLINFO_END:
00272 cerr << "End: " << message; break;
00273 #ifdef CURLINFO_SSL_DATA_IN
00274 case CURLINFO_SSL_DATA_IN:
00275 cerr << "SSL Data in: " << message; break;
00276 #endif
00277 #ifdef CURLINFO_SSL_DATA_OUT
00278 case CURLINFO_SSL_DATA_OUT:
00279 cerr << "SSL Data out: " << message; break;
00280 #endif
00281 default:
00282 cerr << "Curl info: " << message; break;
00283 }
00284 return 0;
00285 }
00286
00290 void
00291 HTTPConnect::www_lib_init()
00292 {
00293 d_curl = curl_easy_init();
00294 if (!d_curl)
00295 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
00296
00297
00298
00299
00300
00301 if (!d_rcr->get_proxy_server_host().empty()) {
00302 DBG(cerr << "Setting up a proxy server." << endl);
00303 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
00304 << endl);
00305 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
00306 << endl);
00307 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
00308 << endl);
00309 curl_easy_setopt(d_curl, CURLOPT_PROXY,
00310 d_rcr->get_proxy_server_host().c_str());
00311 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
00312 d_rcr->get_proxy_server_port());
00313
00314 if (!d_rcr->get_proxy_server_userpw().empty())
00315 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
00316 d_rcr->get_proxy_server_userpw().c_str());
00317 }
00318
00319 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
00320
00321
00322 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
00323
00324
00325
00326
00327 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00328
00329 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
00330 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
00331 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
00332
00333
00334
00335
00336 if (!d_rcr->get_validate_ssl() == 0) {
00337 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
00338 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
00339 }
00340
00341 if (www_trace) {
00342 cerr << "Curl version: " << curl_version() << endl;
00343 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
00344 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
00345 }
00346 }
00347
00351 class BuildHeaders : public unary_function<const string &, void>
00352 {
00353 struct curl_slist *d_cl;
00354
00355 public:
00356 BuildHeaders() : d_cl(0)
00357 {}
00358
00359 void operator()(const string &header)
00360 {
00361 DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
00362 << endl);
00363 d_cl = curl_slist_append(d_cl, header.c_str());
00364 }
00365
00366 struct curl_slist *get_headers()
00367 {
00368 return d_cl;
00369 }
00370 };
00371
00386 long
00387 HTTPConnect::read_url(const string &url, FILE *stream,
00388 vector<string> *resp_hdrs,
00389 const vector<string> *headers)
00390 {
00391 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
00392
00393 #ifdef WIN32
00394
00395
00396
00397
00398
00399
00400
00401 curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00402 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
00403 #else
00404 curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00405 #endif
00406
00407 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00408 ostream_iterator<string>(cerr, "\n")));
00409
00410 BuildHeaders req_hdrs;
00411 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
00412 req_hdrs);
00413 if (headers)
00414 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
00415 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
00416
00417 if (d_accept_deflate)
00418 curl_easy_setopt(d_curl, CURLOPT_ENCODING, "deflate");
00419
00420
00421 bool temporary_proxy = false;
00422 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
00423 DBG(cerr << "Suppress proxy for url: " << url << endl);
00424 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
00425 }
00426
00427 string::size_type at_sign = url.find('@');
00428
00429
00430
00431 if (at_sign != url.npos)
00432 d_upstring = url.substr(7, at_sign - 7);
00433
00434 if (!d_upstring.empty())
00435 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
00436
00437
00438
00439
00440 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
00441
00442 CURLcode res = curl_easy_perform(d_curl);
00443
00444
00445 curl_slist_free_all(req_hdrs.get_headers());
00446 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
00447
00448
00449 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
00450 curl_easy_setopt(d_curl, CURLOPT_PROXY,
00451 d_rcr->get_proxy_server_host().c_str());
00452
00453 if (res != 0)
00454 throw Error(d_error_buffer);
00455
00456 long status;
00457 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
00458 if (res != 0)
00459 throw Error(d_error_buffer);
00460
00461 return status;
00462 }
00463
00467 bool
00468 HTTPConnect::url_uses_proxy_for(const string &url) throw()
00469 {
00470 if (d_rcr->is_proxy_for_used()) {
00471 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
00472 int index = 0, matchlen;
00473 return host_regex.search(url.c_str(), url.size(), matchlen, index)
00474 != -1;
00475 }
00476
00477 return false;
00478 }
00479
00483 bool
00484 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
00485 {
00486 return d_rcr->is_no_proxy_for_used()
00487 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
00488 }
00489
00490
00491
00498 HTTPConnect::HTTPConnect(RCReader *rcr) throw(Error, InternalErr)
00499 : d_username(""), d_password("")
00500 {
00501 d_accept_deflate = rcr->get_deflate();
00502 d_rcr = rcr;
00503
00504
00505
00506
00507
00508 d_request_headers.push_back(string("Pragma:"));
00509 string user_agent = string("User-Agent: ") + string(CNAME)
00510 + string("/") + string(CVER);
00511 d_request_headers.push_back(user_agent);
00512 if (d_accept_deflate)
00513 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00514
00515
00516 if (d_rcr->get_use_cache())
00517 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
00518 false);
00519 else
00520 d_http_cache = 0;
00521
00522 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
00523 << ")" << endl);
00524
00525 if (d_http_cache) {
00526 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
00527 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
00528 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
00529 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
00530 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
00531 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
00532 }
00533
00534 www_lib_init();
00535 }
00536
00537 HTTPConnect::~HTTPConnect()
00538 {
00539 DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
00540
00541 curl_easy_cleanup(d_curl);
00542
00543 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
00544 }
00545
00558 HTTPResponse *
00559 HTTPConnect::fetch_url(const string &url)
00560 {
00561 #ifdef HTTP_TRACE
00562 cout << "GET " << url << " HTTP/1.0" << endl;
00563 #endif
00564
00565 HTTPResponse *stream;
00566
00567 if (d_http_cache && d_http_cache->is_cache_enabled()) {
00568 stream = caching_fetch_url(url);
00569 }
00570 else {
00571 stream = plain_fetch_url(url);
00572 }
00573
00574 #ifdef HTTP_TRACE
00575 stringstream ss;
00576 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
00577 for (size_t i = 0; i < stream->get_headers()->size(); i++) {
00578 ss << stream->get_headers()->at(i) << endl;
00579 }
00580 cout << ss.str();
00581 #endif
00582
00583 ParseHeader parser;
00584
00585 parser = for_each(stream->get_headers()->begin(),
00586 stream->get_headers()->end(), ParseHeader());
00587
00588 #ifdef HTTP_TRACE
00589 cout << endl << endl;
00590 #endif
00591
00592
00593 if (parser.get_location() != "" &&
00594 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
00595 return fetch_url(parser.get_location());
00596 }
00597
00598 stream->set_type(parser.get_object_type());
00599 stream->set_version(parser.get_server());
00600 stream->set_protocol(parser.get_protocol());
00601
00602 return stream;
00603 }
00604
00623 string
00624 get_temp_file(FILE *&stream) throw(InternalErr)
00625 {
00626
00627 char *dods_temp = get_tempfile_template("dodsXXXXXX");
00628
00629
00630 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
00631 stream = fopen(_mktemp(dods_temp), "w+b");
00632 #else
00633 stream = fdopen(mkstemp(dods_temp), "w+");
00634 #endif
00635
00636 if (!stream)
00637 throw InternalErr("I/O Error: Failed to open a temporary file for the data values.");
00638
00639 string dods_temp_s = dods_temp;
00640 delete[] dods_temp; dods_temp = 0;
00641
00642 return dods_temp_s;
00643 }
00644
/**
 * Close a temporary file's stream and remove the file.
 *
 * @param s The open stream; a failure to close it is only reported through
 * the debug macro.
 * @param name The name of the file to remove.
 */
void
close_temp(FILE *s, const std::string &name)
{
    const bool close_failed = (fclose(s) != 0);
    if (close_failed) {
        DBG(std::cerr << "Failed to close " << static_cast<void *>(s) << std::endl);
    }

    // The file is removed whether or not the close succeeded.
    unlink(name.c_str());
}
00655
00677 HTTPResponse *
00678 HTTPConnect::caching_fetch_url(const string &url)
00679 {
00680 DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
00681
00682 if (d_http_cache->is_url_in_cache(url)) {
00683 DBGN(cerr << "yes... ");
00684
00685 if (d_http_cache->is_url_valid(url)) {
00686 DBGN(cerr << "and it's valid; using cached response." << endl);
00687
00688 vector<string> *headers = new vector<string>;;
00689 FILE *s = d_http_cache->get_cached_response(url, *headers);
00690 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, d_http_cache);
00691
00692 return crs;
00693 }
00694 else {
00695 DBGN(cerr << "but it's not valid; validating... ");
00696
00697
00698 vector<string> *resp_hdrs = new vector<string>;
00699 vector<string> cond_hdrs
00700 = d_http_cache->get_conditional_request_headers(url);
00701 FILE *body = 0;
00702 string dods_temp = get_temp_file(body);
00703 time_t now = time(0);
00704 long http_status;
00705
00706 try {
00707 http_status = read_url(url, body, resp_hdrs, &cond_hdrs);
00708 rewind(body);
00709 }
00710 catch (Error &e) {
00711 close_temp(body, dods_temp);
00712 throw;
00713 }
00714
00715 switch (http_status) {
00716 case 200: {
00717 DBGN(cerr << "read a new response; caching." << endl);
00718
00719 d_http_cache->cache_response(url, now, *resp_hdrs, body);
00720 HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs,
00721 dods_temp);
00722
00723 return rs;
00724 }
00725 break;
00726
00727 case 304: {
00728 DBGN(cerr << "cached response valid; updating." << endl);
00729
00730 close_temp(body, dods_temp);
00731 d_http_cache->update_response(url, now, *resp_hdrs);
00732
00733 vector<string> *headers = new vector<string>;;
00734 FILE *s = d_http_cache->get_cached_response(url, *headers);
00735 HTTPCacheResponse *crs
00736 = new HTTPCacheResponse(s, 304, headers, d_http_cache);
00737 return crs;
00738 }
00739 break;
00740
00741 default: {
00742 close_temp(body, dods_temp);
00743 if (http_status >= 400) {
00744 string msg = "Error while reading the URL: ";
00745 msg += url;
00746 msg += ".\nThe OPeNDAP server returned the following message:\n";
00747 msg += http_status_to_string(http_status);
00748 throw Error(msg);
00749 }
00750 else
00751 throw InternalErr(__FILE__, __LINE__,
00752 "Bad response from the HTTP server: " + long_to_string(http_status));
00753 }
00754 break;
00755 }
00756 }
00757 }
00758 else {
00759 DBGN(cerr << "no; getting response and caching." << endl);
00760 time_t now = time(0);
00761 HTTPResponse *rs = plain_fetch_url(url);
00762 d_http_cache->cache_response(url, now, *(rs->get_headers()),
00763 rs->get_stream());
00764
00765 return rs;
00766 }
00767
00768 throw InternalErr(__FILE__, __LINE__, "Unexpected cache response.");
00769 }
00770
00771
00783 HTTPResponse *
00784 HTTPConnect::plain_fetch_url(const string &url)
00785 {
00786 DBG(cerr << "Getting URL: " << url << endl);
00787 FILE *stream = 0;
00788 string dods_temp = get_temp_file(stream);
00789 vector<string> *resp_hdrs = new vector<string>;
00790
00791 int status = -1;
00792 try {
00793 status = read_url(url, stream, resp_hdrs);
00794 if (status >= 400) {
00795 string msg = "Error while reading the URL: ";
00796 msg += url;
00797 msg += ".\nThe OPeNDAP server returned the following message:\n";
00798 msg += http_status_to_string(status);
00799 throw Error(msg);
00800 }
00801 }
00802
00803 catch (Error &e) {
00804 close_temp(stream, dods_temp);
00805 throw e;
00806 }
00807
00808 rewind(stream);
00809
00810 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
00811 }
00812
00824 void
00825 HTTPConnect::set_accept_deflate(bool deflate)
00826 {
00827 d_accept_deflate = deflate;
00828
00829 if (d_accept_deflate) {
00830 if (find(d_request_headers.begin(), d_request_headers.end(),
00831 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
00832 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00833 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00834 ostream_iterator<string>(cerr, "\n")));
00835 }
00836 else {
00837 vector<string>::iterator i;
00838 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
00839 bind2nd(equal_to<string>(),
00840 string("Accept-Encoding: deflate, gzip, compress")));
00841 d_request_headers.erase(i, d_request_headers.end());
00842 }
00843 }
00844
00860 void
00861 HTTPConnect::set_credentials(const string &u, const string &p)
00862 {
00863 if (u.empty())
00864 return;
00865
00866
00867 d_username = u;
00868 d_password = p;
00869
00870 d_upstring = u + ":" + p;
00871 }