HTTPConnect.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 
00027 #include "config.h"
00028 
00029 static char rcsid[] not_used =
00030     { "$Id: HTTPConnect.cc 18319 2008-03-03 21:42:36Z jimg $"
00031     };
00032 
00033 #ifdef HAVE_UNISTD_H
00034 #include <unistd.h>
00035 #endif
00036 
00037 #ifdef WIN32
00038 #include <io.h>
00039 #endif
00040 
00041 #include <string>
00042 #include <vector>
00043 #include <functional>
00044 #include <algorithm>
00045 #include <sstream>
00046 #include <iterator>
00047 #include <cstdlib>
00048 
00049 //#define DODS_DEBUG
00050 //#define DODS_DEBUG2
00051 
00052 #include "debug.h"
00053 #include "GNURegex.h"
00054 #include "HTTPCache.h"
00055 #include "HTTPConnect.h"
00056 #include "RCReader.h"
00057 #include "HTTPResponse.h"
00058 #include "HTTPCacheResponse.h"
00059 
00060 using namespace std;
00061 
00062 namespace libdap {
00063 
// Debugging switches. Neither variable is MT-safe; they are meant to be
// flipped by hand from a debugger (gdb, ddd). They are deliberately not
// static because, as far as the original author knew, many debuggers cannot
// reach static variables. 08/07/02 jhrg

// Non-zero: www_lib_init() prints the libcurl version and enables libcurl's
// verbose mode.
int www_trace = 0;

// Non-zero is meant to keep the temporary files around for inspection.
int dods_keep_temps = 0;
00074 
// Human-readable messages for the HTTP 4xx status codes, indexed by
// (status - CLIENT_ERR_MIN). The table covers CLIENT_ERR_MIN..CLIENT_ERR_MAX
// inclusive. The elements point at string literals, so they are const.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        // Adjacent literals are used instead of a backslash line continuation
        // so that source indentation does not end up inside the message.
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention;\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };
00100 
// Human-readable messages for the HTTP 5xx status codes, indexed by
// (status - SERVER_ERR_MIN). The table covers SERVER_ERR_MIN..SERVER_ERR_MAX
// inclusive. The elements point at string literals, so they are const.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN +1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };
00112 
00115 static string
00116 http_status_to_string(int status)
00117 {
00118     if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
00119         return string(http_client_errors[status - CLIENT_ERR_MIN]);
00120     else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
00121         return string(http_server_errors[status - SERVER_ERR_MIN]);
00122     else
00123         return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
00124 }
00125 
00130 class ParseHeader : public unary_function<const string &, void>
00131 {
00132     ObjectType type;  // What type of object is in the stream?
00133     string server;  // Server's version string.
00134     string protocol;            // Server's protocol version.
00135     string location;            // Url returned by server
00136 
00137 public:
00138     ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
00139     { }
00140 
00141     void operator()(const string &header)
00142     {
00143         std::istringstream line(header);
00144 
00145         string name;
00146         line >> name;
00147         downcase(name);
00148         if (name == "content-description:") {
00149             string value;
00150             line >> value;
00151             downcase(value);
00152             DBG2(cout << name << ": " << value << endl);
00153             type = get_type(value);
00154         }
00155         // The second test (== "dods/0.0") tests if xopendap-server has already
00156         // been seen. If so, use that header in preference to the old
00157         // XDODS-Server header. jhrg 2/7/06
00158         else if (name == "xdods-server:" && server == "dods/0.0") {
00159             string value;
00160             line >> value;
00161             downcase(value);
00162             DBG2(cout << name << ": " << value << endl);
00163             server = value;
00164         }
00165         else if (name == "xopendap-server:") {
00166             string value;
00167             line >> value;
00168             downcase(value);
00169             DBG2(cout << name << ": " << value << endl);
00170             server = value;
00171         }
00172         else if (name == "xdap:") {
00173             string value;
00174             line >> value;
00175             downcase(value);
00176             DBG2(cout << name << ": " << value << endl);
00177             protocol = value;
00178         }
00179         else if (server == "dods/0.0" && name == "server:") {
00180             string value;
00181             line >> value;
00182             downcase(value);
00183             DBG2(cout << name << ": " << value << endl);
00184             server = value;
00185         }
00186         else if (name == "location:") {
00187             string value; 
00188             line >> value;
00189             DBG2(cout << name << ": " << value << endl);
00190             location = value;
00191         }
00192         else if (type == unknown_type && name == "content-type:"
00193                  && line.str().find("text/html") != string::npos) {
00194             DBG2(cout << name << ": text/html..." << endl);
00195             type = web_error;
00196         }
00197     }
00198 
00199     ObjectType get_object_type()
00200     {
00201         return type;
00202     }
00203 
00204     string get_server()
00205     {
00206         return server;
00207     }
00208 
00209     string get_protocol()
00210     {
00211         return protocol;
00212     }
00213 
00214     string get_location() {
00215            return location;
00216     }
00217 };
00218 
/** libcurl header callback: called once per response header line. Stores
    the line, without its line terminator, in a vector<string>.

    Empty lines are not stored, and neither is any line that contains the
    text "HTTP" (intended to skip status lines).
    NOTE(review): that test matches "HTTP" anywhere in the line, so an
    ordinary header whose value contains "HTTP" is skipped as well.

    @param ptr Start of the header bytes; not assumed to be null terminated.
    @param size Size of one element.
    @param nmemb Number of elements; the data is size * nmemb bytes long.
    @param resp_hdrs Pointer to the vector<string> that receives the line.
    @return size * nmemb, i.e. every byte is always reported as consumed. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    std::vector<std::string> *hdrs
        = static_cast<std::vector<std::string> * >(resp_hdrs);

    const size_t total = size * nmemb;
    const char *data = static_cast<const char *>(ptr);

    // Strip a trailing "\n" or "\r\n" only if it is really there. Working
    // from the total byte count avoids unsigned underflow on an empty
    // callback and does not depend on size being 1.
    size_t length = (data != 0) ? total : 0;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    if (length > 0 && hdrs != 0) {
        const std::string complete_line(data, length);

        // Store all non-empty headers that are not HTTP status codes
        if (complete_line.find("HTTP") == std::string::npos) {
            DBG(cerr << "Header line: " << complete_line << endl);
            hdrs->push_back(complete_line);
        }
    }

    return total;
}
00256 
00258 static int
00259 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void  *)
00260 {
00261     string message(msg, size);
00262 
00263     switch (info) {
00264     case CURLINFO_TEXT:
00265         cerr << "Text: " << message; break;
00266     case CURLINFO_HEADER_IN:
00267         cerr << "Header in: " << message; break;
00268     case CURLINFO_HEADER_OUT:
00269         cerr << "Header out: " << message; break;
00270     case CURLINFO_DATA_IN:
00271         cerr << "Data in: " << message; break;
00272     case CURLINFO_DATA_OUT:
00273         cerr << "Data out: " << message; break;
00274     case CURLINFO_END:
00275         cerr << "End: " << message; break;
00276 #ifdef CURLINFO_SSL_DATA_IN
00277     case CURLINFO_SSL_DATA_IN:
00278         cerr << "SSL Data in: " << message; break;
00279 #endif
00280 #ifdef CURLINFO_SSL_DATA_OUT
00281     case CURLINFO_SSL_DATA_OUT:
00282         cerr << "SSL Data out: " << message; break;
00283 #endif
00284     default:
00285         cerr << "Curl info: " << message; break;
00286     }
00287     return 0;
00288 }
00289 
00293 void
00294 HTTPConnect::www_lib_init()
00295 {
00296     d_curl = curl_easy_init();
00297     if (!d_curl)
00298         throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
00299 
00300     // Now set options that will remain constant for the duration of this
00301     // CURL object.
00302 
00303     // Set the proxy host.
00304     if (!d_rcr->get_proxy_server_host().empty()) {
00305         DBG(cerr << "Setting up a proxy server." << endl);
00306         DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
00307             << endl);
00308         DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
00309             << endl);
00310         DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
00311             << endl);
00312         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00313                          d_rcr->get_proxy_server_host().c_str());
00314         curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
00315                          d_rcr->get_proxy_server_port());
00316         // Password might not be required. 06/21/04 jhrg
00317         if (!d_rcr->get_proxy_server_userpw().empty())
00318             curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
00319                              d_rcr->get_proxy_server_userpw().c_str());
00320     }
00321 
00322     curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
00323     // We have to set FailOnError to false for any of the non-Basic
00324     // authentication schemes to work. 07/28/03 jhrg
00325     curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
00326 
00327     // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
00328     // choosing the the 'safest' one supported by the server.
00329     // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
00330     curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00331 
00332     curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
00333     curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
00334     curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
00335     // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
00336     // param of save_raw_http_headers to a vector<string> object.
00337 
00338     // If the user turns off SSL validation...
00339     if (!d_rcr->get_validate_ssl() == 0) {
00340         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
00341         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
00342     }
00343 
00344     if (www_trace) {
00345         cerr << "Curl version: " << curl_version() << endl;
00346         curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
00347         curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
00348     }
00349 }
00350 
00354 class BuildHeaders : public unary_function<const string &, void>
00355 {
00356     struct curl_slist *d_cl;
00357 
00358 public:
00359     BuildHeaders() : d_cl(0)
00360     {}
00361 
00362     void operator()(const string &header)
00363     {
00364         DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
00365             << endl);
00366         d_cl = curl_slist_append(d_cl, header.c_str());
00367     }
00368 
00369     struct curl_slist *get_headers()
00370     {
00371         return d_cl;
00372     }
00373 };
00374 
00389 long
00390 HTTPConnect::read_url(const string &url, FILE *stream,
00391                       vector<string> *resp_hdrs,
00392                       const vector<string> *headers)
00393 {
00394     curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
00395 
00396 #ifdef WIN32
00397     //  See the curl documentation for CURLOPT_FILE (nka CURLOPT_WRITEDATA)
00398     //  and the CURLOPT_WRITEFUNCTION option.  Quote: "If you are using libcurl as
00399     //  a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
00400     //  CURLOPT_WRITEDATA option or you will experience crashes".  At the root of
00401     //  this issue is that one should not pass a FILE * to a windows DLL.  Close
00402     //  inspection of libcurl yields that their default write function when using
00403     //  the CURLOPT_WRITEDATA is just "fwrite".
00404     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00405     curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
00406 #else
00407     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00408 #endif
00409 
00410     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00411              ostream_iterator<string>(cerr, "\n")));
00412 
00413     BuildHeaders req_hdrs;
00414     req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
00415                         req_hdrs);
00416     if (headers)
00417         req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
00418     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
00419 
00420     if (d_accept_deflate)
00421         curl_easy_setopt(d_curl, CURLOPT_ENCODING, "deflate");
00422 
00423     // Turn off the proxy for this URL?
00424     bool temporary_proxy = false;
00425     if ((temporary_proxy = url_uses_no_proxy_for(url))) {
00426         DBG(cerr << "Suppress proxy for url: " << url << endl);
00427         curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
00428     }
00429 
00430     string::size_type at_sign = url.find('@');
00431     // Assume username:password present *and* assume it's an HTTP URL; it *is*
00432     // HTTPConnect, after all. 7 is position after "http://"; the second arg
00433     // to substr() is the sub string length.
00434     if (at_sign != url.npos)
00435         d_upstring = url.substr(7, at_sign - 7);
00436 
00437     if (!d_upstring.empty())
00438         curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
00439 
00440     // Pass save_raw_http_headers() a pointer to the vector<string> where the
00441     // response headers may be stored. Callers can use the resp_hdrs
00442     // value/result parameter to get the raw response header information .
00443     curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
00444 
00445     CURLcode res = curl_easy_perform(d_curl);
00446 
00447     // Free the header list and null the value in d_curl.
00448     curl_slist_free_all(req_hdrs.get_headers());
00449     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
00450 
00451     // Reset the proxy?
00452     if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
00453         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00454                          d_rcr->get_proxy_server_host().c_str());
00455 
00456     if (res != 0)
00457         throw Error(d_error_buffer);
00458 
00459     long status;
00460     res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
00461     if (res != 0)
00462         throw Error(d_error_buffer);
00463 
00464     return status;
00465 }
00466 
00470 bool
00471 HTTPConnect::url_uses_proxy_for(const string &url) throw()
00472 {
00473     if (d_rcr->is_proxy_for_used()) {
00474         Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
00475         int index = 0, matchlen;
00476         return host_regex.search(url.c_str(), url.size(), matchlen, index)
00477                != -1;
00478     }
00479 
00480     return false;
00481 }
00482 
00486 bool
00487 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
00488 {
00489     return d_rcr->is_no_proxy_for_used()
00490            && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
00491 }
00492 
00493 // Public methods. Mostly...
00494 
00501 HTTPConnect::HTTPConnect(RCReader *rcr) throw(Error, InternalErr)
00502         : d_username(""), d_password("")
00503 {
00504     d_accept_deflate = rcr->get_deflate();
00505     d_rcr = rcr;
00506 
00507     // Load in the default headers to send with a request. The empty Pragma
00508     // headers overrides libcurl's default Pragma: no-cache header (which
00509     // will disable caching by Squid, et c.). The User-Agent header helps
00510     // make server logs more readable. 05/05/03 jhrg
00511     d_request_headers.push_back(string("Pragma:"));
00512     string user_agent = string("User-Agent: ") + string(CNAME)
00513                         + string("/") + string(CVER);
00514     d_request_headers.push_back(user_agent);
00515     if (d_accept_deflate)
00516         d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00517 
00518     // HTTPCache::instance returns a valid ptr or 0.
00519     if (d_rcr->get_use_cache())
00520         d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
00521                                            false);
00522     else
00523         d_http_cache = 0;
00524 
00525     DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
00526          << ")" << endl);
00527 
00528     if (d_http_cache) {
00529         d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
00530         d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
00531         d_http_cache->set_max_size(d_rcr->get_max_cache_size());
00532         d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
00533         d_http_cache->set_default_expiration(d_rcr->get_default_expires());
00534         d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
00535     }
00536 
00537     www_lib_init();  // This may throw either Error or InternalErr
00538 }
00539 
00540 HTTPConnect::~HTTPConnect()
00541 {
00542     DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
00543 
00544     curl_easy_cleanup(d_curl);
00545 
00546     DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
00547 }
00548 
00561 HTTPResponse *
00562 HTTPConnect::fetch_url(const string &url)
00563 {
00564 #ifdef HTTP_TRACE
00565     cout << "GET " << url << " HTTP/1.0" << endl;
00566 #endif
00567 
00568     HTTPResponse *stream;
00569 
00570     if (d_http_cache && d_http_cache->is_cache_enabled()) {
00571         stream = caching_fetch_url(url);
00572     }
00573     else {
00574         stream = plain_fetch_url(url);
00575     }
00576 
00577 #ifdef HTTP_TRACE
00578     stringstream ss;
00579     ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
00580     for (size_t i = 0; i < stream->get_headers()->size(); i++) {
00581         ss << stream->get_headers()->at(i) << endl;
00582     }
00583     cout << ss.str();
00584 #endif
00585 
00586     ParseHeader parser;
00587 
00588     parser = for_each(stream->get_headers()->begin(),
00589                       stream->get_headers()->end(), ParseHeader());
00590 
00591 #ifdef HTTP_TRACE
00592     cout << endl << endl;
00593 #endif
00594 
00595     // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
00596     if (parser.get_location() != "" &&
00597         url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
00598            return fetch_url(parser.get_location());
00599     }
00600 
00601     stream->set_type(parser.get_object_type());
00602     stream->set_version(parser.get_server());
00603     stream->set_protocol(parser.get_protocol());
00604 
00605     return stream;
00606 }
00607 
00608 // Look around for a reasonable place to put a temporary file. Check first
00609 // the value of the TMPDIR env var. If that does not yeild a path that's
00610 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
00611 // defined in stdio.h. If both come up empty, then use `./'.
00612 //
00613 // This function allocates storage using new. The caller must delete the char
00614 // array.
00615 
00616 // Change this to a version that either returns a string or an open file
00617 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
00618 // (see open()) to make it more secure. Ideal solution: get deserialize()
00619 // methods to read from a stream returned by libcurl, not from a temporary
00620 // file. 9/21/07 jhrg
00621 static char *
00622 get_tempfile_template(char *file_template)
00623 {
00624     char *c;
00625     
00626 #ifdef WIN32
00627     // whitelist for a WIN32 directory
00628     Regex directory("[-a-zA-Z0-9_\\]*");
00629         
00630     c = getenv("TEMP");
00631     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00632         goto valid_temp_directory;
00633 
00634     c= getenv("TMP");
00635     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00636         goto valid_temp_directory;
00637 #else
00638         // whitelist for a directory
00639         Regex directory("[-a-zA-Z0-9_/]*");
00640         
00641         c = getenv("TMPDIR");
00642         if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0))
00643         goto valid_temp_directory;
00644 
00645 #ifdef P_tmpdir
00646         if (access(P_tmpdir, W_OK | R_OK) == 0) {
00647         c = P_tmpdir;
00648         goto valid_temp_directory;
00649         }
00650 #endif
00651 
00652 #endif  // WIN32
00653 
00654     c = ".";
00655     
00656 valid_temp_directory:
00657         // Sanitize allocation
00658         int size = strlen(c) + strlen(file_template) + 2;
00659         if (!size_ok(1, size))
00660                 throw Error("Bad temporary file name.");
00661                 
00662     char *temp = new char[size];
00663     strncpy(temp, c, size-2);
00664     strcat(temp, "/");
00665 
00666     strcat(temp, file_template);
00667 
00668     return temp;
00669 }
00670 
00689 string
00690 get_temp_file(FILE *&stream) throw(InternalErr)
00691 {
00692     // get_tempfile_template() uses new, must call delete
00693     char *dods_temp = get_tempfile_template("dodsXXXXXX");
00694 
00695     // Open truncated for update. NB: mkstemp() returns a file descriptor.
00696 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
00697     stream = fopen(_mktemp(dods_temp), "w+b");
00698 #else
00699     stream = fdopen(mkstemp(dods_temp), "w+");
00700 #endif
00701 
00702     if (!stream)
00703         throw InternalErr("I/O Error: Failed to open a temporary file for the data values.");
00704 
00705     string dods_temp_s = dods_temp;
00706     delete[] dods_temp; dods_temp = 0;
00707 
00708     return dods_temp_s;
00709 }
00710 
00712 void
00713 close_temp(FILE *s, const string &name)
00714 {
00715     int res = fclose(s);
00716     if (res)
00717         DBG(cerr << "Failed to close " << (void *)s << endl);
00718 
00719     unlink(name.c_str());
00720 }
00721 
00743 HTTPResponse *
00744 HTTPConnect::caching_fetch_url(const string &url)
00745 {
00746     DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
00747 
00748     if (d_http_cache->is_url_in_cache(url)) { // url in cache
00749         DBGN(cerr << "yes... ");
00750 
00751         if (d_http_cache->is_url_valid(url)) { // url in cache and valid
00752             DBGN(cerr << "and it's valid; using cached response." << endl);
00753 
00754             vector<string> *headers = new vector<string>;;
00755             FILE *s = d_http_cache->get_cached_response(url, *headers);
00756             HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, d_http_cache);
00757 
00758             return crs;
00759         }
00760         else {   // url in cache but not valid; validate
00761             DBGN(cerr << "but it's not valid; validating... ");
00762 
00763             // *** auto_ptr??? resp_hdrs not deleted! 10/10/03 jhrg
00764             vector<string> *resp_hdrs = new vector<string>;
00765             vector<string> cond_hdrs
00766                 = d_http_cache->get_conditional_request_headers(url);
00767             FILE *body = 0;
00768             string dods_temp = get_temp_file(body);
00769             time_t now = time(0); // When was the request made (now).
00770             long http_status;
00771 
00772             try {
00773                 http_status = read_url(url, body, resp_hdrs, &cond_hdrs);
00774                 rewind(body);
00775             }
00776             catch (Error &e) {
00777                 close_temp(body, dods_temp);
00778                 throw;
00779             }
00780 
00781             switch (http_status) {
00782             case 200: {  // New headers and new body
00783                     DBGN(cerr << "read a new response; caching." << endl);
00784 
00785                     d_http_cache->cache_response(url, now, *resp_hdrs, body);
00786                     HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs,
00787                                                         dods_temp);
00788 
00789                     return rs;
00790                 }
00791                 break;
00792 
00793             case 304: {  // Just new headers, use cached body
00794                     DBGN(cerr << "cached response valid; updating." << endl);
00795 
00796                     close_temp(body, dods_temp);
00797                     d_http_cache->update_response(url, now, *resp_hdrs);
00798 
00799                     vector<string> *headers = new vector<string>;;
00800                     FILE *s = d_http_cache->get_cached_response(url, *headers);
00801                     HTTPCacheResponse *crs
00802                     = new HTTPCacheResponse(s, 304, headers, d_http_cache);
00803                     return crs;
00804                 }
00805                 break;
00806 
00807             default: {  // Oops.
00808                     close_temp(body, dods_temp);
00809                     if (http_status >= 400) {
00810                         string msg = "Error while reading the URL: ";
00811                         msg += url;
00812                         msg += ".\nThe OPeNDAP server returned the following message:\n";
00813                         msg += http_status_to_string(http_status);
00814                         throw Error(msg);
00815                     }
00816                     else
00817                         throw InternalErr(__FILE__, __LINE__,
00818                                           "Bad response from the HTTP server: " + long_to_string(http_status));
00819                 }
00820                 break;
00821             }
00822         }
00823     }
00824     else {   // url not in cache; get it and cache it
00825         DBGN(cerr << "no; getting response and caching." << endl);
00826         time_t now = time(0);
00827         HTTPResponse *rs = plain_fetch_url(url);
00828         d_http_cache->cache_response(url, now, *(rs->get_headers()),
00829                                      rs->get_stream());
00830 
00831         return rs;
00832     }
00833 
00834     throw InternalErr(__FILE__, __LINE__, "Unexpected cache response.");
00835 }
00836 
00837 
00849 HTTPResponse *
00850 HTTPConnect::plain_fetch_url(const string &url)
00851 {
00852     DBG(cerr << "Getting URL: " << url << endl);
00853     FILE *stream = 0;
00854     string dods_temp = get_temp_file(stream);
00855     vector<string> *resp_hdrs = new vector<string>;
00856 
00857     int status = -1;
00858     try {
00859         status = read_url(url, stream, resp_hdrs); // Throws Error.
00860         if (status >= 400) {
00861             string msg = "Error while reading the URL: ";
00862             msg += url;
00863             msg += ".\nThe OPeNDAP server returned the following message:\n";
00864             msg += http_status_to_string(status);
00865             throw Error(msg);
00866         }
00867     }
00868 
00869     catch (Error &e) {
00870         close_temp(stream, dods_temp);
00871         throw e;
00872     }
00873 
00874     rewind(stream);
00875 
00876     return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
00877 }
00878 
00890 void
00891 HTTPConnect::set_accept_deflate(bool deflate)
00892 {
00893     d_accept_deflate = deflate;
00894 
00895     if (d_accept_deflate) {
00896         if (find(d_request_headers.begin(), d_request_headers.end(),
00897                  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
00898             d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00899         DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00900                  ostream_iterator<string>(cerr, "\n")));
00901     }
00902     else {
00903         vector<string>::iterator i;
00904         i = remove_if(d_request_headers.begin(), d_request_headers.end(),
00905                       bind2nd(equal_to<string>(),
00906                               string("Accept-Encoding: deflate, gzip, compress")));
00907         d_request_headers.erase(i, d_request_headers.end());
00908     }
00909 }
00910 
00926 void
00927 HTTPConnect::set_credentials(const string &u, const string &p)
00928 {
00929     if (u.empty())
00930         return;
00931 
00932     // Store the credentials locally.
00933     d_username = u;
00934     d_password = p;
00935 
00936     d_upstring = u + ":" + p;
00937 }
00938 
00939 } // namespace libdap

Generated on Wed Mar 5 15:27:11 2008 for libdap++ by  doxygen 1.5.4