HTTPConnect.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 
00027 #include "config.h"
00028 
00029 static char rcsid[] not_used =
00030     { "$Id: HTTPConnect.cc 18650 2008-04-25 20:47:50Z jimg $"
00031     };
00032 
00033 #ifdef HAVE_UNISTD_H
00034 #include <unistd.h>
00035 #endif
00036 
00037 #ifdef WIN32
00038 #include <io.h>
00039 #endif
00040 
00041 #include <string>
00042 #include <vector>
00043 #include <functional>
00044 #include <algorithm>
00045 #include <sstream>
00046 #include <iterator>
00047 #include <cstdlib>
00048 #include <cstring>
00049 
00050 //#define DODS_DEBUG
00051 //#define DODS_DEBUG2
00052 
00053 #include "debug.h"
00054 #include "GNURegex.h"
00055 #include "HTTPCache.h"
00056 #include "HTTPConnect.h"
00057 #include "RCReader.h"
00058 #include "HTTPResponse.h"
00059 #include "HTTPCacheResponse.h"
00060 
00061 using namespace std;
00062 
00063 namespace libdap {
00064 
// Debugging switches. These globals are not MT-Safe; they are left that way
// because they are meant to be set only from a debugger (gdb, ddd, ...).
// They are intentionally not static: the original author *believed* that
// many debuggers cannot access static variables. 08/07/02 jhrg

// Non-zero turns on libcurl's verbose mode (for debugging).
int www_trace = 0;

// Non-zero asks for temporary files to be kept; useful for debugging.
int dods_keep_temps = 0;
00075 
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for HTTP client-error status codes, indexed by
// (status - CLIENT_ERR_MIN). The elements point at string literals, so the
// element type must be 'const char *' (a plain 'char *' is not valid C++11).
// The leading blanks on the continuation lines of the 404 message are part
// of the text this table has always produced and are kept unchanged.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n"
        "        Often this means that the OPeNDAP server is missing or needs attention;\n"
        "        Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone:.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };
00101 
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for HTTP server-error status codes, indexed by
// (status - SERVER_ERR_MIN). The elements point at string literals, so the
// element type must be 'const char *' (a plain 'char *' is not valid C++11).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };
00113 
00116 static string
00117 http_status_to_string(int status)
00118 {
00119     if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
00120         return string(http_client_errors[status - CLIENT_ERR_MIN]);
00121     else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
00122         return string(http_server_errors[status - SERVER_ERR_MIN]);
00123     else
00124         return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
00125 }
00126 
00131 class ParseHeader : public unary_function<const string &, void>
00132 {
00133     ObjectType type;  // What type of object is in the stream?
00134     string server;  // Server's version string.
00135     string protocol;            // Server's protocol version.
00136     string location;            // Url returned by server
00137 
00138 public:
00139     ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
00140     { }
00141 
00142     void operator()(const string &header)
00143     {
00144         std::istringstream line(header);
00145 
00146         string name;
00147         line >> name;
00148         downcase(name);
00149         if (name == "content-description:") {
00150             string value;
00151             line >> value;
00152             downcase(value);
00153             DBG2(cout << name << ": " << value << endl);
00154             type = get_type(value);
00155         }
00156         // The second test (== "dods/0.0") tests if xopendap-server has already
00157         // been seen. If so, use that header in preference to the old
00158         // XDODS-Server header. jhrg 2/7/06
00159         else if (name == "xdods-server:" && server == "dods/0.0") {
00160             string value;
00161             line >> value;
00162             downcase(value);
00163             DBG2(cout << name << ": " << value << endl);
00164             server = value;
00165         }
00166         else if (name == "xopendap-server:") {
00167             string value;
00168             line >> value;
00169             downcase(value);
00170             DBG2(cout << name << ": " << value << endl);
00171             server = value;
00172         }
00173         else if (name == "xdap:") {
00174             string value;
00175             line >> value;
00176             downcase(value);
00177             DBG2(cout << name << ": " << value << endl);
00178             protocol = value;
00179         }
00180         else if (server == "dods/0.0" && name == "server:") {
00181             string value;
00182             line >> value;
00183             downcase(value);
00184             DBG2(cout << name << ": " << value << endl);
00185             server = value;
00186         }
00187         else if (name == "location:") {
00188             string value; 
00189             line >> value;
00190             DBG2(cout << name << ": " << value << endl);
00191             location = value;
00192         }
00193         else if (type == unknown_type && name == "content-type:"
00194                  && line.str().find("text/html") != string::npos) {
00195             DBG2(cout << name << ": text/html..." << endl);
00196             type = web_error;
00197         }
00198     }
00199 
00200     ObjectType get_object_type()
00201     {
00202         return type;
00203     }
00204 
00205     string get_server()
00206     {
00207         return server;
00208     }
00209 
00210     string get_protocol()
00211     {
00212         return protocol;
00213     }
00214 
00215     string get_location() {
00216            return location;
00217     }
00218 };
00219 
/**
 * libcurl header callback: store one response header line.
 *
 * The trailing line terminator ("\n" or "\r\n") is removed when present.
 * Empty lines and HTTP status lines (lines that begin with "HTTP/") are not
 * stored; everything else is appended to the vector passed in RESP_HDRS.
 * A header whose value merely contains the text "HTTP" is kept.
 *
 * @param ptr Start of the header data; not null-terminated.
 * @param size Size of one data item.
 * @param nmemb Number of data items; the data is size * nmemb bytes long.
 * @param resp_hdrs Pointer to the std::vector<std::string> that receives
 * the header lines.
 * @return size * nmemb, i.e. the number of bytes consumed.
 */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    std::vector<std::string> *hdrs
        = static_cast<std::vector<std::string> *>(resp_hdrs);

    const size_t total = size * nmemb;
    // Nothing to look at; without this guard the length arithmetic below
    // would wrap around for an empty buffer.
    if (total == 0)
        return total;

    const char *data = static_cast<const char *>(ptr);

    // Grab the header, minus the trailing newline. Or \r\n pair.
    size_t length = total;
    if (data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
00257 
00259 static int
00260 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void  *)
00261 {
00262     string message(msg, size);
00263 
00264     switch (info) {
00265     case CURLINFO_TEXT:
00266         cerr << "Text: " << message; break;
00267     case CURLINFO_HEADER_IN:
00268         cerr << "Header in: " << message; break;
00269     case CURLINFO_HEADER_OUT:
00270         cerr << "Header out: " << message; break;
00271     case CURLINFO_DATA_IN:
00272         cerr << "Data in: " << message; break;
00273     case CURLINFO_DATA_OUT:
00274         cerr << "Data out: " << message; break;
00275     case CURLINFO_END:
00276         cerr << "End: " << message; break;
00277 #ifdef CURLINFO_SSL_DATA_IN
00278     case CURLINFO_SSL_DATA_IN:
00279         cerr << "SSL Data in: " << message; break;
00280 #endif
00281 #ifdef CURLINFO_SSL_DATA_OUT
00282     case CURLINFO_SSL_DATA_OUT:
00283         cerr << "SSL Data out: " << message; break;
00284 #endif
00285     default:
00286         cerr << "Curl info: " << message; break;
00287     }
00288     return 0;
00289 }
00290 
00294 void
00295 HTTPConnect::www_lib_init()
00296 {
00297     d_curl = curl_easy_init();
00298     if (!d_curl)
00299         throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
00300 
00301     // Now set options that will remain constant for the duration of this
00302     // CURL object.
00303 
00304     // Set the proxy host.
00305     if (!d_rcr->get_proxy_server_host().empty()) {
00306         DBG(cerr << "Setting up a proxy server." << endl);
00307         DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
00308             << endl);
00309         DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
00310             << endl);
00311         DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
00312             << endl);
00313         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00314                          d_rcr->get_proxy_server_host().c_str());
00315         curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
00316                          d_rcr->get_proxy_server_port());
00317 
00318         // As of 4/21/08 only NTLM, Digest and Basic work.
00319 #ifdef CURLOPT_PROXYAUTH
00320         curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
00321 #endif
00322 
00323         // Password might not be required. 06/21/04 jhrg
00324         if (!d_rcr->get_proxy_server_userpw().empty())
00325             curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
00326                              d_rcr->get_proxy_server_userpw().c_str());
00327     }
00328 
00329     curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
00330     // We have to set FailOnError to false for any of the non-Basic
00331     // authentication schemes to work. 07/28/03 jhrg
00332     curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
00333 
00334     // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
00335     // choosing the the 'safest' one supported by the server.
00336     // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
00337     curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00338 
00339     curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
00340     curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
00341     curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
00342     // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
00343     // param of save_raw_http_headers to a vector<string> object.
00344 
00345     // Follow 302 (redirect) responses
00346     curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
00347     curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
00348 
00349     // If the user turns off SSL validation...
00350     if (!d_rcr->get_validate_ssl() == 0) {
00351         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
00352         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
00353     }
00354 
00355     if (www_trace) {
00356         cerr << "Curl version: " << curl_version() << endl;
00357         curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
00358         curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
00359     }
00360 }
00361 
00365 class BuildHeaders : public unary_function<const string &, void>
00366 {
00367     struct curl_slist *d_cl;
00368 
00369 public:
00370     BuildHeaders() : d_cl(0)
00371     {}
00372 
00373     void operator()(const string &header)
00374     {
00375         DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
00376             << endl);
00377         d_cl = curl_slist_append(d_cl, header.c_str());
00378     }
00379 
00380     struct curl_slist *get_headers()
00381     {
00382         return d_cl;
00383     }
00384 };
00385 
00400 long
00401 HTTPConnect::read_url(const string &url, FILE *stream,
00402                       vector<string> *resp_hdrs,
00403                       const vector<string> *headers)
00404 {
00405     curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
00406 
00407 #ifdef WIN32
00408     //  See the curl documentation for CURLOPT_FILE (nka CURLOPT_WRITEDATA)
00409     //  and the CURLOPT_WRITEFUNCTION option.  Quote: "If you are using libcurl as
00410     //  a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
00411     //  CURLOPT_WRITEDATA option or you will experience crashes".  At the root of
00412     //  this issue is that one should not pass a FILE * to a windows DLL.  Close
00413     //  inspection of libcurl yields that their default write function when using
00414     //  the CURLOPT_WRITEDATA is just "fwrite".
00415     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00416     curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
00417 #else
00418     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00419 #endif
00420 
00421     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00422              ostream_iterator<string>(cerr, "\n")));
00423 
00424     BuildHeaders req_hdrs;
00425     req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
00426                         req_hdrs);
00427     if (headers)
00428         req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
00429     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
00430 
00431     if (d_accept_deflate)
00432         curl_easy_setopt(d_curl, CURLOPT_ENCODING, "deflate");
00433 
00434     // Turn off the proxy for this URL?
00435     bool temporary_proxy = false;
00436     if ((temporary_proxy = url_uses_no_proxy_for(url))) {
00437         DBG(cerr << "Suppress proxy for url: " << url << endl);
00438         curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
00439     }
00440 
00441     string::size_type at_sign = url.find('@');
00442     // Assume username:password present *and* assume it's an HTTP URL; it *is*
00443     // HTTPConnect, after all. 7 is position after "http://"; the second arg
00444     // to substr() is the sub string length.
00445     if (at_sign != url.npos)
00446         d_upstring = url.substr(7, at_sign - 7);
00447 
00448     if (!d_upstring.empty())
00449         curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
00450 
00451     // Pass save_raw_http_headers() a pointer to the vector<string> where the
00452     // response headers may be stored. Callers can use the resp_hdrs
00453     // value/result parameter to get the raw response header information .
00454     curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
00455 
00456     CURLcode res = curl_easy_perform(d_curl);
00457 
00458     // Free the header list and null the value in d_curl.
00459     curl_slist_free_all(req_hdrs.get_headers());
00460     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
00461 
00462     // Reset the proxy?
00463     if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
00464         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00465                          d_rcr->get_proxy_server_host().c_str());
00466 
00467     if (res != 0)
00468         throw Error(d_error_buffer);
00469 
00470     long status;
00471     res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
00472     if (res != 0)
00473         throw Error(d_error_buffer);
00474 
00475     return status;
00476 }
00477 
00481 bool
00482 HTTPConnect::url_uses_proxy_for(const string &url) throw()
00483 {
00484     if (d_rcr->is_proxy_for_used()) {
00485         Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
00486         int index = 0, matchlen;
00487         return host_regex.search(url.c_str(), url.size(), matchlen, index)
00488                != -1;
00489     }
00490 
00491     return false;
00492 }
00493 
00497 bool
00498 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
00499 {
00500     return d_rcr->is_no_proxy_for_used()
00501            && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
00502 }
00503 
00504 // Public methods. Mostly...
00505 
00512 HTTPConnect::HTTPConnect(RCReader *rcr) throw(Error, InternalErr)
00513         : d_username(""), d_password("")
00514 {
00515     d_accept_deflate = rcr->get_deflate();
00516     d_rcr = rcr;
00517 
00518     // Load in the default headers to send with a request. The empty Pragma
00519     // headers overrides libcurl's default Pragma: no-cache header (which
00520     // will disable caching by Squid, et c.). The User-Agent header helps
00521     // make server logs more readable. 05/05/03 jhrg
00522     d_request_headers.push_back(string("Pragma:"));
00523     string user_agent = string("User-Agent: ") + string(CNAME)
00524                         + string("/") + string(CVER);
00525     d_request_headers.push_back(user_agent);
00526     if (d_accept_deflate)
00527         d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00528 
00529     // HTTPCache::instance returns a valid ptr or 0.
00530     if (d_rcr->get_use_cache())
00531         d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
00532                                            false);
00533     else
00534         d_http_cache = 0;
00535 
00536     DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
00537          << ")" << endl);
00538 
00539     if (d_http_cache) {
00540         d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
00541         d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
00542         d_http_cache->set_max_size(d_rcr->get_max_cache_size());
00543         d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
00544         d_http_cache->set_default_expiration(d_rcr->get_default_expires());
00545         d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
00546     }
00547 
00548     www_lib_init();  // This may throw either Error or InternalErr
00549 }
00550 
00551 HTTPConnect::~HTTPConnect()
00552 {
00553     DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
00554 
00555     curl_easy_cleanup(d_curl);
00556 
00557     DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
00558 }
00559 
00572 HTTPResponse *
00573 HTTPConnect::fetch_url(const string &url)
00574 {
00575 #ifdef HTTP_TRACE
00576     cout << "GET " << url << " HTTP/1.0" << endl;
00577 #endif
00578 
00579     HTTPResponse *stream;
00580 
00581     if (d_http_cache && d_http_cache->is_cache_enabled()) {
00582         stream = caching_fetch_url(url);
00583     }
00584     else {
00585         stream = plain_fetch_url(url);
00586     }
00587 
00588 #ifdef HTTP_TRACE
00589     stringstream ss;
00590     ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
00591     for (size_t i = 0; i < stream->get_headers()->size(); i++) {
00592         ss << stream->get_headers()->at(i) << endl;
00593     }
00594     cout << ss.str();
00595 #endif
00596 
00597     ParseHeader parser;
00598 
00599     parser = for_each(stream->get_headers()->begin(),
00600                       stream->get_headers()->end(), ParseHeader());
00601 
00602 #ifdef HTTP_TRACE
00603     cout << endl << endl;
00604 #endif
00605 
00606     // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
00607     if (parser.get_location() != "" &&
00608         url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
00609            return fetch_url(parser.get_location());
00610     }
00611 
00612     stream->set_type(parser.get_object_type());
00613     stream->set_version(parser.get_server());
00614     stream->set_protocol(parser.get_protocol());
00615 
00616     return stream;
00617 }
00618 
00619 // Look around for a reasonable place to put a temporary file. Check first
00620 // the value of the TMPDIR env var. If that does not yeild a path that's
00621 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
00622 // defined in stdio.h. If both come up empty, then use `./'.
00623 //
00624 // This function allocates storage using new. The caller must delete the char
00625 // array.
00626 
00627 // Change this to a version that either returns a string or an open file
00628 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
00629 // (see open()) to make it more secure. Ideal solution: get deserialize()
00630 // methods to read from a stream returned by libcurl, not from a temporary
00631 // file. 9/21/07 jhrg
00632 static char *
00633 get_tempfile_template(char *file_template)
00634 {
00635     char *c;
00636     
00637 #ifdef WIN32
00638     // whitelist for a WIN32 directory
00639     Regex directory("[-a-zA-Z0-9_\\]*");
00640         
00641     c = getenv("TEMP");
00642     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00643         goto valid_temp_directory;
00644 
00645     c= getenv("TMP");
00646     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00647         goto valid_temp_directory;
00648 #else
00649         // whitelist for a directory
00650         Regex directory("[-a-zA-Z0-9_/]*");
00651         
00652         c = getenv("TMPDIR");
00653         if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0))
00654         goto valid_temp_directory;
00655 
00656 #ifdef P_tmpdir
00657         if (access(P_tmpdir, W_OK | R_OK) == 0) {
00658         c = P_tmpdir;
00659         goto valid_temp_directory;
00660         }
00661 #endif
00662 
00663 #endif  // WIN32
00664 
00665     c = ".";
00666     
00667 valid_temp_directory:
00668         // Sanitize allocation
00669         int size = strlen(c) + strlen(file_template) + 2;
00670         if (!size_ok(1, size))
00671                 throw Error("Bad temporary file name.");
00672                 
00673     char *temp = new char[size];
00674     strncpy(temp, c, size-2);
00675     strcat(temp, "/");
00676 
00677     strcat(temp, file_template);
00678 
00679     return temp;
00680 }
00681 
00700 string
00701 get_temp_file(FILE *&stream) throw(InternalErr)
00702 {
00703     // get_tempfile_template() uses new, must call delete
00704     char *dods_temp = get_tempfile_template("dodsXXXXXX");
00705 
00706     // Open truncated for update. NB: mkstemp() returns a file descriptor.
00707 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
00708     stream = fopen(_mktemp(dods_temp), "w+b");
00709 #else
00710     stream = fdopen(mkstemp(dods_temp), "w+");
00711 #endif
00712 
00713     if (!stream)
00714         throw InternalErr("I/O Error: Failed to open a temporary file for the data values.");
00715 
00716     string dods_temp_s = dods_temp;
00717     delete[] dods_temp; dods_temp = 0;
00718 
00719     return dods_temp_s;
00720 }
00721 
/**
 * Close a temporary file and remove it from the file system.
 *
 * A failure to close the stream is only reported through the DBG macro;
 * the file is unlinked either way.
 *
 * @param s The open stream for the temporary file.
 * @param name The name of the temporary file.
 */
void
close_temp(FILE *s, const std::string &name)
{
    if (fclose(s) != 0)
        DBG(std::cerr << "Failed to close " << static_cast<void *>(s) << std::endl);

    unlink(name.c_str());
}
00732 
00754 HTTPResponse *
00755 HTTPConnect::caching_fetch_url(const string &url)
00756 {
00757     DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
00758 
00759     if (d_http_cache->is_url_in_cache(url)) { // url in cache
00760         DBGN(cerr << "yes... ");
00761 
00762         if (d_http_cache->is_url_valid(url)) { // url in cache and valid
00763             DBGN(cerr << "and it's valid; using cached response." << endl);
00764 
00765             vector<string> *headers = new vector<string>;;
00766             FILE *s = d_http_cache->get_cached_response(url, *headers);
00767             HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, d_http_cache);
00768 
00769             return crs;
00770         }
00771         else {   // url in cache but not valid; validate
00772             DBGN(cerr << "but it's not valid; validating... ");
00773 
00774             // *** auto_ptr??? resp_hdrs not deleted! 10/10/03 jhrg
00775             vector<string> *resp_hdrs = new vector<string>;
00776             vector<string> cond_hdrs
00777                 = d_http_cache->get_conditional_request_headers(url);
00778             FILE *body = 0;
00779             string dods_temp = get_temp_file(body);
00780             time_t now = time(0); // When was the request made (now).
00781             long http_status;
00782 
00783             try {
00784                 http_status = read_url(url, body, resp_hdrs, &cond_hdrs);
00785                 rewind(body);
00786             }
00787             catch (Error &e) {
00788                 close_temp(body, dods_temp);
00789                 throw;
00790             }
00791 
00792             switch (http_status) {
00793             case 200: {  // New headers and new body
00794                     DBGN(cerr << "read a new response; caching." << endl);
00795 
00796                     d_http_cache->cache_response(url, now, *resp_hdrs, body);
00797                     HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs,
00798                                                         dods_temp);
00799 
00800                     return rs;
00801                 }
00802                 break;
00803 
00804             case 304: {  // Just new headers, use cached body
00805                     DBGN(cerr << "cached response valid; updating." << endl);
00806 
00807                     close_temp(body, dods_temp);
00808                     d_http_cache->update_response(url, now, *resp_hdrs);
00809 
00810                     vector<string> *headers = new vector<string>;;
00811                     FILE *s = d_http_cache->get_cached_response(url, *headers);
00812                     HTTPCacheResponse *crs
00813                     = new HTTPCacheResponse(s, 304, headers, d_http_cache);
00814                     return crs;
00815                 }
00816                 break;
00817 
00818             default: {  // Oops.
00819                     close_temp(body, dods_temp);
00820                     if (http_status >= 400) {
00821                         string msg = "Error while reading the URL: ";
00822                         msg += url;
00823                         msg += ".\nThe OPeNDAP server returned the following message:\n";
00824                         msg += http_status_to_string(http_status);
00825                         throw Error(msg);
00826                     }
00827                     else
00828                         throw InternalErr(__FILE__, __LINE__,
00829                                           "Bad response from the HTTP server: " + long_to_string(http_status));
00830                 }
00831                 break;
00832             }
00833         }
00834     }
00835     else {   // url not in cache; get it and cache it
00836         DBGN(cerr << "no; getting response and caching." << endl);
00837         time_t now = time(0);
00838         HTTPResponse *rs = plain_fetch_url(url);
00839         d_http_cache->cache_response(url, now, *(rs->get_headers()),
00840                                      rs->get_stream());
00841 
00842         return rs;
00843     }
00844 
00845     throw InternalErr(__FILE__, __LINE__, "Unexpected cache response.");
00846 }
00847 
00848 
00860 HTTPResponse *
00861 HTTPConnect::plain_fetch_url(const string &url)
00862 {
00863     DBG(cerr << "Getting URL: " << url << endl);
00864     FILE *stream = 0;
00865     string dods_temp = get_temp_file(stream);
00866     vector<string> *resp_hdrs = new vector<string>;
00867 
00868     int status = -1;
00869     try {
00870         status = read_url(url, stream, resp_hdrs); // Throws Error.
00871         if (status >= 400) {
00872             string msg = "Error while reading the URL: ";
00873             msg += url;
00874             msg += ".\nThe OPeNDAP server returned the following message:\n";
00875             msg += http_status_to_string(status);
00876             throw Error(msg);
00877         }
00878     }
00879 
00880     catch (Error &e) {
00881         close_temp(stream, dods_temp);
00882         throw e;
00883     }
00884 
00885     rewind(stream);
00886 
00887     return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
00888 }
00889 
00901 void
00902 HTTPConnect::set_accept_deflate(bool deflate)
00903 {
00904     d_accept_deflate = deflate;
00905 
00906     if (d_accept_deflate) {
00907         if (find(d_request_headers.begin(), d_request_headers.end(),
00908                  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
00909             d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00910         DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00911                  ostream_iterator<string>(cerr, "\n")));
00912     }
00913     else {
00914         vector<string>::iterator i;
00915         i = remove_if(d_request_headers.begin(), d_request_headers.end(),
00916                       bind2nd(equal_to<string>(),
00917                               string("Accept-Encoding: deflate, gzip, compress")));
00918         d_request_headers.erase(i, d_request_headers.end());
00919     }
00920 }
00921 
00937 void
00938 HTTPConnect::set_credentials(const string &u, const string &p)
00939 {
00940     if (u.empty())
00941         return;
00942 
00943     // Store the credentials locally.
00944     d_username = u;
00945     d_password = p;
00946 
00947     d_upstring = u + ":" + p;
00948 }
00949 
00950 } // namespace libdap

Generated on Tue Jun 10 18:00:31 2008 for libdap++ by  doxygen 1.5.4