HTTPConnect.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 
00027 #include "config.h"
00028 
// Revision-control identification string for this translation unit.
// NOTE(review): not_used is not defined in this file; presumably config.h
// defines it as an "unused" attribute so the compiler does not warn about
// this never-referenced variable -- confirm.
static char rcsid[] not_used =
    { "$Id: HTTPConnect.cc 19864 2008-11-14 17:01:35Z jimg $"
    };
00032 
00033 #ifdef HAVE_UNISTD_H
00034 #include <unistd.h>
00035 #endif
00036 
00037 #ifdef WIN32
00038 #include <io.h>
00039 #endif
00040 
00041 #include <string>
00042 #include <vector>
00043 #include <functional>
00044 #include <algorithm>
00045 #include <sstream>
00046 #include <iterator>
00047 #include <cstdlib>
00048 #include <cstring>
00049 
00050 //#define DODS_DEBUG
00051 //#define DODS_DEBUG2
00052 
00053 #include "debug.h"
00054 #include "GNURegex.h"
00055 #include "HTTPCache.h"
00056 #include "HTTPConnect.h"
00057 #include "RCReader.h"
00058 #include "HTTPResponse.h"
00059 #include "HTTPCacheResponse.h"
00060 
00061 using namespace std;
00062 
00063 namespace libdap {
00064 
00065 // These global variables are not MT-Safe, but I'm leaving them as is because
00066 // they are used only for debugging (set them in a debugger like gdb or ddd).
00067 // They are not static because I *believe* that many debuggers cannot access
00068 // static variables. 08/07/02 jhrg
00069 
// Set this to 1 to turn on libcurl's verbose mode (for debugging). The flag
// is read by HTTPConnect::www_lib_init(), i.e. when a connection object is
// constructed, so set it before creating the HTTPConnect instance.
int www_trace = 0;

// Keep the temporary files; useful for debugging.
// NOTE(review): nothing in this file reads this flag (close_temp() always
// unlinks); presumably it is consulted by code elsewhere that removes the
// temporary files -- confirm.
int dods_keep_temps = 0;
00075 
// Lowest and highest HTTP client-error (4xx) status codes that have an entry
// in http_client_errors. The table is indexed by (status - CLIENT_ERR_MIN).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Human-readable text for each 4xx status code. The elements point at string
// literals, so they are declared const char * (binding a literal to a plain
// char * is deprecated in C++98 and ill-formed from C++11 on).
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n\
        Often this means that the OPeNDAP server is missing or needs attention;\n\
        Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone:.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };
00101 
// Lowest and highest HTTP server-error (5xx) status codes that have an entry
// in http_server_errors. The table is indexed by (status - SERVER_ERR_MIN).
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Human-readable text for each 5xx status code. Declared const char * because
// the elements point at string literals, which must not be modified.
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };
00113 
00120 ObjectType
00121 get_type(const string &value)
00122 {
00123     if (value == "dods_das" | value == "dods-das")
00124         return dods_das;
00125     else if (value == "dods_dds" | value == "dods-dds")
00126         return dods_dds;
00127     else if (value == "dods_data" | value == "dods-data")
00128         return dods_data;
00129     else if (value == "dods_error" | value == "dods-error")
00130         return dods_error;
00131     else if (value == "web_error" | value == "web-error")
00132         return web_error;
00133     else if (value == "dap4_ddx" | value == "dap4-ddx")
00134         return dap4_ddx;
00135     else if (value == "dap4_datax" | value == "dap4-datax")
00136         return dap4_datax;
00137     else if (value == "dap4_errorx" | value == "dap4-errorx")
00138         return dap4_errorx;
00139     else
00140         return unknown_type;
00141 }
00142 
00148 ObjectType
00149 get_description_type(const string &value)
00150 {
00151     if (value == "dods_das" | value == "dods-das")
00152         return dods_das;
00153     else if (value == "dods_dds" | value == "dods-dds")
00154         return dods_dds;
00155     else if (value == "dods_data" | value == "dods-data")
00156         return dods_data;
00157     else if (value == "dods_error" | value == "dods-error")
00158         return dods_error;
00159     else if (value == "web_error" | value == "web-error")
00160         return web_error;
00161     else if (value == "dap4_ddx" | value == "dap4-ddx")
00162         return dap4_ddx;
00163     else if (value == "dap4_datax" | value == "dap4-datax")
00164         return dap4_datax;
00165     else if (value == "dap4_errorx" | value == "dap4-errorx")
00166         return dap4_errorx;
00167     else
00168         return unknown_type;
00169 }
00170 
00173 static string
00174 http_status_to_string(int status)
00175 {
00176     if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
00177         return string(http_client_errors[status - CLIENT_ERR_MIN]);
00178     else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
00179         return string(http_server_errors[status - SERVER_ERR_MIN]);
00180     else
00181         return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
00182 }
00183 
00188 class ParseHeader : public unary_function<const string &, void>
00189 {
00190     ObjectType type;  // What type of object is in the stream?
00191     string server;  // Server's version string.
00192     string protocol;            // Server's protocol version.
00193     string location;            // Url returned by server
00194 
00195 public:
00196     ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
00197     { }
00198 
00199     void operator()(const string &header)
00200     {
00201         std::istringstream line(header);
00202 
00203         string name;
00204         line >> name;
00205         downcase(name);
00206         if (name == "content-description:") {
00207             string value;
00208             line >> value;
00209             downcase(value);
00210             DBG2(cout << name << ": " << value << endl);
00211             type = get_description_type(value);
00212         }
00213         // The second test (== "dods/0.0") tests if xopendap-server has already
00214         // been seen. If so, use that header in preference to the old
00215         // XDODS-Server header. jhrg 2/7/06
00216         else if (name == "xdods-server:" && server == "dods/0.0") {
00217             string value;
00218             line >> value;
00219             downcase(value);
00220             DBG2(cout << name << ": " << value << endl);
00221             server = value;
00222         }
00223         else if (name == "xopendap-server:") {
00224             string value;
00225             line >> value;
00226             downcase(value);
00227             DBG2(cout << name << ": " << value << endl);
00228             server = value;
00229         }
00230         else if (name == "xdap:") {
00231             string value;
00232             line >> value;
00233             downcase(value);
00234             DBG2(cout << name << ": " << value << endl);
00235             protocol = value;
00236         }
00237         else if (server == "dods/0.0" && name == "server:") {
00238             string value;
00239             line >> value;
00240             downcase(value);
00241             DBG2(cout << name << ": " << value << endl);
00242             server = value;
00243         }
00244         else if (name == "location:") {
00245             string value;
00246             line >> value;
00247             DBG2(cout << name << ": " << value << endl);
00248             location = value;
00249         }
00250         else if (type == unknown_type && name == "content-type:"
00251                  && line.str().find("text/html") != string::npos) {
00252             DBG2(cout << name << ": text/html..." << endl);
00253             type = web_error;
00254         }
00255     }
00256 
00257     ObjectType get_object_type()
00258     {
00259         return type;
00260     }
00261 
00262     string get_server()
00263     {
00264         return server;
00265     }
00266 
00267     string get_protocol()
00268     {
00269         return protocol;
00270     }
00271 
00272     string get_location() {
00273            return location;
00274     }
00275 };
00276 
// libcurl header callback: store one raw response header line.
//
// The line terminator ("\r\n" or a bare "\n") is removed. Empty lines and
// HTTP status lines (lines that start with "HTTP/") are not stored. Only the
// start of the line is tested, so headers whose value merely contains the
// text "HTTP" (for example "Server: Microsoft-HTTPAPI/2.0") are kept.
//
// @param ptr Pointer to the header bytes; not NUL terminated.
// @param size Size of one data item.
// @param nmemb Number of data items; the line is size * nmemb bytes long.
// @param resp_hdrs Pointer to the vector<string> that receives the header.
// @return size * nmemb, which tells libcurl that the whole line was consumed.
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Nothing to record; this also avoids the unsigned underflow that
    // (nmemb - 1) would produce for a zero-length buffer.
    if (!data || !hdrs || total == 0)
        return total;

    // Drop the trailing newline, or "\r\n" pair, but only if it is actually
    // there so that no header text is ever cut off.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines.
    const bool is_status_line = complete_line.compare(0, 5, "HTTP/") == 0;
    if (!complete_line.empty() && !is_status_line) {
        DBG(cerr << "Header line: " << complete_line << endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
00314 
00316 static int
00317 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void  *)
00318 {
00319     string message(msg, size);
00320 
00321     switch (info) {
00322     case CURLINFO_TEXT:
00323         cerr << "Text: " << message; break;
00324     case CURLINFO_HEADER_IN:
00325         cerr << "Header in: " << message; break;
00326     case CURLINFO_HEADER_OUT:
00327         cerr << "Header out: " << message; break;
00328     case CURLINFO_DATA_IN:
00329         cerr << "Data in: " << message; break;
00330     case CURLINFO_DATA_OUT:
00331         cerr << "Data out: " << message; break;
00332     case CURLINFO_END:
00333         cerr << "End: " << message; break;
00334 #ifdef CURLINFO_SSL_DATA_IN
00335     case CURLINFO_SSL_DATA_IN:
00336         cerr << "SSL Data in: " << message; break;
00337 #endif
00338 #ifdef CURLINFO_SSL_DATA_OUT
00339     case CURLINFO_SSL_DATA_OUT:
00340         cerr << "SSL Data out: " << message; break;
00341 #endif
00342     default:
00343         cerr << "Curl info: " << message; break;
00344     }
00345     return 0;
00346 }
00347 
00351 void
00352 HTTPConnect::www_lib_init()
00353 {
00354     d_curl = curl_easy_init();
00355     if (!d_curl)
00356         throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
00357 
00358     // Now set options that will remain constant for the duration of this
00359     // CURL object.
00360 
00361     // Set the proxy host.
00362     if (!d_rcr->get_proxy_server_host().empty()) {
00363         DBG(cerr << "Setting up a proxy server." << endl);
00364         DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
00365             << endl);
00366         DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
00367             << endl);
00368         DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
00369             << endl);
00370         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00371                          d_rcr->get_proxy_server_host().c_str());
00372         curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
00373                          d_rcr->get_proxy_server_port());
00374 
00375         // As of 4/21/08 only NTLM, Digest and Basic work.
00376 #ifdef CURLOPT_PROXYAUTH
00377         curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
00378 #endif
00379 
00380         // Password might not be required. 06/21/04 jhrg
00381         if (!d_rcr->get_proxy_server_userpw().empty())
00382             curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
00383                              d_rcr->get_proxy_server_userpw().c_str());
00384     }
00385 
00386     curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
00387     // We have to set FailOnError to false for any of the non-Basic
00388     // authentication schemes to work. 07/28/03 jhrg
00389     curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
00390 
00391     // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
00392     // choosing the the 'safest' one supported by the server.
00393     // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
00394     curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00395 
00396     curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
00397     curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
00398     curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
00399     // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
00400     // param of save_raw_http_headers to a vector<string> object.
00401 
00402     // Follow 302 (redirect) responses
00403     curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
00404     curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
00405 
00406     // If the user turns off SSL validation...
00407     if (!d_rcr->get_validate_ssl() == 0) {
00408         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
00409         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
00410     }
00411 
00412     // Look to see if cookies are turned on in the .dodsrc file. If so,
00413     // activate here. We honor 'session cookies' (cookies without an
00414     // expiration date) here so that session-base SSO systems will work as
00415     // expected.  
00416     if (!d_cookie_jar.empty()) {
00417         DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
00418         curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
00419         curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
00420     }
00421 
00422     if (www_trace) {
00423         cerr << "Curl version: " << curl_version() << endl;
00424         curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
00425         curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
00426     }
00427 }
00428 
00432 class BuildHeaders : public unary_function<const string &, void>
00433 {
00434     struct curl_slist *d_cl;
00435 
00436 public:
00437     BuildHeaders() : d_cl(0)
00438     {}
00439 
00440     void operator()(const string &header)
00441     {
00442         DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
00443             << endl);
00444         d_cl = curl_slist_append(d_cl, header.c_str());
00445     }
00446 
00447     struct curl_slist *get_headers()
00448     {
00449         return d_cl;
00450     }
00451 };
00452 
00467 long
00468 HTTPConnect::read_url(const string &url, FILE *stream,
00469                       vector<string> *resp_hdrs,
00470                       const vector<string> *headers)
00471 {
00472     curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
00473 
00474 #ifdef WIN32
00475     //  See the curl documentation for CURLOPT_FILE (nka CURLOPT_WRITEDATA)
00476     //  and the CURLOPT_WRITEFUNCTION option.  Quote: "If you are using libcurl as
00477     //  a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
00478     //  CURLOPT_WRITEDATA option or you will experience crashes".  At the root of
00479     //  this issue is that one should not pass a FILE * to a windows DLL.  Close
00480     //  inspection of libcurl yields that their default write function when using
00481     //  the CURLOPT_WRITEDATA is just "fwrite".
00482     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00483     curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
00484 #else
00485     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00486 #endif
00487 
00488     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00489              ostream_iterator<string>(cerr, "\n")));
00490 
00491     BuildHeaders req_hdrs;
00492     req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
00493                         req_hdrs);
00494     if (headers)
00495         req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
00496     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
00497 
00498     if (d_accept_deflate)
00499         curl_easy_setopt(d_curl, CURLOPT_ENCODING, "deflate");
00500 
00501     // Turn off the proxy for this URL?
00502     bool temporary_proxy = false;
00503     if ((temporary_proxy = url_uses_no_proxy_for(url))) {
00504         DBG(cerr << "Suppress proxy for url: " << url << endl);
00505         curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
00506     }
00507 
00508     string::size_type at_sign = url.find('@');
00509     // Assume username:password present *and* assume it's an HTTP URL; it *is*
00510     // HTTPConnect, after all. 7 is position after "http://"; the second arg
00511     // to substr() is the sub string length.
00512     if (at_sign != url.npos)
00513         d_upstring = url.substr(7, at_sign - 7);
00514 
00515     if (!d_upstring.empty())
00516         curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
00517 
00518     // Pass save_raw_http_headers() a pointer to the vector<string> where the
00519     // response headers may be stored. Callers can use the resp_hdrs
00520     // value/result parameter to get the raw response header information .
00521     curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
00522 
00523     CURLcode res = curl_easy_perform(d_curl);
00524 
00525     // Free the header list and null the value in d_curl.
00526     curl_slist_free_all(req_hdrs.get_headers());
00527     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
00528 
00529     // Reset the proxy?
00530     if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
00531         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00532                          d_rcr->get_proxy_server_host().c_str());
00533 
00534     if (res != 0)
00535         throw Error(d_error_buffer);
00536 
00537     long status;
00538     res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
00539     if (res != 0)
00540         throw Error(d_error_buffer);
00541 
00542     return status;
00543 }
00544 
00548 bool
00549 HTTPConnect::url_uses_proxy_for(const string &url) throw()
00550 {
00551     if (d_rcr->is_proxy_for_used()) {
00552         Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
00553         int index = 0, matchlen;
00554         return host_regex.search(url.c_str(), url.size(), matchlen, index)
00555                != -1;
00556     }
00557 
00558     return false;
00559 }
00560 
00564 bool
00565 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
00566 {
00567     return d_rcr->is_no_proxy_for_used()
00568            && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
00569 }
00570 
00571 // Public methods. Mostly...
00572 
00579 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""),
00580                                           d_cookie_jar(""),
00581                                           d_dap_client_protocol_major(2), 
00582                                           d_dap_client_protocol_minor(0)
00583 
00584 {
00585     d_accept_deflate = rcr->get_deflate();
00586     d_rcr = rcr;
00587 
00588     // Load in the default headers to send with a request. The empty Pragma
00589     // headers overrides libcurl's default Pragma: no-cache header (which
00590     // will disable caching by Squid, et c.). The User-Agent header helps
00591     // make server logs more readable. 05/05/03 jhrg
00592     d_request_headers.push_back(string("Pragma:"));
00593     string user_agent = string("User-Agent: ") + string(CNAME)
00594                         + string("/") + string(CVER);
00595     d_request_headers.push_back(user_agent);
00596     if (d_accept_deflate)
00597         d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00598 
00599     // HTTPCache::instance returns a valid ptr or 0.
00600     if (d_rcr->get_use_cache())
00601         d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
00602                                            true);
00603     else
00604         d_http_cache = 0;
00605 
00606     DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
00607          << ")" << endl);
00608 
00609     if (d_http_cache) {
00610         d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
00611         d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
00612         d_http_cache->set_max_size(d_rcr->get_max_cache_size());
00613         d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
00614         d_http_cache->set_default_expiration(d_rcr->get_default_expires());
00615         d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
00616     }
00617 
00618     d_cookie_jar = rcr->get_cookie_jar();
00619 
00620     www_lib_init();  // This may throw either Error or InternalErr
00621 }
00622 
00623 HTTPConnect::~HTTPConnect()
00624 {
00625     DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
00626 
00627     curl_easy_cleanup(d_curl);
00628 
00629     DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
00630 }
00631 
00644 HTTPResponse *
00645 HTTPConnect::fetch_url(const string &url)
00646 {
00647 #ifdef HTTP_TRACE
00648     cout << "GET " << url << " HTTP/1.0" << endl;
00649 #endif
00650 
00651     HTTPResponse *stream;
00652 
00653     if (d_http_cache && d_http_cache->is_cache_enabled()) {
00654         stream = caching_fetch_url(url);
00655     }
00656     else {
00657         stream = plain_fetch_url(url);
00658     }
00659 
00660 #ifdef HTTP_TRACE
00661     stringstream ss;
00662     ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
00663     for (size_t i = 0; i < stream->get_headers()->size(); i++) {
00664         ss << stream->get_headers()->at(i) << endl;
00665     }
00666     cout << ss.str();
00667 #endif
00668 
00669     ParseHeader parser;
00670 
00671     parser = for_each(stream->get_headers()->begin(),
00672                       stream->get_headers()->end(), ParseHeader());
00673 
00674 #ifdef HTTP_TRACE
00675     cout << endl << endl;
00676 #endif
00677 
00678     // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
00679     if (parser.get_location() != "" &&
00680         url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
00681            return fetch_url(parser.get_location());
00682     }
00683 
00684     stream->set_type(parser.get_object_type());
00685     stream->set_version(parser.get_server());
00686     stream->set_protocol(parser.get_protocol());
00687 
00688     return stream;
00689 }
00690 
// Look around for a reasonable place to put a temporary file. Check first
// the value of the TMPDIR env var. If that does not yield a path that's
// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
// defined in stdio.h). If both come up empty, then use `./'.
//
// This function allocates storage using new[]. The caller must delete[] the
// char array.
00698 
00699 // Change this to a version that either returns a string or an open file
00700 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
00701 // (see open()) to make it more secure. Ideal solution: get deserialize()
00702 // methods to read from a stream returned by libcurl, not from a temporary
00703 // file. 9/21/07 jhrg
00704 static char *
00705 get_tempfile_template(char *file_template)
00706 {
00707     char *c;
00708 
00709 #ifdef WIN32
00710     // whitelist for a WIN32 directory
00711     Regex directory("[-a-zA-Z0-9_\\]*");
00712 
00713     c = getenv("TEMP");
00714     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00715         goto valid_temp_directory;
00716 
00717     c= getenv("TMP");
00718     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00719         goto valid_temp_directory;
00720 #else
00721         // whitelist for a directory
00722         Regex directory("[-a-zA-Z0-9_/]*");
00723 
00724         c = getenv("TMPDIR");
00725         if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0))
00726         goto valid_temp_directory;
00727 
00728 #ifdef P_tmpdir
00729         if (access(P_tmpdir, W_OK | R_OK) == 0) {
00730         c = P_tmpdir;
00731         goto valid_temp_directory;
00732         }
00733 #endif
00734 
00735 #endif  // WIN32
00736 
00737     c = ".";
00738 
00739 valid_temp_directory:
00740         // Sanitize allocation
00741         int size = strlen(c) + strlen(file_template) + 2;
00742         if (!size_ok(1, size))
00743                 throw Error("Bad temporary file name.");
00744 
00745     char *temp = new char[size];
00746     strncpy(temp, c, size-2);
00747     strcat(temp, "/");
00748 
00749     strcat(temp, file_template);
00750 
00751     return temp;
00752 }
00753 
00772 string
00773 get_temp_file(FILE *&stream) throw(InternalErr)
00774 {
00775     // get_tempfile_template() uses new, must call delete
00776     char *dods_temp = get_tempfile_template("dodsXXXXXX");
00777 
00778     // Open truncated for update. NB: mkstemp() returns a file descriptor.
00779 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
00780     stream = fopen(_mktemp(dods_temp), "w+b");
00781 #else
00782     stream = fdopen(mkstemp(dods_temp), "w+");
00783 #endif
00784 
00785     if (!stream)
00786         throw InternalErr("I/O Error: Failed to open a temporary file for the data values.");
00787 
00788     string dods_temp_s = dods_temp;
00789     delete[] dods_temp; dods_temp = 0;
00790 
00791     return dods_temp_s;
00792 }
00793 
// Close the temporary file's stream and remove the file from the file
// system. A failure to close is only reported in debugging builds; the file
// is unlinked either way.
//
// @param s The open stream for the temporary file.
// @param name The name of the temporary file.
void
close_temp(FILE *s, const string &name)
{
    const bool close_failed = fclose(s) != 0;
    if (close_failed)
        DBG(cerr << "Failed to close " << (void *)s << endl);

    unlink(name.c_str());
}
00804 
00826 HTTPResponse *
00827 HTTPConnect::caching_fetch_url(const string &url)
00828 {
00829     DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
00830 
00831     vector<string> *headers = new vector<string> ;
00832     FILE *s = d_http_cache->get_cached_response(url, *headers);
00833     if (!s) {
00834         // url not in cache; get it and cache it
00835         DBGN(cerr << "no; getting response and caching." << endl);
00836         time_t now = time(0);
00837         HTTPResponse *rs = plain_fetch_url(url);
00838         d_http_cache->cache_response(url, now, *(rs->get_headers()),
00839                 rs->get_stream());
00840 
00841         return rs;
00842     }
00843     else { // url in cache
00844         DBGN(cerr << "yes... ");
00845 
00846         if (d_http_cache->is_url_valid(url)) { // url in cache and valid
00847             DBGN(cerr << "and it's valid; using cached response." << endl);
00848             HTTPCacheResponse *crs =
00849                     new HTTPCacheResponse(s, 200, headers, d_http_cache);
00850             return crs;
00851         }
00852         else { // url in cache but not valid; validate
00853             DBGN(cerr << "but it's not valid; validating... ");
00854 
00855             d_http_cache->release_cached_response(s);
00856 
00857             vector<string> *resp_hdrs = new vector<string> ;
00858             vector<string> cond_hdrs =
00859                     d_http_cache->get_conditional_request_headers(url);
00860             FILE *body = 0;
00861             string dods_temp = get_temp_file(body);
00862             time_t now = time(0); // When was the request made (now).
00863             long http_status;
00864 
00865             try {
00866                 http_status = read_url(url, body, resp_hdrs, &cond_hdrs);
00867                 rewind(body);
00868             }
00869             catch (Error &e) {
00870                 close_temp(body, dods_temp);
00871                 throw ;
00872             }
00873 
00874             switch (http_status) {
00875                 case 200: { // New headers and new body
00876                     DBGN(cerr << "read a new response; caching." << endl);
00877 
00878                     d_http_cache->cache_response(url, now, *resp_hdrs, body);
00879                     HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs,
00880                             dods_temp);
00881 
00882                     return rs;
00883                 }
00884 
00885                 case 304: { // Just new headers, use cached body
00886                     DBGN(cerr << "cached response valid; updating." << endl);
00887 
00888                     close_temp(body, dods_temp);
00889                     d_http_cache->update_response(url, now, *resp_hdrs);
00890 
00891                     vector<string> *headers = new vector<string>;
00892                     FILE *hs = d_http_cache->get_cached_response(url, *headers);
00893                     HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, d_http_cache);
00894                     return crs;
00895                 }
00896 
00897                 default: { // Oops.
00898                     close_temp(body, dods_temp);
00899                     if (http_status >= 400) {
00900                         string msg = "Error while reading the URL: ";
00901                         msg += url;
00902                         msg
00903                         += ".\nThe OPeNDAP server returned the following message:\n";
00904                         msg += http_status_to_string(http_status);
00905                         throw Error(msg);
00906                     }
00907                     else {
00908                         throw InternalErr(__FILE__, __LINE__,
00909                                 "Bad response from the HTTP server: " + long_to_string(http_status));
00910                     }
00911                 }
00912             }
00913         }
00914     }
00915 
00916     throw InternalErr(__FILE__, __LINE__, "Should never get here");
00917 }
00918 
00930 HTTPResponse *
00931 HTTPConnect::plain_fetch_url(const string &url)
00932 {
00933     DBG(cerr << "Getting URL: " << url << endl);
00934     FILE *stream = 0;
00935     string dods_temp = get_temp_file(stream);
00936     vector<string> *resp_hdrs = new vector<string>;
00937 
00938     int status = -1;
00939     try {
00940         status = read_url(url, stream, resp_hdrs); // Throws Error.
00941         if (status >= 400) {
00942             string msg = "Error while reading the URL: ";
00943             msg += url;
00944             msg += ".\nThe OPeNDAP server returned the following message:\n";
00945             msg += http_status_to_string(status);
00946             throw Error(msg);
00947         }
00948     }
00949 
00950     catch (Error &e) {
00951         close_temp(stream, dods_temp);
00952         throw e;
00953     }
00954 
00955     rewind(stream);
00956 
00957     return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
00958 }
00959 
00971 void
00972 HTTPConnect::set_accept_deflate(bool deflate)
00973 {
00974     d_accept_deflate = deflate;
00975 
00976     if (d_accept_deflate) {
00977         if (find(d_request_headers.begin(), d_request_headers.end(),
00978                  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
00979             d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00980         DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00981                  ostream_iterator<string>(cerr, "\n")));
00982     }
00983     else {
00984         vector<string>::iterator i;
00985         i = remove_if(d_request_headers.begin(), d_request_headers.end(),
00986                       bind2nd(equal_to<string>(),
00987                               string("Accept-Encoding: deflate, gzip, compress")));
00988         d_request_headers.erase(i, d_request_headers.end());
00989     }
00990 }
00991 
// Predicate that is true for strings that begin with a given header name.
// Use it with find_if() to locate a header in a vector of header lines.
//
// The header text is stored by value so the predicate stays valid even when
// it was constructed from a temporary string. The class does not derive from
// std::unary_function, which was deprecated in C++11 and removed in C++17.
class HeaderMatch {
    string d_header;    // The prefix to look for.
    public:
        explicit HeaderMatch(const string &header) : d_header(header) {}
        // True if arg starts with the header text; the comparison is case
        // sensitive.
        bool operator()(const string &arg) const
        {
            return arg.compare(0, d_header.size(), d_header) == 0;
        }
};
00999 
01008 void
01009 HTTPConnect::set_xdap_protocol(int major, int minor)
01010 {
01011     // Look for, and remove if one exists, an XDAP-Accept header
01012     vector<string>::iterator i;
01013     i = find_if(d_request_headers.begin(), d_request_headers.end(),
01014                 HeaderMatch("XDAP-Accept:"));
01015     if (i != d_request_headers.end())
01016         d_request_headers.erase(i);
01017 
01018     // Record and add the new header value
01019     d_dap_client_protocol_major = major;
01020     d_dap_client_protocol_minor = minor;
01021     ostringstream xdap_accept;
01022     xdap_accept << "XDAP-Accept: " << major << "." << minor;
01023 
01024     d_request_headers.push_back(xdap_accept.str());
01025 
01026     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
01027              ostream_iterator<string>(cerr, "\n")));
01028 }
01029 
01045 void
01046 HTTPConnect::set_credentials(const string &u, const string &p)
01047 {
01048     if (u.empty())
01049         return;
01050 
01051     // Store the credentials locally.
01052     d_username = u;
01053     d_password = p;
01054 
01055     d_upstring = u + ":" + p;
01056 }
01057 
01058 } // namespace libdap

Generated on Wed May 13 18:06:38 2009 for libdap++ by  doxygen 1.4.7