HTTPCache.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #include "config.h"
00027 
00028 #include <pthread.h>
00029 #include <limits.h>
00030 #include <unistd.h>   // for stat
00031 #include <sys/types.h>  // for stat and mkdir
00032 #include <sys/stat.h>
00033 
00034 #include <cstring>
00035 #include <iostream>
00036 #include <sstream>
00037 #include <algorithm>
00038 #include <iterator>
00039 #include <set>
00040 
00041 #include "Error.h"
00042 #include "InternalErr.h"
00043 #include "ResponseTooBigErr.h"
00044 #ifndef WIN32
00045 #include "SignalHandler.h"
00046 #endif
00047 #include "HTTPCacheInterruptHandler.h"
00048 #include "HTTPCache.h"
00049 
00050 #include "util_mit.h"
00051 #include "debug.h"
00052 
00053 using namespace std;
00054 
00055 namespace libdap {
00056 
00057 HTTPCache *HTTPCache::_instance = 0;
00058 
00059 // instance_mutex is used to ensure that only one instance is created. The
00060 // other mutexes used by this class are fields. 10/09/02 jhrg
00061 // Gcc (4.0.0) now complains about this saying that there are missing member
00062 // initializers for __kind, et cetera. jhrg 2/23/06
00063 // Now initialized in once_init_routine() called from the ctor.
00064 static pthread_mutex_t instance_mutex; //  = PTHREAD_MUTEX_INITIALIZER;
00065 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
00066 
00067 #define LOCK(m) pthread_mutex_lock((m))
00068 #define TRYLOCK(m) pthread_mutex_trylock((m))
00069 #define UNLOCK(m) pthread_mutex_unlock((m))
00070 #define INIT(m) pthread_mutex_init((m), 0)
00071 #define DESTROY(m) pthread_mutex_destroy((m))
00072 
00073 #ifdef WIN32
00074 #include <direct.h>
00075 #include <time.h>
00076 #include <fcntl.h>
00077 #define MKDIR(a,b) _mkdir((a))
00078 #define REMOVE(a) remove((a))
00079 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
00080 #define DIR_SEPARATOR_CHAR '\\'
00081 #define DIR_SEPARATOR_STR "\\"
00082 #else
00083 #define MKDIR(a,b) mkdir((a), (b))
00084 #define REMOVE(a) remove((a))
00085 #define MKSTEMP(a) mkstemp((a))
00086 #define DIR_SEPARATOR_CHAR '/'
00087 #define DIR_SEPARATOR_STR "/"
00088 #endif
00089 
00090 #ifdef WIN32
00091 #define CACHE_LOC "\\tmp\\"
00092 #define CACHE_ROOT "dods-cache\\"
00093 #else
00094 #define CACHE_LOC "/tmp/"
00095 #define CACHE_ROOT "dods-cache/"
00096 #endif
00097 #define CACHE_INDEX ".index"
00098 #define CACHE_LOCK ".lock"
00099 #define CACHE_META ".meta"
00100 #define CACHE_EMPTY_ETAG "@cache@"

// Expiration defaults, in seconds. The replacement text is parenthesized so
// the macros expand safely inside larger expressions (e.g. `x % NO_LM_EXPIRATION`).
#define NO_LM_EXPIRATION (24*3600) // 24 hours
#define MAX_LM_EXPIRATION (48*3600) // Max expiration from LM

// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif

#define DUMP_FREQUENCY 10 // Dump index every x loads

// Sizes below are expressed in megabytes; multiply by MEGA to get bytes.
#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10  // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry

00105 inline static int
get_hash(const string &url)
{
    int hash = 0;

    for (const char *ptr = url.c_str(); *ptr; ptr++)
        hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);

    return hash;
}

/** One-time initializer for the file-scope instance_mutex. Intended to be
    run through pthread_once().

    @exception InternalErr Thrown if the mutex cannot be initialized. */
static void
once_init_routine()
{
    if (INIT(&instance_mutex) != 0)
        throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
}
00107 
00122 HTTPCache::HTTPCache(string cache_root, bool force) throw(Error) :
00123         d_locked_open_file(0),
00124         d_cache_enabled(false),
00125         d_cache_protected(false),
00126         d_expire_ignored(false),
00127         d_always_validate(false),
00128         d_total_size(CACHE_TOTAL_SIZE * MEGA),
00129         d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
00130         d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
00131         d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
00132         d_current_size(0),
00133         d_default_expiration(NO_LM_EXPIRATION),
00134         d_block_size(1),
00135         d_max_age(-1),
00136         d_max_stale(-1),
00137         d_min_fresh(-1),
00138         d_new_entries(0)
00139 {
00140     DBG(cerr << "Entering the constructor for " << this << "... ");
00141 
00142     int status = pthread_once(&once_block, once_init_routine);
00143     if (status != 0)
00144         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00145 
00146     INIT(&d_cache_mutex);
00147 
00148     // Initialize the cache table.
00149     for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
00150         d_cache_table[i] = 0;
00151 
00152     // This used to throw an Error object if we could not get the
00153     // single user lock. However, that results in an invalid object. It's
00154     // better to have an instance that has default values. If we cannot get
00155     // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
00156     //
00157     // I fixed this block so that the cache root is set before we try to get
00158     // the single user lock. That was the fix for bug #661. To make that
00159     // work, I had to move the call to create_cache_root out of
00160     // set_cache_root(). 09/08/03 jhrg
00161 
00162     set_cache_root(cache_root);
00163 
00164     if (get_single_user_lock(force)) {
00165 #ifdef WIN32
00166         //  Windows is unable to provide us this information.  4096 appears
00167         //  a best guess.  It is likely to be in the range [2048, 8192] on
00168         //  windows, but will the level of truth of that statement vary over
00169         //  time ?
00170         d_block_size = 4096;
00171 #else
00172         struct stat s;
00173         if (stat(cache_root.c_str(), &s) == 0)
00174             d_block_size = s.st_blksize;
00175         else
00176             throw Error("Could not set file system block size.");
00177 #endif
00178         cache_index_read();
00179         d_cache_enabled = true;
00180     }
00181 
00182     DBGN(cerr << "exiting" << endl);
00183 }
00184 
00213 HTTPCache *
00214 HTTPCache::instance(const string &cache_root, bool force)
00215 {
00216     LOCK(&instance_mutex);
00217     DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
00218         << "... ");
00219 
00220     try {
00221         if (!_instance) {
00222             _instance = new HTTPCache(cache_root, force);
00223 
00224             DBG(cerr << "New instance: " << _instance << ", cache root: "
00225                 << _instance->d_cache_root << endl);
00226 
00227             atexit(delete_instance);
00228 
00229 #ifndef WIN32
00230             // Register the interrupt handler. If we've already registered
00231             // one, barf. If this becomes a problem, hack SignalHandler so
00232             // that we can chain these handlers... 02/10/04 jhrg
00233             //
00234             // Technically we're leaking memory here. However, since this
00235             // class is a singleton, we know that only three objects will
00236             // ever be created and they will all exist until the process
00237             // exits. We can let this slide... 02/12/04 jhrg
00238             EventHandler *old_eh = SignalHandler::instance()->register_handler
00239                                    (SIGINT, new HTTPCacheInterruptHandler);
00240             if (old_eh) {
00241                 SignalHandler::instance()->register_handler(SIGINT, old_eh);
00242                 throw SignalHandlerRegisteredErr(
00243                     "Could not register event handler for SIGINT without superseding an existing one.");
00244             }
00245 
00246             old_eh = SignalHandler::instance()->register_handler
00247                      (SIGPIPE, new HTTPCacheInterruptHandler);
00248             if (old_eh) {
00249                 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
00250                 throw SignalHandlerRegisteredErr(
00251                     "Could not register event handler for SIGPIPE without superseding an existing one.");
00252             }
00253 
00254             old_eh = SignalHandler::instance()->register_handler
00255                      (SIGTERM, new HTTPCacheInterruptHandler);
00256             if (old_eh) {
00257                 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
00258                 throw SignalHandlerRegisteredErr(
00259                     "Could not register event handler for SIGTERM without superseding an existing one.");
00260             }
00261 #endif
00262         }
00263     }
00264     catch (Error &e) {
00265         DBG2(cerr << "The constructor threw an Error!" << endl);
00266         UNLOCK(&instance_mutex);
00267         throw e;
00268     }
00269 
00270     UNLOCK(&instance_mutex);
00271     DBGN(cerr << "returning " << hex << _instance << dec << endl);
00272 
00273     return _instance;
00274 }
00275 
00279 void
00280 HTTPCache::delete_instance()
00281 {
00282     DBG(cerr << "Entering delete_instance()..." << endl);
00283     if (HTTPCache::_instance) {
00284         DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
00285         delete HTTPCache::_instance;
00286         HTTPCache::_instance = 0;
00287     }
00288 
00289     DBG(cerr << "Exiting delete_instance()" << endl);
00290 }
00291 
00295 static inline void
00296 delete_cache_entry(HTTPCache::CacheEntry *e)
00297 {
00298     DBG2(cerr << "Deleting CacheEntry: " << e << endl);
00299     DESTROY(&e->lock);
00300     delete e;
00301 }
00302 
00315 HTTPCache::~HTTPCache()
00316 {
00317     DBG(cerr << "Entering the destructor for " << this << "... ");
00318 
00319     try {
00320         if (startGC())
00321             perform_garbage_collection();
00322 
00323         cache_index_write();
00324     }
00325     catch (Error &e) {
00326         // If the cache index cannot be written, we've got problems. However,
00327         // unless we're debugging, still free up the cache table in memory.
00328         // How should we let users know they cache index is not being
00329         // written?? 10/03/02 jhrg
00330         DBG(cerr << e.get_error_message() << endl);
00331     }
00332 
00333     // I don't see any code inside this try block that can throw an Error.
00334     // Nor do I see anything that can lock the interface. I'll leave this as
00335     // is, but I'm pretty sure this is left over from older code which called
00336     // perform_garbage_collection() in here and when that called
00337     // cache_index_write(). 01/23/04 jhrg
00338     try {
00339         for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
00340             CachePointers *cp = d_cache_table[i];
00341             if (cp) {
00342                 // delete each entry
00343                 for_each(cp->begin(), cp->end(), delete_cache_entry);
00344                 // now delete the vector that held the entries
00345                 DBG2(cerr << "Deleting d_cache_table[" << i << "]: "
00346                      << d_cache_table[i] << endl);
00347                 delete d_cache_table[i]; d_cache_table[i] = 0;
00348             }
00349         }
00350     }
00351     catch (Error &e) {
00352         DBG(cerr << "The constructor threw an Error!" << endl);
00353         DBGN(cerr << "Unlocking interface." << endl);
00354         UNLOCK(&d_cache_mutex);
00355         throw e;
00356     }
00357 
00358     release_single_user_lock();
00359 
00360     DBGN(cerr << "exiting destructor." << endl);
00361     DESTROY(&d_cache_mutex);
00362 }
00363 
00370 
00377 bool
00378 HTTPCache::cache_index_delete()
00379 {
00380     return (REMOVE(d_cache_index.c_str()) == 0);
00381 }
00382 
00391 bool
00392 HTTPCache::cache_index_read()
00393 {
00394     FILE *fp = fopen(d_cache_index.c_str(), "r");
00395     // If the cache index can't be opened that's OK; start with an empty
00396     // cache. 09/05/02 jhrg
00397     if (!fp) {
00398         return false;
00399     }
00400 
00401     char line[1024];
00402     while (!feof(fp) && fgets(line, 1024, fp)) {
00403         add_entry_to_cache_table(cache_index_parse_line(line));
00404         DBG2(cerr << line << endl);
00405     }
00406 
00407     int res = fclose(fp) ;
00408     if (res) {
00409         DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl ;) ;
00410     }
00411 
00412     return true;
00413 }
00414 
00422 HTTPCache::CacheEntry *
00423 HTTPCache::cache_index_parse_line(const char *line)
00424 {
00425     // Read the line and create the cache object
00426     CacheEntry *entry = new CacheEntry;
00427 
00428     INIT(&entry->lock);
00429     istringstream iss(line);
00430     iss >> entry->url;
00431     iss >> entry->cachename;
00432 
00433     iss >> entry->etag;
00434     if (entry->etag == CACHE_EMPTY_ETAG)
00435         entry->etag = "";
00436 
00437     iss >> entry->lm;
00438     iss >> entry->expires;
00439     iss >> entry->size;
00440 
00441     iss >> entry->range; // range is not used. 10/02/02 jhrg
00442 
00443     iss >> entry->hash;
00444     iss >> entry->hits;
00445     iss >> entry->freshness_lifetime;
00446     iss >> entry->response_time;
00447     iss >> entry->corrected_initial_age;
00448 
00449     iss >> entry->must_revalidate;
00450 
00451     return entry;
00452 }
00453 
00456 class WriteOneCacheEntry :
00457             public unary_function<HTTPCache::CacheEntry *, void>
00458 {
00459 
00460     FILE *d_fp;
00461 
00462 public:
00463     WriteOneCacheEntry(FILE *fp) : d_fp(fp)
00464     {}
00465 
00466     void operator()(HTTPCache::CacheEntry *e)
00467     {
00468         if (e && fprintf(d_fp,
00469                          "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
00470                          e->url.c_str(),
00471                          e->cachename.c_str(),
00472                          e->etag == "" ? CACHE_EMPTY_ETAG
00473                          : e->etag.c_str(),
00474                          (long)(e->lm),
00475                          (long)(e->expires),
00476                          e->size,
00477                          e->range ? '1' : '0', // not used. 10/02/02 jhrg
00478                          e->hash,
00479                          e->hits,
00480                          (long)(e->freshness_lifetime),
00481                          (long)(e->response_time),
00482                          (long)(e->corrected_initial_age),
00483                          e->must_revalidate ? '1' : '0') < 0)
00484             throw Error("Cache Index. Error writing cache index\n");
00485     }
00486 };
00487 
00497 void
00498 HTTPCache::cache_index_write()
00499 {
00500     DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
00501 
00502     // Open the file for writing.
00503     FILE * fp = NULL;
00504     if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
00505         throw Error(string("Cache Index. Can't open `") + d_cache_index
00506                     + string("' for writing"));
00507     }
00508 
00509     // Walk through the list and write it out. The format is really
00510     // simple as we keep it all in ASCII.
00511 
00512     for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00513         CachePointers *cp = d_cache_table[cnt];
00514         if (cp)
00515             for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
00516     }
00517 
00518     /* Done writing */
00519     int res = fclose(fp);
00520     if (res) {
00521         DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
00522             << (void *)fp << endl ;) ;
00523     }
00524 
00525     d_new_entries = 0;
00526 }
00527 
00529 
00533 
00535 static inline int
00536 entry_disk_space(int size, unsigned int block_size)
00537 {
00538     unsigned int num_of_blocks = (size + block_size) / block_size;
00539     DBG(cerr << "size: " << size << ", block_size: " << block_size
00540         << ", num_of_blocks: " << num_of_blocks << endl);
00541 
00542     return num_of_blocks * block_size;
00543 }
00544 
00548 bool
00549 HTTPCache::stopGC() const
00550 {
00551     return (d_current_size + d_folder_size < d_total_size - d_gc_buffer);
00552 }
00553 
00560 bool
00561 HTTPCache::startGC() const
00562 {
00563     DBG(cerr << "startGC, current_size: " << d_current_size << endl);
00564     return (d_current_size + d_folder_size > d_total_size);
00565 }
00566 
00577 void
00578 HTTPCache::remove_cache_entry(CacheEntry *entry)
00579 {
00580     // This should never happen; all calls to this method are protected by
00581     // the caller, hence the InternalErr.
00582     if (entry->locked)
00583         throw InternalErr("Tried to delete a cache entry that is in use.");
00584 
00585     REMOVE(entry->cachename.c_str());
00586     REMOVE(string(entry->cachename + CACHE_META).c_str());
00587 
00588     DBG(cerr << "remove_cache_entry, current_size: " << d_current_size << endl);
00589     unsigned int esd = entry_disk_space(entry->size, d_block_size);
00590     d_current_size = (esd > d_current_size) ? 0 : d_current_size - esd;
00591 
00592     DBG(cerr << "remove_cache_entry, current_size: " << d_current_size << endl);
00593     DBG2(cerr << "Current size (after decrement): " << d_current_size << endl);
00594 
00595     DBG2(cerr << "Deleting CacheEntry: " << entry << endl);
00596     delete entry; entry = 0;
00597 }
00598 
00613 void
00614 HTTPCache::perform_garbage_collection()
00615 {
00616     DBG(cerr << "Performing garbage collection" << endl);
00617 
00618     // Remove all the expired responses.
00619     expired_gc();
00620 
00621     // Remove entries larger than max_entry_size. Also remove entries
00622     // starting with zero hits, 1, ..., until stopGC() returns true.
00623     hits_gc();
00624 }
00625 
00633 class DeleteExpired :
00634             public unary_function<HTTPCache::CacheEntry *&, void>
00635 {
00636     time_t d_time;
00637     HTTPCache *d_cache;
00638 
00639 public:
00640     DeleteExpired(HTTPCache *cache, time_t t) :
00641             d_time(t), d_cache(cache)
00642     {}
00643 
00644     void operator()(HTTPCache::CacheEntry *&e)
00645     {
00646         if (e && !e->locked
00647             && (e->freshness_lifetime
00648                 < (e->corrected_initial_age + (d_time - e->response_time)))) {
00649             DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
00650             d_cache->remove_cache_entry(e);
00651             e = 0;
00652         }
00653     }
00654 };
00655 
00661 void
00662 HTTPCache::expired_gc()
00663 {
00664     if (!d_expire_ignored) {
00665         time_t now = time(0);
00666 
00667         // Walk through and delete all the expired entries.
00668         for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00669             CachePointers *slot = d_cache_table[cnt];
00670             if (slot) {
00671                 for_each(slot->begin(), slot->end(), DeleteExpired(this, now));
00672                 slot->erase(remove(slot->begin(), slot->end(),
00673                                    static_cast<CacheEntry *>(0)),
00674                             slot->end());
00675             }
00676         }
00677     }
00678 }
00679 
00686 class DeleteByHits :
00687             public unary_function<HTTPCache::CacheEntry *&, void>
00688 {
00689     HTTPCache *d_cache;
00690     int d_hits;
00691 
00692 public:
00693     DeleteByHits(HTTPCache *cache, int hits) :
00694             d_cache(cache), d_hits(hits)
00695     {}
00696 
00697     void operator()(HTTPCache::CacheEntry *&e)
00698     {
00699         if (d_cache->stopGC())
00700             return;
00701         if (e && !e->locked
00702             && (e->size > d_cache->d_max_entry_size || e->hits <= d_hits)) {
00703             DBG(cerr << "Deleting cache entry: " << e->url << endl);
00704             d_cache->remove_cache_entry(e);
00705             e = 0;
00706         }
00707     }
00708 };
00709 
00726 void
00727 HTTPCache::hits_gc()
00728 {
00729     int hits = 0;
00730 
00731     while (startGC()) {
00732         for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00733             if (d_cache_table[cnt]) {
00734                 CachePointers *slot = d_cache_table[cnt];
00735                 for_each(slot->begin(), slot->end(),
00736                          DeleteByHits(this, hits));
00737                 slot->erase(remove(slot->begin(), slot->end(),
00738                                    static_cast<CacheEntry*>(0)),
00739                             slot->end());
00740 
00741             }
00742         }
00743 
00744         hits++;
00745     }
00746 }
00747 
00749 
00753 
00762 void
00763 HTTPCache::add_entry_to_cache_table(HTTPCache::CacheEntry *entry)
00764 {
00765     int hash = entry->hash;
00766 
00767     if (!d_cache_table[hash]) {
00768         d_cache_table[hash] = new CachePointers;
00769         DBG2(cerr << "Allocated d_cache_table[" << hash << "]: "
00770              << d_cache_table[hash] << endl);
00771     }
00772 
00773     d_cache_table[hash]->push_back(entry);
00774     DBG2(cerr << "Pushing entry: " << entry << " onto d_cache_table["
00775          << hash << "]" << endl);
00776 
00777     DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
00778         << ", entry->size: " << entry->size << endl);
00779     d_current_size += entry_disk_space(entry->size, d_block_size);
00780     DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
00781     DBG2(cerr << "Current size (after increment): " << d_current_size << endl);
00782 }
00783 
00794 HTTPCache::CacheEntry *
00795 HTTPCache::get_entry_from_cache_table(int hash, const string &url) const
00796 {
00797     if (d_cache_table[hash]) {
00798         CachePointers *cp = d_cache_table[hash];
00799         for (CachePointersIter i = cp->begin(); i != cp->end(); ++i)
00800             // Must test *i because perform_garbage_collection may have
00801             // removed this entry; the CacheEntry will then be null.
00802             if ((*i) && (*i)->url == url)
00803                 return *i;
00804     }
00805 
00806     return 0;
00807 }
00808 
00812 class DeleteCacheEntry:
00813             public unary_function<HTTPCache::CacheEntry *&, void>
00814 {
00815 
00816     string d_url;
00817     HTTPCache *d_cache;
00818 
00819 public:
00820     DeleteCacheEntry(HTTPCache *c, const string &url)
00821             : d_url(url), d_cache(c)
00822     {}
00823 
00824     void operator()(HTTPCache::CacheEntry *&e)
00825     {
00826         if (e && !e->locked && e->url == d_url) {
00827             d_cache->remove_cache_entry(e);
00828             e = 0;
00829         }
00830     }
00831 };
00832 
00842 void
00843 HTTPCache::remove_entry_from_cache_table(const string &url)
00844 {
00845     int hash = get_hash(url);
00846     if (d_cache_table[hash]) {
00847         CachePointers *cp = d_cache_table[hash];
00848         for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
00849         cp->erase(remove(cp->begin(), cp->end(), static_cast<CacheEntry*>(0)),
00850                   cp->end());
00851     }
00852 }
00853 
00860 HTTPCache::CacheEntry *
00861 HTTPCache::get_entry_from_cache_table(const string &url) const
00862 {
00863     return get_entry_from_cache_table(get_hash(url), url);
00864 }
00865 
00867 
00876 void
00877 HTTPCache::create_cache_root(const string &cache_root)
00878 {
00879     struct stat stat_info;
00880     string::size_type cur = 0;
00881 
00882 #ifdef WIN32
00883     cur = cache_root[1] == ':' ? 3 : 1;
00884 #else
00885     cur = 1;
00886 #endif
00887     while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
00888         string dir = cache_root.substr(0, cur);
00889         if (stat(dir.c_str(), &stat_info) == -1) {
00890             DBG2(cerr << "Cache....... Creating " << dir << endl);
00891             if (MKDIR(dir.c_str(), 0777) < 0) {
00892                 DBG2(cerr << "Error: can't create." << endl);
00893                 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
00894             }
00895         }
00896         else {
00897             DBG2(cerr << "Cache....... Found " << dir << endl);
00898         }
00899         cur++;
00900     }
00901 }
00902 
00917 void
00918 HTTPCache::set_cache_root(const string &root)
00919 {
00920     if (root != "") {
00921         d_cache_root = root;
00922         // cache root should end in /.
00923         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00924             d_cache_root += DIR_SEPARATOR_CHAR;
00925     }
00926     else {
00927         // If no cache root has been indicated then look for a suitable
00928         // location.
00929         char * cr = (char *) getenv("DODS_CACHE");
00930         if (!cr) cr = (char *) getenv("TMP");
00931         if (!cr) cr = (char *) getenv("TEMP");
00932         if (!cr) cr = CACHE_LOC;
00933 
00934         d_cache_root = cr;
00935         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00936             d_cache_root += DIR_SEPARATOR_CHAR;
00937 
00938         d_cache_root += CACHE_ROOT;
00939     }
00940 
00941     d_cache_index = d_cache_root + CACHE_INDEX;
00942 }
00943 
00954 bool
00955 HTTPCache::get_single_user_lock(bool force)
00956 {
00957     if (!d_locked_open_file) {
00958         FILE * fp = NULL;
00959 
00960         try {
00961             // It's OK to call create_cache_root if the directory already
00962             // exists.
00963             create_cache_root(d_cache_root);
00964         }
00965         catch (Error &e) {
00966             // We need to catch and return false because this method is
00967             // called from a ctor and throwing at this point will result in a
00968             // partially constructed object. 01/22/04 jhrg
00969             return false;
00970         }
00971 
00972         string lock = d_cache_root + CACHE_LOCK;
00973     if ((fp = fopen(lock.c_str(), "r")) != NULL) {
00974             int res = fclose(fp);
00975             if (res) {
00976                 DBG(cerr << "HTTPCache::get_single_user_lock - Failed to close " << (void *)fp << endl ;) ;
00977             }
00978             if (force)
00979                 REMOVE(lock.c_str());
00980             else
00981                 return false;
00982         }
00983 
00984         if ((fp = fopen(lock.c_str(), "w")) == NULL)
00985             return false;
00986 
00987         d_locked_open_file = fp;
00988         return true;
00989     }
00990 
00991     return false;
00992 }
00993 
00996 void
00997 HTTPCache::release_single_user_lock()
00998 {
00999     if (d_locked_open_file) {
01000         int res = fclose(d_locked_open_file);
01001         if (res) {
01002             DBG(cerr << "HTTPCache::release_single_user_lock - Failed to close " << (void *)d_locked_open_file << endl ;) ;
01003         }
01004         d_locked_open_file = 0;
01005     }
01006 
01007     string lock = d_cache_root + CACHE_LOCK;
01008 REMOVE(lock.c_str());
01009 }
01010 
01013 
01017 string
01018 HTTPCache::get_cache_root() const
01019 {
01020     return d_cache_root;
01021 }
01022 
01034 void
01035 HTTPCache::set_cache_enabled(bool mode)
01036 {
01037     DBG(cerr << "Locking interface... ");
01038     LOCK(&d_cache_mutex);
01039 
01040     d_cache_enabled = mode;
01041 
01042     UNLOCK(&d_cache_mutex);
01043     DBGN(cerr << "Unlocking interface." << endl);
01044 }
01045 
01048 bool
01049 HTTPCache::is_cache_enabled() const
01050 {
01051     DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
01052          << endl);
01053     return d_cache_enabled;
01054 }
01055 
01065 void
01066 HTTPCache::set_cache_protected(bool mode)
01067 {
01068     DBG(cerr << "Locking interface... ");
01069     LOCK(&d_cache_mutex);
01070 
01071     d_cache_protected = mode;
01072 
01073     UNLOCK(&d_cache_mutex);
01074     DBGN(cerr << "Unlocking interface." << endl);
01075 }
01076 
01079 bool
01080 HTTPCache::is_cache_protected() const
01081 {
01082     return d_cache_protected;
01083 }
01084 
01095 void
01096 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
01097 {
01098     DBG(cerr << "Locking interface... ");
01099     LOCK(&d_cache_mutex);
01100 
01101     d_cache_disconnected = mode;
01102 
01103     UNLOCK(&d_cache_mutex);
01104     DBGN(cerr << "Unlocking interface." << endl);
01105 }
01106 
01109 CacheDisconnectedMode
01110 HTTPCache::get_cache_disconnected() const
01111 {
01112     return d_cache_disconnected;
01113 }
01114 
01123 void
01124 HTTPCache::set_expire_ignored(bool mode)
01125 {
01126     DBG(cerr << "Locking interface... ");
01127     LOCK(&d_cache_mutex);
01128 
01129     d_expire_ignored = mode;
01130 
01131     UNLOCK(&d_cache_mutex);
01132     DBGN(cerr << "Unlocking interface." << endl);
01133 }
01134 
01135 /* Is the cache ignoring Expires headers returned with responses that have
01136    been cached? */
01137 
01138 bool
01139 HTTPCache::is_expire_ignored() const
01140 {
01141     return d_expire_ignored;
01142 }
01143 
01159 void
01160 HTTPCache::set_max_size(unsigned long size)
01161 {
01162     DBG(cerr << "Locking interface... ");
01163     LOCK(&d_cache_mutex);
01164 
01165     try {
01166         unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
01167                                  MIN_CACHE_TOTAL_SIZE * MEGA :
01168                                  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
01169         unsigned long old_size = d_total_size;
01170         d_total_size = new_size;
01171         d_folder_size = d_total_size / CACHE_FOLDER_PCT;
01172         d_gc_buffer = d_total_size / CACHE_GC_PCT;
01173 
01174         if (new_size < old_size && startGC()) {
01175             perform_garbage_collection();
01176             cache_index_write();
01177         }
01178     }
01179     catch (Error &e) {
01180         UNLOCK(&d_cache_mutex);
01181         DBGN(cerr << "Unlocking interface." << endl);
01182         throw e;
01183     }
01184 
01185     DBG2(cerr << "Cache....... Total cache size: " << d_total_size
01186          << " with " << d_folder_size
01187          << " bytes for meta information and folders and at least "
01188          << d_gc_buffer << " bytes free after every gc" << endl);
01189 
01190     UNLOCK(&d_cache_mutex);
01191     DBGN(cerr << "Unlocking interface." << endl);
01192 }
01193 
01196 unsigned long
01197 HTTPCache::get_max_size() const
01198 {
01199     return d_total_size / MEGA;
01200 }
01201 
01210 void
01211 HTTPCache::set_max_entry_size(unsigned long size)
01212 {
01213     DBG(cerr << "Locking interface... ");
01214     LOCK(&d_cache_mutex);
01215 
01216     try {
01217         unsigned long new_size = size * MEGA;
01218         if (new_size > 0 && new_size < d_total_size - d_folder_size) {
01219             unsigned long old_size = d_max_entry_size;
01220             d_max_entry_size = new_size;
01221             if (new_size < old_size && startGC()) {
01222                 perform_garbage_collection();
01223                 cache_index_write();
01224             }
01225         }
01226     }
01227     catch (Error &e) {
01228         UNLOCK(&d_cache_mutex);
01229         DBGN(cerr << "Unlocking interface." << endl);
01230         throw e;
01231     }
01232 
01233     DBG2(cerr << "Cache...... Max entry cache size is "
01234          << d_max_entry_size << endl);
01235 
01236     UNLOCK(&d_cache_mutex);
01237     DBGN(cerr << "Unlocking interface." << endl);
01238 }
01239 
01244 unsigned long
01245 HTTPCache::get_max_entry_size() const
01246 {
01247     return d_max_entry_size / MEGA;
01248 }
01249 
01260 void
01261 HTTPCache::set_default_expiration(const int exp_time)
01262 {
01263     DBG(cerr << "Locking interface... ");
01264     LOCK(&d_cache_mutex);
01265 
01266     d_default_expiration = exp_time;
01267 
01268     UNLOCK(&d_cache_mutex);
01269     DBGN(cerr << "Unlocking interface." << endl);
01270 }
01271 
01274 int
01275 HTTPCache::get_default_expiration() const
01276 {
01277     return d_default_expiration;
01278 }
01279 
01284 void
01285 HTTPCache::set_always_validate(bool validate)
01286 {
01287     d_always_validate = validate;
01288 }
01289 
01293 bool
01294 HTTPCache::get_always_validate() const
01295 {
01296     return d_always_validate;
01297 }
01298 
01315 void
01316 HTTPCache::set_cache_control(const vector<string> &cc)
01317 {
01318     DBG(cerr << "Locking interface... ");
01319     LOCK(&d_cache_mutex);
01320 
01321     try {
01322         d_cache_control = cc;
01323 
01324         vector<string>::const_iterator i;
01325         for (i = cc.begin(); i != cc.end(); ++i) {
01326             string header = (*i).substr(0, (*i).find(':'));
01327             string value = (*i).substr((*i).find(": ") + 2);
01328             if (header != "Cache-Control") {
01329                 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
01330             }
01331             else {
01332                 if (value == "no-cache" || value == "no-store")
01333                     d_cache_enabled = false;
01334                 else if (value.find("max-age") != string::npos) {
01335                     string max_age = value.substr(value.find("=" + 1));
01336                     d_max_age = parse_time(max_age.c_str());
01337                 }
01338                 else if (value == "max-stale")
01339                     d_max_stale = 0; // indicates will take anything;
01340                 else if (value.find("max-stale") != string::npos) {
01341                     string max_stale = value.substr(value.find("=" + 1));
01342                     d_max_stale = parse_time(max_stale.c_str());
01343                 }
01344                 else if (value.find("min-fresh") != string::npos) {
01345                     string min_fresh = value.substr(value.find("=" + 1));
01346                     d_min_fresh = parse_time(min_fresh.c_str());
01347                 }
01348             }
01349         }
01350     }
01351     catch (Error &e) {
01352         UNLOCK(&d_cache_mutex);
01353         DBGN(cerr << "Unlocking interface." << endl);
01354         throw e;
01355     }
01356 
01357     UNLOCK(&d_cache_mutex);
01358     DBGN(cerr << "Unlocking interface." << endl);
01359 }
01360 
01361 
01368 vector<string>
01369 HTTPCache::get_cache_control()
01370 {
01371     return d_cache_control;
01372 }
01373 
01375 
01389 string
01390 HTTPCache::create_hash_directory(int hash)
01391 {
01392     struct stat stat_info;
01393     ostringstream path;
01394 
01395     path << d_cache_root << hash;
01396     string p = path.str();
01397 
01398     if (stat(p.c_str(), &stat_info) == -1) {
01399         DBG2(cerr << "Cache....... Create dir " << p << endl);
01400         if (MKDIR(p.c_str(), 0777) < 0) {
01401             DBG2(cerr << "Cache....... Can't create..." << endl);
01402             throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
01403         }
01404     }
01405     else {
01406         DBG2(cerr << "Cache....... Directory " << p << " already exists"
01407              << endl);
01408     }
01409 
01410     return p;
01411 }
01412 
01427 void
01428 HTTPCache::create_location(CacheEntry *entry)
01429 {
01430     string hash_dir = create_hash_directory(entry->hash);
01431 #ifdef WIN32
01432     hash_dir += "\\dodsXXXXXX";
01433 #else
01434     hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
01435 #endif
01436 
01437     // mkstemp uses the storage passed to it; must be writable and local.
01438     char *templat = new char[hash_dir.size() + 1];
01439     strcpy(templat, hash_dir.c_str());
01440 
01441     // Open truncated for update. NB: mkstemp() returns a file descriptor.
01442     // man mkstemp says "... The file is opened with the O_EXCL flag,
01443     // guaranteeing that when mkstemp returns successfully we are the only
01444     // user." 09/19/02 jhrg
01445     int fd = MKSTEMP(templat); // fd mode is 666 or 600 (Unix)
01446     if (fd < 0) {
01447         delete templat; templat = 0;
01448         close(fd);
01449         throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
01450     }
01451 
01452     entry->cachename = templat;
01453     delete[] templat; templat = 0;
01454     close(fd);
01455 }
01456 
01467 void
01468 HTTPCache::parse_headers(CacheEntry *entry, const vector<string> &headers)
01469 {
01470     if( !entry ) cerr << "NO ENTRY" << endl ;
01471     vector<string>::const_iterator i;
01472     for (i = headers.begin(); i != headers.end(); ++i) {
01473         // skip a blank header.
01474         if( (*i).empty() ) continue ;
01475 
01476         string::size_type colon = (*i).find(':');
01477 
01478         // skip a header with no colon in it.
01479         if( colon == string::npos ) continue ;
01480 
01481         string header = (*i).substr(0, (*i).find(':'));
01482         string value = (*i).substr((*i).find(": ") + 2);
01483         DBG2(cerr << "Header: " << header << endl);
01484         DBG2(cerr << "Value: " << value << endl);
01485 
01486         if (header == "ETag") {
01487             entry->etag = value;
01488         }
01489         else if (header == "Last-Modified") {
01490             entry->lm = parse_time(value.c_str());
01491         }
01492         else if (header == "Expires") {
01493             entry->expires = parse_time(value.c_str());
01494         }
01495         else if (header == "Date") {
01496             entry->date = parse_time(value.c_str());
01497         }
01498         else if (header == "Age") {
01499             entry->age = parse_time(value.c_str());
01500         }
01501         else if (header == "Content-Length") {
01502             unsigned long clength = strtoul(value.c_str(), 0, 0);
01503             if (clength > d_max_entry_size)
01504                 entry->no_cache = true;
01505         }
01506         else if (header == "Cache-Control") {
01507             // Ignored Cache-Control values: public, private, no-transform,
01508             // proxy-revalidate, s-max-age. These are used by shared caches.
01509             // See section 14.9 of RFC 2612. 10/02/02 jhrg
01510             if (value == "no-cache" || value == "no-store")
01511                 // Note that we *can* store a 'no-store' response in volatile
01512                 // memory according to RFC 2616 (section 14.9.2) but those
01513                 // will be rare coming from DAP servers. 10/02/02 jhrg
01514                 entry->no_cache = true;
01515             else if (value == "must-revalidate")
01516                 entry->must_revalidate = true;
01517             else if (value.find("max-age") != string::npos) {
01518                 string max_age = value.substr(value.find("=" + 1));
01519                 entry->max_age = parse_time(max_age.c_str());
01520             }
01521         }
01522     }
01523 }
01524 
01536 void
01537 HTTPCache::calculate_time(CacheEntry *entry, time_t request_time)
01538 {
01539     entry->response_time = time(NULL);
01540     time_t apparent_age
01541     = max(0, static_cast<int>(entry->response_time - entry->date));
01542     time_t corrected_received_age = max(apparent_age, entry->age);
01543     time_t response_delay = entry->response_time - request_time;
01544     entry->corrected_initial_age = corrected_received_age + response_delay;
01545 
01546     // Estimate an expires time using the max-age and expires time. If we
01547     // don't have an explicit expires time then set it to 10% of the LM date
01548     // (although max 24 h). If no LM date is available then use 24 hours.
01549     time_t freshness_lifetime = entry->max_age;
01550     if (freshness_lifetime < 0) {
01551         if (entry->expires < 0) {
01552             if (entry->lm < 0) {
01553                 freshness_lifetime = NO_LM_EXPIRATION;
01554             }
01555             else {
01556                 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
01557             }
01558         }
01559         else
01560             freshness_lifetime = entry->expires - entry->date;
01561     }
01562 
01563     entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
01564 
01565     DBG2(cerr << "Cache....... Received Age " << entry->age
01566          << ", corrected " << entry->corrected_initial_age
01567          << ", freshness lifetime " << entry->freshness_lifetime << endl);
01568 }
01569 
01577 bool
01578 HTTPCache::is_url_in_cache(const string &url)
01579 {
01580     DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
01581 
01582     return get_entry_from_cache_table(url) != 0;
01583 }
01584 
/** Does this header line mention a hop-by-hop header?

    The test is a case-sensitive substring search over the whole line,
    so a name that appears anywhere in the line (including the value)
    counts as a match.

    @param header A complete header line.
    @return True if the line contains one of the listed names. */
static inline bool
is_hop_by_hop_header(const string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const size_t name_count
        = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (size_t k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != string::npos)
            return true;
    }

    return false;
}
01603 
01615 void
01616 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
01617 {
01618     string fname = cachename + CACHE_META;
01619     d_open_files.push_back(fname);
01620 
01621     FILE *dest = fopen(fname.c_str(), "w");
01622     if (!dest) {
01623         throw InternalErr(__FILE__, __LINE__,
01624                           "Could not open named cache entry file.");
01625     }
01626 
01627     vector<string>::const_iterator i;
01628     for (i = headers.begin(); i != headers.end(); ++i) {
01629         if (!is_hop_by_hop_header(*i)) {
01630             fwrite((*i).c_str(), (*i).size(), 1, dest);
01631             fwrite("\n", 1, 1, dest);
01632         }
01633     }
01634 
01635     int res = fclose(dest);
01636     if (res) {
01637         DBG(cerr << "HTTPCache::write_metadata - Failed to close "
01638             << dest << endl);
01639     }
01640 
01641     d_open_files.pop_back();
01642 }
01643 
01654 void
01655 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
01656 {
01657     FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
01658     if (!md) {
01659         throw InternalErr(__FILE__, __LINE__,
01660                           "Could not open named cache entry meta data file.");
01661     }
01662 
01663     char line[1024];
01664     while (!feof(md) && fgets(line, 1024, md)) {
01665         line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
01666         headers.push_back(string(line));
01667     }
01668 
01669     int res = fclose(md);
01670     if (res) {
01671         DBG(cerr << "HTTPCache::read_metadata - Failed to close "
01672             << md << endl);
01673     }
01674 }
01675 
01697 int
01698 HTTPCache::write_body(const string &cachename, const FILE *src)
01699 {
01700     d_open_files.push_back(cachename);
01701 
01702     FILE *dest = fopen(cachename.c_str(), "wb");
01703     if (!dest) {
01704         throw InternalErr(__FILE__, __LINE__,
01705                           "Could not open named cache entry file.");
01706     }
01707 
01708     // Read and write in 1k blocks; an attempt at doing this efficiently.
01709     // 09/30/02 jhrg
01710     char line[1024];
01711     size_t n;
01712     int total = 0;
01713     while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
01714         total += fwrite(line, 1, n, dest);
01715         DBG2(sleep(3));
01716 #if 0
01717         // See comment above. If this is uncommented, make sure to clean up
01718         // the partially written file when ResponseTooBirErr is throw.
01719         if (total > d_max_entry_size)
01720             throw ResponseTooBigErr("This response is too big to cache.");
01721 #endif
01722     }
01723 
01724     if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
01725         int res = fclose(dest);
01726         res = res & unlink(cachename.c_str());
01727         if (res) {
01728             DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
01729                 << dest << endl);
01730         }
01731         throw InternalErr(__FILE__, __LINE__,
01732                           "I/O error transferring data to the cache.");
01733     }
01734 
01735     rewind(const_cast<FILE *>(src));
01736 
01737     int res = fclose(dest);
01738     if (res) {
01739         DBG(cerr << "HTTPCache::write_body - Failed to close "
01740             << dest << endl);
01741     }
01742 
01743     d_open_files.pop_back();
01744 
01745     return total;
01746 }
01747 
01756 FILE *
01757 HTTPCache::open_body(const string &cachename)
01758 {
01759     FILE *src = fopen(cachename.c_str(), "r+b");
01760     if (!src) {
01761         throw InternalErr(__FILE__, __LINE__,
01762                           "Could not open named cache entry file.");
01763     }
01764 
01765     return src;
01766 }
01767 
01793 bool
01794 HTTPCache::cache_response(const string &url, time_t request_time,
01795                           const vector<string> &headers, const FILE *body)
01796 {
01797     DBG(cerr << "Locking interface... ");
01798     LOCK(&d_cache_mutex);
01799 
01800     DBG(cerr << "Caching url: " << url << "." << endl);
01801 
01802     try {
01803         // If this is not an http or https URL, don't cache.
01804         if (url.find("http:") == string::npos &&
01805             url.find("https:") == string::npos) {
01806             UNLOCK(&d_cache_mutex);
01807             DBGN(cerr << "Unlocking interface." << endl);
01808             return false;
01809         }
01810 
01811         // This does nothing if url is not already in the cache. It's
01812         // more efficient to do this than to first check and see if the entry
01813         // exists. 10/10/02 jhrg
01814         remove_entry_from_cache_table(url);
01815 
01816         CacheEntry *entry = new CacheEntry;
01817 
01818         INIT(&entry->lock);
01819         entry->url = url;
01820         entry->hash = get_hash(url);
01821         entry->hits = 0;
01822 
01823         try {
01824             parse_headers(entry, headers); // etag, lm, date, age, expires, max_age.
01825             if (entry->no_cache) {
01826                 DBG(cerr << "Not cache-able; deleting CacheEntry: " << entry
01827                     << "(" << url << ")" << endl);
01828                 delete entry; entry = 0;
01829                 UNLOCK(&d_cache_mutex);
01830                 DBGN(cerr << "Unlocking interface." << endl);
01831                 return false;
01832             }
01833 
01834             // corrected_initial_age, freshness_lifetime, response_time.
01835             calculate_time(entry, request_time);
01836 
01837             create_location(entry); // cachename, cache_body_fd
01838             entry->size = write_body(entry->cachename, body);
01839             write_metadata(entry->cachename, headers);
01840         }
01841         catch (ResponseTooBigErr &e) {
01842             // Oops. Bummer. Clean up and exit.
01843             DBG(cerr << e.get_error_message() << endl);
01844             REMOVE(entry->cachename.c_str());
01845             REMOVE(string(entry->cachename + CACHE_META).c_str());
01846             DBG(cerr << "Too big; deleting CacheEntry: " << entry << "(" << url
01847                 << ")" << endl);
01848             delete entry; entry = 0;
01849             UNLOCK(&d_cache_mutex);
01850             DBGN(cerr << "Unlocking interface." << endl);
01851             return false;
01852         }
01853 
01854         entry->range = false; // not used. 10/02/02 jhrg
01855 
01856         add_entry_to_cache_table(entry);
01857 
01858         if (++d_new_entries > DUMP_FREQUENCY) {
01859             if (startGC())
01860                 perform_garbage_collection();
01861 
01862             cache_index_write(); // resets d_new_entries
01863         }
01864     }
01865     catch (Error &e) {
01866         UNLOCK(&d_cache_mutex);
01867         DBGN(cerr << "Unlocking interface." << endl);
01868         throw e;
01869     }
01870 
01871     UNLOCK(&d_cache_mutex);
01872     DBGN(cerr << "Unlocking interface." << endl);
01873 
01874     return true;
01875 }
01876 
01895 vector<string>
01896 HTTPCache::get_conditional_request_headers(const string &url)
01897 {
01898     DBG(cerr << "Locking interface... ");
01899     LOCK(&d_cache_mutex);
01900     CacheEntry *entry = 0;
01901     vector<string> headers;
01902 
01903     DBG(cerr << "Getting conditional request headers for " << url << endl);
01904 
01905     try {
01906         entry = get_entry_from_cache_table(url);
01907         if (!entry) {
01908             throw Error("There is no cache entry for the URL: " + url);
01909         }
01910 
01911         DBG(cerr << "Locking entry... ");
01912         LOCK(&entry->lock);
01913 
01914         if (entry->etag != "")
01915             headers.push_back(string("If-None-Match: ") + entry->etag);
01916 
01917         if (entry->lm > 0)
01918             headers.push_back(string("If-Modified-Since: ")
01919                               + date_time_str(&entry->lm));
01920         else if (entry->max_age > 0)
01921             headers.push_back(string("If-Modified-Since: ")
01922                               + date_time_str(&entry->max_age));
01923         else if (entry->expires > 0)
01924             headers.push_back(string("If-Modified-Since: ")
01925                               + date_time_str(&entry->expires));
01926 
01927         UNLOCK(&entry->lock);
01928         DBGN(cerr << "Unlocking entry" << endl);
01929         UNLOCK(&d_cache_mutex);
01930         DBGN(cerr << "Unlocking interface." << endl);
01931     }
01932     catch (Error &e) {
01933         UNLOCK(&d_cache_mutex);
01934         DBGN(cerr << "Unlocking interface." << endl);
01935         if (entry)
01936             UNLOCK(&entry->lock);
01937         DBGN(cerr << "Unlocking entry." << endl);
01938         throw e;
01939     }
01940 
01941     return headers;
01942 }
01943 
/** Order header lines by header name only.

    The name is the text before the first colon, or the whole line when
    there is no colon. Two lines with the same name compare equivalent
    whatever their values, so a set<string, HeaderLess> keeps one line
    per header name.

    The functor no longer derives from std::binary_function: that base
    was removed from the standard library in C++17, and ordered
    containers only need operator(). */
struct HeaderLess
{
    bool operator()(const string &s1, const string &s2) const
    {
        // compare() limits both operands to their name part without
        // building temporary substrings; a count of npos means "to the
        // end of the string".
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
01954 
01968 void
01969 HTTPCache::update_response(const string &url, time_t request_time,
01970                            const vector<string> &headers)
01971 {
01972     DBG(cerr << "Locking interface... ");
01973     LOCK(&d_cache_mutex);
01974     CacheEntry *entry = 0;
01975 
01976     DBG(cerr << "Updating the response headers for: " << url << endl);
01977 
01978     try {
01979         entry = get_entry_from_cache_table(url);
01980         if (!entry)
01981             throw Error("There is no cache entry for the URL: " + url);
01982 
01983         DBG(cerr << "Locking entry... ");
01984         LOCK(&entry->lock);
01985 
01986         // Merge the new headers with the exiting CacheEntry object.
01987         parse_headers(entry, headers);
01988 
01989         // Update corrected_initial_age, freshness_lifetime, response_time.
01990         calculate_time(entry, request_time);
01991 
01992         // Merge the new headers with those in the persistent store. How:
01993         // Load the new headers into a set, then merge the old headers. Since
01994         // set<> ignores duplicates, old headers with the same name as a new
01995         // header will got into the bit bucket. Define a special compare
01996         // functor to make sure that headers are compared using only their
01997         // name and not their value too.
01998         set<string, HeaderLess> merged_headers;
01999 
02000         // Load in the new headers
02001         copy(headers.begin(), headers.end(),
02002              inserter(merged_headers, merged_headers.begin()));
02003 
02004         // Get the old headers and load them in.
02005         vector<string> old_headers;
02006         read_metadata(entry->cachename, old_headers);
02007         copy(old_headers.begin(), old_headers.end(),
02008              inserter(merged_headers, merged_headers.begin()));
02009 
02010         // Read the values back out. Use reverse iterators with back_inserter
02011         // to preserve header order. NB: vector<> does not support push_front
02012         // so we can't use front_inserter(). 01/09/03 jhrg
02013         vector<string> result;
02014         copy(merged_headers.rbegin(), merged_headers.rend(),
02015              back_inserter(result));
02016 
02017         // Store.
02018         write_metadata(entry->cachename, result);
02019     }
02020     catch (Error &e) {
02021         if (entry)
02022             UNLOCK(&entry->lock);
02023         DBGN(cerr << "Unlocking entry." << endl);
02024         UNLOCK(&d_cache_mutex);
02025         DBGN(cerr << "Unlocking interface." << endl);
02026         throw e;
02027     }
02028 
02029     UNLOCK(&entry->lock);
02030     DBGN(cerr << "Unlocking entry" << endl);
02031     UNLOCK(&d_cache_mutex);
02032     DBGN(cerr << "Unlocking interface." << endl);
02033 }
02034 
02046 bool
02047 HTTPCache::is_url_valid(const string &url)
02048 {
02049     DBG(cerr << "Locking interface... ");
02050     LOCK(&d_cache_mutex);
02051     bool freshness;
02052     CacheEntry *entry = 0;
02053 
02054     DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
02055 
02056     try {
02057         if (d_always_validate) {
02058             UNLOCK(&d_cache_mutex);
02059             DBGN(cerr << "Unlocking interface." << endl);
02060             return false;  // force re-validation.
02061         }
02062 
02063         entry = get_entry_from_cache_table(url);
02064         if (!entry) {
02065             UNLOCK(&d_cache_mutex);
02066             DBGN(cerr << "Unlocking interface." << endl);
02067             throw Error("There is no cache entry for the URL: " + url);
02068         }
02069 
02070         DBG(cerr << "Locking entry... ");
02071         LOCK(&entry->lock);
02072 
02073         // If we supported range requests, we'd need code here to check if
02074         // there was only a partial response in the cache. 10/02/02 jhrg
02075 
02076         // In case this entry is of type "must-revalidate" then we consider it
02077         // invalid.
02078         if (entry->must_revalidate) {
02079             UNLOCK(&entry->lock);
02080             DBGN(cerr << "Unlocking entry" << endl);
02081             UNLOCK(&d_cache_mutex);
02082             DBGN(cerr << "Unlocking interface." << endl);
02083             return false;
02084         }
02085 
02086         time_t resident_time = time(NULL) - entry->response_time;
02087         time_t current_age = entry->corrected_initial_age + resident_time;
02088 
02089         // Check that the max-age, max-stale, and min-fresh directives
02090         // given in the request cache control header is followed.
02091         if (d_max_age >= 0 && current_age > d_max_age) {
02092             DBG(cerr << "Cache....... Max-age validation" << endl);
02093             UNLOCK(&entry->lock);
02094             DBGN(cerr << "Unlocking entry" << endl);
02095             UNLOCK(&d_cache_mutex);
02096             DBGN(cerr << "Unlocking interface." << endl);
02097             return false;
02098         }
02099         if (d_min_fresh >= 0
02100             && entry->freshness_lifetime < current_age + d_min_fresh) {
02101             DBG(cerr << "Cache....... Min-fresh validation" << endl);
02102             UNLOCK(&entry->lock);
02103             DBGN(cerr << "Unlocking entry" << endl);
02104             UNLOCK(&d_cache_mutex);
02105             DBGN(cerr << "Unlocking interface." << endl);
02106             return false;
02107         }
02108 
02109         freshness = (entry->freshness_lifetime
02110                      + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
02111     }
02112     catch (Error &e) {
02113         UNLOCK(&entry->lock);
02114         DBGN(cerr << "Unlocking entry." << endl);
02115         UNLOCK(&d_cache_mutex);
02116         DBGN(cerr << "Unlocking interface." << endl);
02117         throw e;
02118     }
02119 
02120     UNLOCK(&entry->lock);
02121     DBGN(cerr << "Unlocking entry" << endl);
02122     UNLOCK(&d_cache_mutex);
02123     DBGN(cerr << "Unlocking interface." << endl);
02124 
02125     return freshness;
02126 }
02127 
02153 FILE *
02154 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
02155 {
02156     DBG(cerr << "Locking interface... ");
02157     LOCK(&d_cache_mutex);
02158     FILE *body;
02159     CacheEntry *entry = 0;
02160 
02161     DBG(cerr << "Getting the cached response for " << url << endl);
02162 
02163     try {
02164         entry = get_entry_from_cache_table(url);
02165         if (!entry)
02166             throw Error("There is no cache entry for the URL: " + url);
02167 
02168         read_metadata(entry->cachename, headers);
02169         DBG(cerr << "Headers just read from cache: " << endl);
02170         DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
02171 
02172         body = open_body(entry->cachename);
02173 
02174         DBG(cerr << "Returning: " << url << " from the cache." << endl);
02175 
02176         entry->hits++;  // Mark hit
02177         entry->locked++; // lock entry
02178         d_locked_entries[body] = entry; // record lock, see release_cached_r...
02179         DBG(cerr << "Locking entry (non-blocking lock)... ");
02180         TRYLOCK(&entry->lock); // Needed for blocking lock; locked counts
02181     }
02182     catch (Error &e) {
02183         if (entry)
02184             UNLOCK(&entry->lock);
02185         DBGN(cerr << "Unlocking entry." << endl);
02186         UNLOCK(&d_cache_mutex);
02187         DBGN(cerr << "Unlocking interface." << endl);
02188         throw e;
02189     }
02190 
02191     UNLOCK(&d_cache_mutex);
02192     DBGN(cerr << "Unlocking interface." << endl);
02193 
02194     return body;
02195 }
02196 
02223 FILE *
02224 HTTPCache::get_cached_response( const string &url, vector<string> &headers,
02225                                 string &cacheName )
02226 {
02227     FILE *body = get_cached_response( url, headers ) ;
02228     if( body )
02229     {
02230         CacheEntry *entry = d_locked_entries[body];
02231         if( entry )
02232         {
02233             cacheName = entry->cachename ;
02234         }
02235     }
02236 
02237     return body ;
02238 }
02239 
02260 FILE *
02261 HTTPCache::get_cached_response_body(const string &url)
02262 {
02263     DBG(cerr << "Locking interface... ");
02264     LOCK(&d_cache_mutex);
02265     FILE *body;
02266     CacheEntry *entry = 0;
02267 
02268     try {
02269         entry = get_entry_from_cache_table(url);
02270 
02271         if (!entry) {
02272             UNLOCK(&d_cache_mutex);
02273             DBGN(cerr << "Unlocking interface." << endl);
02274             throw Error("There is no cache entry for the URL: " + url);
02275         }
02276 
02277         body = open_body(entry->cachename); // throws InternalErr
02278 
02279         DBG(cerr << "Returning body for: " << url << " from the cache."
02280             << endl);
02281 
02282         entry->hits++;  // Mark hit
02283         entry->locked++;  // lock entry
02284         d_locked_entries[body] = entry; // record lock, see release_cached_r...
02285         DBG(cerr << "Locking entry (non-blocking lock)... ");
02286         TRYLOCK(&entry->lock);
02287     }
02288     catch (Error &e) {
02289         if (entry)
02290             UNLOCK(&entry->lock);
02291         DBGN(cerr << "Unlocking entry." << endl);
02292         UNLOCK(&d_cache_mutex);
02293         DBGN(cerr << "Unlocking interface." << endl);
02294         throw e;
02295     }
02296 
02297     UNLOCK(&d_cache_mutex);
02298     DBGN(cerr << "Unlocking interface." << endl);
02299 
02300     return body;
02301 }
02302 
02315 void
02316 HTTPCache::release_cached_response(FILE *body)
02317 {
02318     DBG(cerr << "Locking interface... ");
02319     LOCK(&d_cache_mutex);
02320     CacheEntry *entry = 0;
02321 
02322     try {
02323         entry = d_locked_entries[body];
02324         if (!entry)
02325             throw Error("There is no cache entry for the response given.");
02326 
02327         entry->locked--;
02328         if (entry->locked == 0) {
02329             d_locked_entries.erase(body);
02330             UNLOCK(&entry->lock);
02331             DBG(cerr << "Unlocking entry " << hex << entry << dec << endl);
02332         }
02333 
02334         if (entry->locked < 0)
02335             throw Error("An unlocked entry was released");
02336     }
02337     catch (Error &e) {
02338         UNLOCK(&d_cache_mutex);
02339         DBGN(cerr << "Unlocking interface." << endl);
02340         throw e;
02341     }
02342 
02343     UNLOCK(&d_cache_mutex);
02344     DBGN(cerr << "Unlocking interface." << endl);
02345 }
02346 
02349 class DeleteUnlockedCacheEntry :
02350             public unary_function<HTTPCache::CacheEntry *&, void>
02351 {
02352 
02353     HTTPCache *d_cache;
02354 
02355 public:
02356     DeleteUnlockedCacheEntry(HTTPCache *c) : d_cache(c)
02357     {}
02358     void operator()(HTTPCache::CacheEntry *&e)
02359     {
02360         if (e) {
02361             d_cache->remove_cache_entry(e);
02362             e = 0;
02363         }
02364     }
02365 };
02366 
02379 void
02380 HTTPCache::purge_cache()
02381 {
02382     DBG(cerr << "Locking interface... ");
02383     LOCK(&d_cache_mutex);
02384     DBG(cerr << "Purging the cache." << endl);
02385 
02386     try {
02387         if (!d_locked_entries.empty()) {
02388             throw Error("Attempt to purge the cache with entries in use.");
02389         }
02390 
02391         // Walk through the cache table and, for every entry in the cache, delete
02392         // it on disk and in the cache table.
02393         for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
02394             CachePointers *slot = d_cache_table[cnt];
02395             if (slot) {
02396                 for_each(slot->begin(), slot->end(),
02397                          DeleteUnlockedCacheEntry(this));
02398                 slot->erase(remove(slot->begin(), slot->end(),
02399                                    static_cast<CacheEntry *>(0)),
02400                             slot->end());
02401             }
02402         }
02403 
02404         // Now delete the index itself.
02405         cache_index_delete();
02406     }
02407     catch (Error &e) {
02408         UNLOCK(&d_cache_mutex);
02409         DBGN(cerr << "Unlocking interface." << endl);
02410         throw e;
02411     }
02412 
02413     UNLOCK(&d_cache_mutex);
02414     DBGN(cerr << "Unlocking interface." << endl);
02415 }
02416 
02417 } // namespace libdap
02418 

Generated on Wed Mar 5 15:27:11 2008 for libdap++ by  doxygen 1.5.4