HTTPCache.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #include "config.h"
00027 
00028 #include <stdio.h>
00029 #include <pthread.h>
00030 
00031 #include <iostream>
00032 #include <sstream>
00033 #include <algorithm>
00034 #include <iterator>
00035 #include <set>
00036 
00037 #include "Error.h"
00038 #include "InternalErr.h"
00039 #include "ResponseTooBigErr.h"
00040 #ifndef WIN32
00041 #include "SignalHandler.h"
00042 #endif
00043 #include "HTTPCacheInterruptHandler.h"
00044 #include "HTTPCache.h"
00045 
00046 #include "util_mit.h"
00047 #include "debug.h"
00048 
00049 HTTPCache *HTTPCache::_instance = 0;
00050 
00051 using namespace std;
00052 
00053 // instance_mutex is used to ensure that only one instance is created. The
00054 // other mutexes used by this class are fields. 10/09/02 jhrg
00055 // Gcc (4.0.0) now complains about this saying that there are missing member
00056 // initializers for __kind, et cetera. jhrg 2/23/06
00057 // Now initialized in once_init_routine() called from the ctor.
00058 static pthread_mutex_t instance_mutex; //  = PTHREAD_MUTEX_INITIALIZER;
00059 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
00060 
00061 #define LOCK(m) pthread_mutex_lock((m))
00062 #define TRYLOCK(m) pthread_mutex_trylock((m))
00063 #define UNLOCK(m) pthread_mutex_unlock((m))
00064 #define INIT(m) pthread_mutex_init((m), 0)
00065 #define DESTROY(m) pthread_mutex_destroy((m))
00066 
00067 #ifdef WIN32
00068 #include <direct.h>
00069 #include <time.h>
00070 #include <fcntl.h>
00071 #define MKDIR(a,b) _mkdir((a))
00072 #define REMOVE(a) remove((a))
00073 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
00074 #define DIR_SEPARATOR_CHAR '\\'
00075 #define DIR_SEPARATOR_STR "\\"
00076 #else
00077 #define MKDIR(a,b) mkdir((a), (b))
00078 #define REMOVE(a) remove((a))
00079 #define MKSTEMP(a) mkstemp((a))
00080 #define DIR_SEPARATOR_CHAR '/'
00081 #define DIR_SEPARATOR_STR "/"
00082 #endif
00083 
00084 #ifdef WIN32
00085 #define CACHE_LOC "\\tmp\\"
00086 #define CACHE_ROOT "dods-cache\\"
00087 #else
00088 #define CACHE_LOC "/tmp/"
00089 #define CACHE_ROOT "dods-cache/"
00090 #endif
00091 #define CACHE_INDEX ".index"
00092 #define CACHE_LOCK ".lock"
00093 #define CACHE_META ".meta"
00094 #define CACHE_EMPTY_ETAG "@cache@"

// Freshness lifetimes, in seconds. The expressions are parenthesized so the
// macros expand correctly inside larger expressions (for example
// `x % NO_LM_EXPIRATION` or `2 * MAX_LM_EXPIRATION / 3`).
#define NO_LM_EXPIRATION (24*3600) // 24 hours
#define MAX_LM_EXPIRATION (48*3600) // Max expiration from LM

// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (std::min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif

#define DUMP_FREQUENCY 10 // Dump index every x loads

// Cache sizing. The *_SIZE values are in megabytes (multiply by MEGA to get
// bytes); the *_PCT values are used as divisors of a size.
#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10  // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry

00099 inline static int
get_hash(const string &url)
{
    int hash = 0;

    for (const char *ptr = url.c_str(); *ptr; ptr++)
        hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);

    return hash;
}

/** Initialize the file-scope \c instance_mutex. Intended to be run through
    pthread_once().

    @exception InternalErr Thrown if the mutex cannot be initialized. */
static void
once_init_routine()
{
    if (INIT(&instance_mutex) != 0)
        throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
}
00101 
00116 HTTPCache::HTTPCache(string cache_root, bool force) throw(Error) :
00117         d_locked_open_file(0),
00118         d_cache_enabled(false),
00119         d_cache_protected(false),
00120         d_expire_ignored(false),
00121         d_always_validate(false),
00122         d_total_size(CACHE_TOTAL_SIZE * MEGA),
00123         d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
00124         d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
00125         d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
00126         d_current_size(0),
00127         d_default_expiration(NO_LM_EXPIRATION),
00128         d_block_size(1),
00129         d_max_age(-1),
00130         d_max_stale(-1),
00131         d_min_fresh(-1),
00132         d_new_entries(0)
00133 {
00134     DBG(cerr << "Entering the constructor for " << this << "... ");
00135 
00136     int status = pthread_once(&once_block, once_init_routine);
00137     if (status != 0)
00138         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00139 
00140     INIT(&d_cache_mutex);
00141 
00142     // Initialize the cache table.
00143     for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
00144         d_cache_table[i] = 0;
00145 
00146     // This used to throw an Error object if we could not get the
00147     // single user lock. However, that results in an invalid object. It's
00148     // better to have an instance that has default values. If we cannot get
00149     // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
00150     //
00151     // I fixed this block so that the cache root is set before we try to get
00152     // the single user lock. That was the fix for bug #661. To make that
00153     // work, I had to move the call to create_cache_root out of
00154     // set_cache_root(). 09/08/03 jhrg
00155 
00156     set_cache_root(cache_root);
00157 
00158     if (get_single_user_lock(force)) {
00159 #ifdef WIN32
00160         //  Windows is unable to provide us this information.  4096 appears
00161         //  a best guess.  It is likely to be in the range [2048, 8192] on
00162         //  windows, but will the level of truth of that statement vary over
00163         //  time ?
00164         d_block_size = 4096;
00165 #else
00166         struct stat s;
00167         if (stat(cache_root.c_str(), &s) == 0)
00168             d_block_size = s.st_blksize;
00169         else
00170             throw Error("Could not set file system block size.");
00171 #endif
00172         cache_index_read();
00173         d_cache_enabled = true;
00174     }
00175 
00176     DBGN(cerr << "exiting" << endl);
00177 }
00178 
00207 HTTPCache *
00208 HTTPCache::instance(const string &cache_root, bool force)
00209 {
00210     LOCK(&instance_mutex);
00211     DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
00212         << "... ");
00213 
00214     try {
00215         if (!_instance) {
00216             _instance = new HTTPCache(cache_root, force);
00217 
00218             DBG(cerr << "New instance: " << _instance << ", cache root: "
00219                 << _instance->d_cache_root << endl);
00220 
00221             atexit(delete_instance);
00222 
00223 #ifndef WIN32
00224             // Register the interrupt handler. If we've already registered
00225             // one, barf. If this becomes a problem, hack SignalHandler so
00226             // that we can chain these handlers... 02/10/04 jhrg
00227             //
00228             // Technically we're leaking memory here. However, since this
00229             // class is a singleton, we know that only three objects will
00230             // ever be created and they will all exist until the process
00231             // exits. We can let this slide... 02/12/04 jhrg
00232             EventHandler *old_eh = SignalHandler::instance()->register_handler
00233                                    (SIGINT, new HTTPCacheInterruptHandler);
00234             if (old_eh) {
00235                 SignalHandler::instance()->register_handler(SIGINT, old_eh);
00236                 throw SignalHandlerRegisteredErr(
00237                     "Could not register event handler for SIGINT without superseding an existing one.");
00238             }
00239 
00240             old_eh = SignalHandler::instance()->register_handler
00241                      (SIGPIPE, new HTTPCacheInterruptHandler);
00242             if (old_eh) {
00243                 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
00244                 throw SignalHandlerRegisteredErr(
00245                     "Could not register event handler for SIGPIPE without superseding an existing one.");
00246             }
00247 
00248             old_eh = SignalHandler::instance()->register_handler
00249                      (SIGTERM, new HTTPCacheInterruptHandler);
00250             if (old_eh) {
00251                 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
00252                 throw SignalHandlerRegisteredErr(
00253                     "Could not register event handler for SIGTERM without superseding an existing one.");
00254             }
00255 #endif
00256         }
00257     }
00258     catch (Error &e) {
00259         DBG2(cerr << "The constructor threw an Error!" << endl);
00260         UNLOCK(&instance_mutex);
00261         throw e;
00262     }
00263 
00264     UNLOCK(&instance_mutex);
00265     DBGN(cerr << "returning " << hex << _instance << dec << endl);
00266 
00267     return _instance;
00268 }
00269 
00273 void
00274 HTTPCache::delete_instance()
00275 {
00276     DBG(cerr << "Entering delete_instance()..." << endl);
00277     if (HTTPCache::_instance) {
00278         DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
00279         delete HTTPCache::_instance;
00280         HTTPCache::_instance = 0;
00281     }
00282 
00283     DBG(cerr << "Exiting delete_instance()" << endl);
00284 }
00285 
00289 static inline void
00290 delete_cache_entry(HTTPCache::CacheEntry *e)
00291 {
00292     DBG2(cerr << "Deleting CacheEntry: " << e << endl);
00293     DESTROY(&e->lock);
00294     delete e;
00295 }
00296 
00309 HTTPCache::~HTTPCache()
00310 {
00311     DBG(cerr << "Entering the destructor for " << this << "... ");
00312 
00313     try {
00314         if (startGC())
00315             perform_garbage_collection();
00316 
00317         cache_index_write();
00318     }
00319     catch (Error &e) {
00320         // If the cache index cannot be written, we've got problems. However,
00321         // unless we're debugging, still free up the cache table in memory.
00322         // How should we let users know they cache index is not being
00323         // written?? 10/03/02 jhrg
00324         DBG(cerr << e.get_error_message() << endl);
00325     }
00326 
00327     // I don't see any code inside this try block that can throw an Error.
00328     // Nor do I see anything that can lock the interface. I'll leave this as
00329     // is, but I'm pretty sure this is left over from older code which called
00330     // perform_garbage_collection() in here and when that called
00331     // cache_index_write(). 01/23/04 jhrg
00332     try {
00333         for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
00334             CachePointers *cp = d_cache_table[i];
00335             if (cp) {
00336                 // delete each entry
00337                 for_each(cp->begin(), cp->end(), delete_cache_entry);
00338                 // now delete the vector that held the entries
00339                 DBG2(cerr << "Deleting d_cache_table[" << i << "]: "
00340                      << d_cache_table[i] << endl);
00341                 delete d_cache_table[i]; d_cache_table[i] = 0;
00342             }
00343         }
00344     }
00345     catch (Error &e) {
00346         DBG(cerr << "The constructor threw an Error!" << endl);
00347         DBGN(cerr << "Unlocking interface." << endl);
00348         UNLOCK(&d_cache_mutex);
00349         throw e;
00350     }
00351 
00352     release_single_user_lock();
00353 
00354     DBGN(cerr << "exiting destructor." << endl);
00355     DESTROY(&d_cache_mutex);
00356 }
00357 
00364 
00371 bool
00372 HTTPCache::cache_index_delete()
00373 {
00374     return (REMOVE(d_cache_index.c_str()) == 0);
00375 }
00376 
00385 bool
00386 HTTPCache::cache_index_read()
00387 {
00388     FILE *fp = fopen(d_cache_index.c_str(), "r");
00389     // If the cache index can't be opened that's OK; start with an empty
00390     // cache. 09/05/02 jhrg
00391     if (!fp) {
00392         return false;
00393     }
00394 
00395     char line[1024];
00396     while (!feof(fp) && fgets(line, 1024, fp)) {
00397         add_entry_to_cache_table(cache_index_parse_line(line));
00398         DBG2(cerr << line << endl);
00399     }
00400 
00401     int res = fclose(fp) ;
00402     if (res) {
00403         DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl ;) ;
00404     }
00405 
00406     return true;
00407 }
00408 
00416 HTTPCache::CacheEntry *
00417 HTTPCache::cache_index_parse_line(const char *line)
00418 {
00419     // Read the line and create the cache object
00420     CacheEntry *entry = new CacheEntry;
00421 
00422     INIT(&entry->lock);
00423     istringstream iss(line);
00424     iss >> entry->url;
00425     iss >> entry->cachename;
00426 
00427     iss >> entry->etag;
00428     if (entry->etag == CACHE_EMPTY_ETAG)
00429         entry->etag = "";
00430 
00431     iss >> entry->lm;
00432     iss >> entry->expires;
00433     iss >> entry->size;
00434 
00435     iss >> entry->range; // range is not used. 10/02/02 jhrg
00436 
00437     iss >> entry->hash;
00438     iss >> entry->hits;
00439     iss >> entry->freshness_lifetime;
00440     iss >> entry->response_time;
00441     iss >> entry->corrected_initial_age;
00442 
00443     iss >> entry->must_revalidate;
00444 
00445     return entry;
00446 }
00447 
00450 class WriteOneCacheEntry :
00451             public unary_function<HTTPCache::CacheEntry *, void>
00452 {
00453 
00454     FILE *d_fp;
00455 
00456 public:
00457     WriteOneCacheEntry(FILE *fp) : d_fp(fp)
00458     {}
00459 
00460     void operator()(HTTPCache::CacheEntry *e)
00461     {
00462         if (e && fprintf(d_fp,
00463                          "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
00464                          e->url.c_str(),
00465                          e->cachename.c_str(),
00466                          e->etag == "" ? CACHE_EMPTY_ETAG
00467                          : e->etag.c_str(),
00468                          (long)(e->lm),
00469                          (long)(e->expires),
00470                          e->size,
00471                          e->range ? '1' : '0', // not used. 10/02/02 jhrg
00472                          e->hash,
00473                          e->hits,
00474                          (long)(e->freshness_lifetime),
00475                          (long)(e->response_time),
00476                          (long)(e->corrected_initial_age),
00477                          e->must_revalidate ? '1' : '0') < 0)
00478             throw Error("Cache Index. Error writing cache index\n");
00479     }
00480 };
00481 
00491 void
00492 HTTPCache::cache_index_write()
00493 {
00494     DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
00495 
00496     // Open the file for writing.
00497     FILE * fp = NULL;
00498     if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
00499         throw Error(string("Cache Index. Can't open `") + d_cache_index
00500                     + string("' for writing"));
00501     }
00502 
00503     // Walk through the list and write it out. The format is really
00504     // simple as we keep it all in ASCII.
00505 
00506     for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00507         CachePointers *cp = d_cache_table[cnt];
00508         if (cp)
00509             for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
00510     }
00511 
00512     /* Done writing */
00513     int res = fclose(fp);
00514     if (res) {
00515         DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
00516             << (void *)fp << endl ;) ;
00517     }
00518 
00519     d_new_entries = 0;
00520 }
00521 
00523 
00527 
00529 static inline int
00530 entry_disk_space(int size, unsigned int block_size)
00531 {
00532     unsigned int num_of_blocks = (size + block_size) / block_size;
00533     DBG(cerr << "size: " << size << ", block_size: " << block_size
00534         << ", num_of_blocks: " << num_of_blocks << endl);
00535 
00536     return num_of_blocks * block_size;
00537 }
00538 
00542 bool
00543 HTTPCache::stopGC() const
00544 {
00545     return (d_current_size + d_folder_size < d_total_size - d_gc_buffer);
00546 }
00547 
00554 bool
00555 HTTPCache::startGC() const
00556 {
00557     DBG(cerr << "startGC, current_size: " << d_current_size << endl);
00558     return (d_current_size + d_folder_size > d_total_size);
00559 }
00560 
00571 void
00572 HTTPCache::remove_cache_entry(CacheEntry *entry)
00573 {
00574     // This should never happen; all calls to this method are protected by
00575     // the caller, hence the InternalErr.
00576     if (entry->locked)
00577         throw InternalErr("Tried to delete a cache entry that is in use.");
00578 
00579     REMOVE(entry->cachename.c_str());
00580     REMOVE(string(entry->cachename + CACHE_META).c_str());
00581 
00582     DBG(cerr << "remove_cache_entry, current_size: " << d_current_size << endl);
00583     unsigned int esd = entry_disk_space(entry->size, d_block_size);
00584     d_current_size = (esd > d_current_size) ? 0 : d_current_size - esd;
00585 
00586     DBG(cerr << "remove_cache_entry, current_size: " << d_current_size << endl);
00587     DBG2(cerr << "Current size (after decrement): " << d_current_size << endl);
00588 
00589     DBG2(cerr << "Deleting CacheEntry: " << entry << endl);
00590     delete entry; entry = 0;
00591 }
00592 
00607 void
00608 HTTPCache::perform_garbage_collection()
00609 {
00610     DBG(cerr << "Performing garbage collection" << endl);
00611 
00612     // Remove all the expired responses.
00613     expired_gc();
00614 
00615     // Remove entries larger than max_entry_size. Also remove entries
00616     // starting with zero hits, 1, ..., until stopGC() returns true.
00617     hits_gc();
00618 }
00619 
00627 class DeleteExpired :
00628             public unary_function<HTTPCache::CacheEntry *&, void>
00629 {
00630     time_t d_time;
00631     HTTPCache *d_cache;
00632 
00633 public:
00634     DeleteExpired(HTTPCache *cache, time_t t) :
00635             d_time(t), d_cache(cache)
00636     {}
00637 
00638     void operator()(HTTPCache::CacheEntry *&e)
00639     {
00640         if (e && !e->locked
00641             && (e->freshness_lifetime
00642                 < (e->corrected_initial_age + (d_time - e->response_time)))) {
00643             DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
00644             d_cache->remove_cache_entry(e);
00645             e = 0;
00646         }
00647     }
00648 };
00649 
00655 void
00656 HTTPCache::expired_gc()
00657 {
00658     if (!d_expire_ignored) {
00659         time_t now = time(0);
00660 
00661         // Walk through and delete all the expired entries.
00662         for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00663             CachePointers *slot = d_cache_table[cnt];
00664             if (slot) {
00665                 for_each(slot->begin(), slot->end(), DeleteExpired(this, now));
00666                 slot->erase(remove(slot->begin(), slot->end(),
00667                                    static_cast<CacheEntry *>(0)),
00668                             slot->end());
00669             }
00670         }
00671     }
00672 }
00673 
00680 class DeleteByHits :
00681             public unary_function<HTTPCache::CacheEntry *&, void>
00682 {
00683     HTTPCache *d_cache;
00684     int d_hits;
00685 
00686 public:
00687     DeleteByHits(HTTPCache *cache, int hits) :
00688             d_cache(cache), d_hits(hits)
00689     {}
00690 
00691     void operator()(HTTPCache::CacheEntry *&e)
00692     {
00693         if (d_cache->stopGC())
00694             return;
00695         if (e && !e->locked
00696             && (e->size > d_cache->d_max_entry_size || e->hits <= d_hits)) {
00697             DBG(cerr << "Deleting cache entry: " << e->url << endl);
00698             d_cache->remove_cache_entry(e);
00699             e = 0;
00700         }
00701     }
00702 };
00703 
00720 void
00721 HTTPCache::hits_gc()
00722 {
00723     int hits = 0;
00724 
00725     while (startGC()) {
00726         for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00727             if (d_cache_table[cnt]) {
00728                 CachePointers *slot = d_cache_table[cnt];
00729                 for_each(slot->begin(), slot->end(),
00730                          DeleteByHits(this, hits));
00731                 slot->erase(remove(slot->begin(), slot->end(),
00732                                    static_cast<CacheEntry*>(0)),
00733                             slot->end());
00734 
00735             }
00736         }
00737 
00738         hits++;
00739     }
00740 }
00741 
00743 
00747 
00756 void
00757 HTTPCache::add_entry_to_cache_table(HTTPCache::CacheEntry *entry)
00758 {
00759     int hash = entry->hash;
00760 
00761     if (!d_cache_table[hash]) {
00762         d_cache_table[hash] = new CachePointers;
00763         DBG2(cerr << "Allocated d_cache_table[" << hash << "]: "
00764              << d_cache_table[hash] << endl);
00765     }
00766 
00767     d_cache_table[hash]->push_back(entry);
00768     DBG2(cerr << "Pushing entry: " << entry << " onto d_cache_table["
00769          << hash << "]" << endl);
00770 
00771     DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
00772         << ", entry->size: " << entry->size << endl);
00773     d_current_size += entry_disk_space(entry->size, d_block_size);
00774     DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
00775     DBG2(cerr << "Current size (after increment): " << d_current_size << endl);
00776 }
00777 
00788 HTTPCache::CacheEntry *
00789 HTTPCache::get_entry_from_cache_table(int hash, const string &url) const
00790 {
00791     if (d_cache_table[hash]) {
00792         CachePointers *cp = d_cache_table[hash];
00793         for (CachePointersIter i = cp->begin(); i != cp->end(); ++i)
00794             // Must test *i because perform_garbage_collection may have
00795             // removed this entry; the CacheEntry will then be null.
00796             if ((*i) && (*i)->url == url)
00797                 return *i;
00798     }
00799 
00800     return 0;
00801 }
00802 
00806 class DeleteCacheEntry:
00807             public unary_function<HTTPCache::CacheEntry *&, void>
00808 {
00809 
00810     string d_url;
00811     HTTPCache *d_cache;
00812 
00813 public:
00814     DeleteCacheEntry(HTTPCache *c, const string &url)
00815             : d_url(url), d_cache(c)
00816     {}
00817 
00818     void operator()(HTTPCache::CacheEntry *&e)
00819     {
00820         if (e && !e->locked && e->url == d_url) {
00821             d_cache->remove_cache_entry(e);
00822             e = 0;
00823         }
00824     }
00825 };
00826 
00836 void
00837 HTTPCache::remove_entry_from_cache_table(const string &url)
00838 {
00839     int hash = get_hash(url);
00840     if (d_cache_table[hash]) {
00841         CachePointers *cp = d_cache_table[hash];
00842         for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
00843         cp->erase(remove(cp->begin(), cp->end(), static_cast<CacheEntry*>(0)),
00844                   cp->end());
00845     }
00846 }
00847 
00854 HTTPCache::CacheEntry *
00855 HTTPCache::get_entry_from_cache_table(const string &url) const
00856 {
00857     return get_entry_from_cache_table(get_hash(url), url);
00858 }
00859 
00861 
00870 void
00871 HTTPCache::create_cache_root(const string &cache_root)
00872 {
00873     struct stat stat_info;
00874     string::size_type cur = 0;
00875 
00876 #ifdef WIN32
00877     cur = cache_root[1] == ':' ? 3 : 1;
00878 #else
00879     cur = 1;
00880 #endif
00881     while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
00882         string dir = cache_root.substr(0, cur);
00883         if (stat(dir.c_str(), &stat_info) == -1) {
00884             DBG2(cerr << "Cache....... Creating " << dir << endl);
00885             if (MKDIR(dir.c_str(), 0777) < 0) {
00886                 DBG2(cerr << "Error: can't create." << endl);
00887                 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
00888             }
00889         }
00890         else {
00891             DBG2(cerr << "Cache....... Found " << dir << endl);
00892         }
00893         cur++;
00894     }
00895 }
00896 
00911 void
00912 HTTPCache::set_cache_root(const string &root)
00913 {
00914     if (root != "") {
00915         d_cache_root = root;
00916         // cache root should end in /.
00917         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00918             d_cache_root += DIR_SEPARATOR_CHAR;
00919     }
00920     else {
00921         // If no cache root has been indicated then look for a suitable
00922         // location.
00923         char * cr = (char *) getenv("DODS_CACHE");
00924         if (!cr) cr = (char *) getenv("TMP");
00925         if (!cr) cr = (char *) getenv("TEMP");
00926         if (!cr) cr = CACHE_LOC;
00927 
00928         d_cache_root = cr;
00929         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00930             d_cache_root += DIR_SEPARATOR_CHAR;
00931 
00932         d_cache_root += CACHE_ROOT;
00933     }
00934 
00935     d_cache_index = d_cache_root + CACHE_INDEX;
00936 }
00937 
00948 bool
00949 HTTPCache::get_single_user_lock(bool force)
00950 {
00951     if (!d_locked_open_file) {
00952         FILE * fp = NULL;
00953 
00954         try {
00955             // It's OK to call create_cache_root if the directory already
00956             // exists.
00957             create_cache_root(d_cache_root);
00958         }
00959         catch (Error &e) {
00960             // We need to catch and return false because this method is
00961             // called from a ctor and throwing at this point will result in a
00962             // partially constructed object. 01/22/04 jhrg
00963             return false;
00964         }
00965 
00966         string lock = d_cache_root + CACHE_LOCK;
00967     if ((fp = fopen(lock.c_str(), "r")) != NULL) {
00968             int res = fclose(fp);
00969             if (res) {
00970                 DBG(cerr << "HTTPCache::get_single_user_lock - Failed to close " << (void *)fp << endl ;) ;
00971             }
00972             if (force)
00973                 REMOVE(lock.c_str());
00974             else
00975                 return false;
00976         }
00977 
00978         if ((fp = fopen(lock.c_str(), "w")) == NULL)
00979             return false;
00980 
00981         d_locked_open_file = fp;
00982         return true;
00983     }
00984 
00985     return false;
00986 }
00987 
00990 void
00991 HTTPCache::release_single_user_lock()
00992 {
00993     if (d_locked_open_file) {
00994         int res = fclose(d_locked_open_file);
00995         if (res) {
00996             DBG(cerr << "HTTPCache::release_single_user_lock - Failed to close " << (void *)d_locked_open_file << endl ;) ;
00997         }
00998         d_locked_open_file = 0;
00999     }
01000 
01001     string lock = d_cache_root + CACHE_LOCK;
01002 REMOVE(lock.c_str());
01003 }
01004 
01007 
01011 string
01012 HTTPCache::get_cache_root() const
01013 {
01014     return d_cache_root;
01015 }
01016 
01028 void
01029 HTTPCache::set_cache_enabled(bool mode)
01030 {
01031     DBG(cerr << "Locking interface... ");
01032     LOCK(&d_cache_mutex);
01033 
01034     d_cache_enabled = mode;
01035 
01036     UNLOCK(&d_cache_mutex);
01037     DBGN(cerr << "Unlocking interface." << endl);
01038 }
01039 
01042 bool
01043 HTTPCache::is_cache_enabled() const
01044 {
01045     DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
01046          << endl);
01047     return d_cache_enabled;
01048 }
01049 
01059 void
01060 HTTPCache::set_cache_protected(bool mode)
01061 {
01062     DBG(cerr << "Locking interface... ");
01063     LOCK(&d_cache_mutex);
01064 
01065     d_cache_protected = mode;
01066 
01067     UNLOCK(&d_cache_mutex);
01068     DBGN(cerr << "Unlocking interface." << endl);
01069 }
01070 
01073 bool
01074 HTTPCache::is_cache_protected() const
01075 {
01076     return d_cache_protected;
01077 }
01078 
01089 void
01090 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
01091 {
01092     DBG(cerr << "Locking interface... ");
01093     LOCK(&d_cache_mutex);
01094 
01095     d_cache_disconnected = mode;
01096 
01097     UNLOCK(&d_cache_mutex);
01098     DBGN(cerr << "Unlocking interface." << endl);
01099 }
01100 
01103 CacheDisconnectedMode
01104 HTTPCache::get_cache_disconnected() const
01105 {
01106     return d_cache_disconnected;
01107 }
01108 
01117 void
01118 HTTPCache::set_expire_ignored(bool mode)
01119 {
01120     DBG(cerr << "Locking interface... ");
01121     LOCK(&d_cache_mutex);
01122 
01123     d_expire_ignored = mode;
01124 
01125     UNLOCK(&d_cache_mutex);
01126     DBGN(cerr << "Unlocking interface." << endl);
01127 }
01128 
01129 /* Is the cache ignoring Expires headers returned with responses that have
01130    been cached? */
01131 
01132 bool
01133 HTTPCache::is_expire_ignored() const
01134 {
01135     return d_expire_ignored;
01136 }
01137 
01153 void
01154 HTTPCache::set_max_size(unsigned long size)
01155 {
01156     DBG(cerr << "Locking interface... ");
01157     LOCK(&d_cache_mutex);
01158 
01159     try {
01160         unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
01161                                  MIN_CACHE_TOTAL_SIZE * MEGA :
01162                                  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
01163         unsigned long old_size = d_total_size;
01164         d_total_size = new_size;
01165         d_folder_size = d_total_size / CACHE_FOLDER_PCT;
01166         d_gc_buffer = d_total_size / CACHE_GC_PCT;
01167 
01168         if (new_size < old_size && startGC()) {
01169             perform_garbage_collection();
01170             cache_index_write();
01171         }
01172     }
01173     catch (Error &e) {
01174         UNLOCK(&d_cache_mutex);
01175         DBGN(cerr << "Unlocking interface." << endl);
01176         throw e;
01177     }
01178 
01179     DBG2(cerr << "Cache....... Total cache size: " << d_total_size
01180          << " with " << d_folder_size
01181          << " bytes for meta information and folders and at least "
01182          << d_gc_buffer << " bytes free after every gc" << endl);
01183 
01184     UNLOCK(&d_cache_mutex);
01185     DBGN(cerr << "Unlocking interface." << endl);
01186 }
01187 
01190 unsigned long
01191 HTTPCache::get_max_size() const
01192 {
01193     return d_total_size / MEGA;
01194 }
01195 
01204 void
01205 HTTPCache::set_max_entry_size(unsigned long size)
01206 {
01207     DBG(cerr << "Locking interface... ");
01208     LOCK(&d_cache_mutex);
01209 
01210     try {
01211         unsigned long new_size = size * MEGA;
01212         if (new_size > 0 && new_size < d_total_size - d_folder_size) {
01213             unsigned long old_size = d_max_entry_size;
01214             d_max_entry_size = new_size;
01215             if (new_size < old_size && startGC()) {
01216                 perform_garbage_collection();
01217                 cache_index_write();
01218             }
01219         }
01220     }
01221     catch (Error &e) {
01222         UNLOCK(&d_cache_mutex);
01223         DBGN(cerr << "Unlocking interface." << endl);
01224         throw e;
01225     }
01226 
01227     DBG2(cerr << "Cache...... Max entry cache size is "
01228          << d_max_entry_size << endl);
01229 
01230     UNLOCK(&d_cache_mutex);
01231     DBGN(cerr << "Unlocking interface." << endl);
01232 }
01233 
01238 unsigned long
01239 HTTPCache::get_max_entry_size() const
01240 {
01241     return d_max_entry_size / MEGA;
01242 }
01243 
01254 void
01255 HTTPCache::set_default_expiration(const int exp_time)
01256 {
01257     DBG(cerr << "Locking interface... ");
01258     LOCK(&d_cache_mutex);
01259 
01260     d_default_expiration = exp_time;
01261 
01262     UNLOCK(&d_cache_mutex);
01263     DBGN(cerr << "Unlocking interface." << endl);
01264 }
01265 
01268 int
01269 HTTPCache::get_default_expiration() const
01270 {
01271     return d_default_expiration;
01272 }
01273 
01278 void
01279 HTTPCache::set_always_validate(bool validate)
01280 {
01281     d_always_validate = validate;
01282 }
01283 
01287 bool
01288 HTTPCache::get_always_validate() const
01289 {
01290     return d_always_validate;
01291 }
01292 
01309 void
01310 HTTPCache::set_cache_control(const vector<string> &cc)
01311 {
01312     DBG(cerr << "Locking interface... ");
01313     LOCK(&d_cache_mutex);
01314 
01315     try {
01316         d_cache_control = cc;
01317 
01318         vector<string>::const_iterator i;
01319         for (i = cc.begin(); i != cc.end(); ++i) {
01320             string header = (*i).substr(0, (*i).find(':'));
01321             string value = (*i).substr((*i).find(": ") + 2);
01322             if (header != "Cache-Control") {
01323                 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
01324             }
01325             else {
01326                 if (value == "no-cache" || value == "no-store")
01327                     d_cache_enabled = false;
01328                 else if (value.find("max-age") != string::npos) {
01329                     string max_age = value.substr(value.find("=" + 1));
01330                     d_max_age = parse_time(max_age.c_str());
01331                 }
01332                 else if (value == "max-stale")
01333                     d_max_stale = 0; // indicates will take anything;
01334                 else if (value.find("max-stale") != string::npos) {
01335                     string max_stale = value.substr(value.find("=" + 1));
01336                     d_max_stale = parse_time(max_stale.c_str());
01337                 }
01338                 else if (value.find("min-fresh") != string::npos) {
01339                     string min_fresh = value.substr(value.find("=" + 1));
01340                     d_min_fresh = parse_time(min_fresh.c_str());
01341                 }
01342             }
01343         }
01344     }
01345     catch (Error &e) {
01346         UNLOCK(&d_cache_mutex);
01347         DBGN(cerr << "Unlocking interface." << endl);
01348         throw e;
01349     }
01350 
01351     UNLOCK(&d_cache_mutex);
01352     DBGN(cerr << "Unlocking interface." << endl);
01353 }
01354 
01355 
01362 vector<string>
01363 HTTPCache::get_cache_control()
01364 {
01365     return d_cache_control;
01366 }
01367 
01369 
01383 string
01384 HTTPCache::create_hash_directory(int hash)
01385 {
01386     struct stat stat_info;
01387     ostringstream path;
01388 
01389     path << d_cache_root << hash;
01390     string p = path.str();
01391 
01392     if (stat(p.c_str(), &stat_info) == -1) {
01393         DBG2(cerr << "Cache....... Create dir " << p << endl);
01394         if (MKDIR(p.c_str(), 0777) < 0) {
01395             DBG2(cerr << "Cache....... Can't create..." << endl);
01396             throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
01397         }
01398     }
01399     else {
01400         DBG2(cerr << "Cache....... Directory " << p << " already exists"
01401              << endl);
01402     }
01403 
01404     return p;
01405 }
01406 
01421 void
01422 HTTPCache::create_location(CacheEntry *entry)
01423 {
01424     string hash_dir = create_hash_directory(entry->hash);
01425 #ifdef WIN32
01426     hash_dir += "\\dodsXXXXXX";
01427 #else
01428     hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
01429 #endif
01430 
01431     // mkstemp uses the storage passed to it; must be writable and local.
01432     char *templat = new char[hash_dir.size() + 1];
01433     strcpy(templat, hash_dir.c_str());
01434 
01435     // Open truncated for update. NB: mkstemp() returns a file descriptor.
01436     // man mkstemp says "... The file is opened with the O_EXCL flag,
01437     // guaranteeing that when mkstemp returns successfully we are the only
01438     // user." 09/19/02 jhrg
01439     int fd = MKSTEMP(templat); // fd mode is 666 or 600 (Unix)
01440     if (fd < 0) {
01441         delete templat; templat = 0;
01442         close(fd);
01443         throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
01444     }
01445 
01446     entry->cachename = templat;
01447     delete[] templat; templat = 0;
01448     close(fd);
01449 }
01450 
01461 void
01462 HTTPCache::parse_headers(CacheEntry *entry, const vector<string> &headers)
01463 {
01464     vector<string>::const_iterator i;
01465     for (i = headers.begin(); i != headers.end(); ++i) {
01466         string header = (*i).substr(0, (*i).find(':'));
01467         string value = (*i).substr((*i).find(": ") + 2);
01468         DBG2(cerr << "Header: " << header << endl);
01469         DBG2(cerr << "Value: " << value << endl);
01470 
01471         if (header == "ETag") {
01472             entry->etag = value;
01473         }
01474         else if (header == "Last-Modified") {
01475             entry->lm = parse_time(value.c_str());
01476         }
01477         else if (header == "Expires") {
01478             entry->expires = parse_time(value.c_str());
01479         }
01480         else if (header == "Date") {
01481             entry->date = parse_time(value.c_str());
01482         }
01483         else if (header == "Age") {
01484             entry->age = parse_time(value.c_str());
01485         }
01486         else if (header == "Content-Length") {
01487             unsigned long clength = strtoul(value.c_str(), 0, 0);
01488             if (clength > d_max_entry_size)
01489                 entry->no_cache = true;
01490         }
01491         else if (header == "Cache-Control") {
01492             // Ignored Cache-Control values: public, private, no-transform,
01493             // proxy-revalidate, s-max-age. These are used by shared caches.
01494             // See section 14.9 of RFC 2612. 10/02/02 jhrg
01495             if (value == "no-cache" || value == "no-store")
01496                 // Note that we *can* store a 'no-store' response in volatile
01497                 // memory according to RFC 2616 (section 14.9.2) but those
01498                 // will be rare coming from DAP servers. 10/02/02 jhrg
01499                 entry->no_cache = true;
01500             else if (value == "must-revalidate")
01501                 entry->must_revalidate = true;
01502             else if (value.find("max-age") != string::npos) {
01503                 string max_age = value.substr(value.find("=" + 1));
01504                 entry->max_age = parse_time(max_age.c_str());
01505             }
01506         }
01507     }
01508 }
01509 
01521 void
01522 HTTPCache::calculate_time(CacheEntry *entry, time_t request_time)
01523 {
01524     entry->response_time = time(NULL);
01525     time_t apparent_age
01526     = max(0, static_cast<int>(entry->response_time - entry->date));
01527     time_t corrected_received_age = max(apparent_age, entry->age);
01528     time_t response_delay = entry->response_time - request_time;
01529     entry->corrected_initial_age = corrected_received_age + response_delay;
01530 
01531     // Estimate an expires time using the max-age and expires time. If we
01532     // don't have an explicit expires time then set it to 10% of the LM date
01533     // (although max 24 h). If no LM date is available then use 24 hours.
01534     time_t freshness_lifetime = entry->max_age;
01535     if (freshness_lifetime < 0) {
01536         if (entry->expires < 0) {
01537             if (entry->lm < 0) {
01538                 freshness_lifetime = NO_LM_EXPIRATION;
01539             }
01540             else {
01541                 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
01542             }
01543         }
01544         else
01545             freshness_lifetime = entry->expires - entry->date;
01546     }
01547 
01548     entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
01549 
01550     DBG2(cerr << "Cache....... Received Age " << entry->age
01551          << ", corrected " << entry->corrected_initial_age
01552          << ", freshness lifetime " << entry->freshness_lifetime << endl);
01553 }
01554 
01562 bool
01563 HTTPCache::is_url_in_cache(const string &url)
01564 {
01565     DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
01566 
01567     return get_entry_from_cache_table(url) != 0;
01568 }
01569 
/** Does a header line mention one of the hop-by-hop headers?

    The test is a case-sensitive substring search over the whole line, so
    it is also true when one of the names occurs inside another header's
    name or value.

    @param header A complete header line.
    @return True if the line contains Connection, Keep-Alive,
    Proxy-Authenticate, Proxy-Authorization, Transfer-Encoding or Upgrade. */
static inline bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const unsigned int name_count
    = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (unsigned int k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != std::string::npos)
            return true;
    }

    return false;
}
01588 
01600 void
01601 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
01602 {
01603     string fname = cachename + CACHE_META;
01604     d_open_files.push_back(fname);
01605 
01606     FILE *dest = fopen(fname.c_str(), "w");
01607     if (!dest) {
01608         throw InternalErr(__FILE__, __LINE__,
01609                           "Could not open named cache entry file.");
01610     }
01611 
01612     vector<string>::const_iterator i;
01613     for (i = headers.begin(); i != headers.end(); ++i) {
01614         if (!is_hop_by_hop_header(*i)) {
01615             fwrite((*i).c_str(), (*i).size(), 1, dest);
01616             fwrite("\n", 1, 1, dest);
01617         }
01618     }
01619 
01620     int res = fclose(dest);
01621     if (res) {
01622         DBG(cerr << "HTTPCache::write_metadata - Failed to close "
01623             << dest << endl);
01624     }
01625 
01626     d_open_files.pop_back();
01627 }
01628 
01639 void
01640 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
01641 {
01642     FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
01643     if (!md) {
01644         throw InternalErr(__FILE__, __LINE__,
01645                           "Could not open named cache entry meta data file.");
01646     }
01647 
01648     char line[1024];
01649     while (!feof(md) && fgets(line, 1024, md)) {
01650         line[strlen(line)-1] = '\0'; // erase newline
01651         headers.push_back(string(line));
01652     }
01653 
01654     int res = fclose(md);
01655     if (res) {
01656         DBG(cerr << "HTTPCache::read_metadata - Failed to close "
01657             << md << endl);
01658     }
01659 }
01660 
01682 int
01683 HTTPCache::write_body(const string &cachename, const FILE *src)
01684 {
01685     d_open_files.push_back(cachename);
01686 
01687     FILE *dest = fopen(cachename.c_str(), "wb");
01688     if (!dest) {
01689         throw InternalErr(__FILE__, __LINE__,
01690                           "Could not open named cache entry file.");
01691     }
01692 
01693     // Read and write in 1k blocks; an attempt at doing this efficiently.
01694     // 09/30/02 jhrg
01695     char line[1024];
01696     size_t n;
01697     int total = 0;
01698     while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
01699         total += fwrite(line, 1, n, dest);
01700         DBG2(sleep(3));
01701 #if 0
01702         // See comment above. If this is uncommented, make sure to clean up
01703         // the partially written file when ResponseTooBirErr is throw.
01704         if (total > d_max_entry_size)
01705             throw ResponseTooBigErr("This response is too big to cache.");
01706 #endif
01707     }
01708 
01709     if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
01710         int res = fclose(dest);
01711         res = res & unlink(cachename.c_str());
01712         if (res) {
01713             DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
01714                 << dest << endl);
01715         }
01716         throw InternalErr(__FILE__, __LINE__,
01717                           "I/O error transferring data to the cache.");
01718     }
01719 
01720     rewind(const_cast<FILE *>(src));
01721 
01722     int res = fclose(dest);
01723     if (res) {
01724         DBG(cerr << "HTTPCache::write_body - Failed to close "
01725             << dest << endl);
01726     }
01727 
01728     d_open_files.pop_back();
01729 
01730     return total;
01731 }
01732 
01741 FILE *
01742 HTTPCache::open_body(const string &cachename)
01743 {
01744     FILE *src = fopen(cachename.c_str(), "r+b");
01745     if (!src) {
01746         throw InternalErr(__FILE__, __LINE__,
01747                           "Could not open named cache entry file.");
01748     }
01749 
01750     return src;
01751 }
01752 
01778 bool
01779 HTTPCache::cache_response(const string &url, time_t request_time,
01780                           const vector<string> &headers, const FILE *body)
01781 {
01782     DBG(cerr << "Locking interface... ");
01783     LOCK(&d_cache_mutex);
01784 
01785     DBG(cerr << "Caching url: " << url << "." << endl);
01786 
01787     try {
01788         // If this is not an http or https URL, don't cache.
01789         if (url.find("http:") == string::npos &&
01790             url.find("https:") == string::npos) {
01791             UNLOCK(&d_cache_mutex);
01792             DBGN(cerr << "Unlocking interface." << endl);
01793             return false;
01794         }
01795 
01796         // This does nothing if url is not already in the cache. It's
01797         // more efficient to do this than to first check and see if the entry
01798         // exists. 10/10/02 jhrg
01799         remove_entry_from_cache_table(url);
01800 
01801         CacheEntry *entry = new CacheEntry;
01802 
01803         INIT(&entry->lock);
01804         entry->url = url;
01805         entry->hash = get_hash(url);
01806         entry->hits = 0;
01807 
01808         try {
01809             parse_headers(entry, headers); // etag, lm, date, age, expires, max_age.
01810             if (entry->no_cache) {
01811                 DBG(cerr << "Not cache-able; deleting CacheEntry: " << entry
01812                     << "(" << url << ")" << endl);
01813                 delete entry; entry = 0;
01814                 UNLOCK(&d_cache_mutex);
01815                 DBGN(cerr << "Unlocking interface." << endl);
01816                 return false;
01817             }
01818 
01819             // corrected_initial_age, freshness_lifetime, response_time.
01820             calculate_time(entry, request_time);
01821 
01822             create_location(entry); // cachename, cache_body_fd
01823             entry->size = write_body(entry->cachename, body);
01824             write_metadata(entry->cachename, headers);
01825         }
01826         catch (ResponseTooBigErr &e) {
01827             // Oops. Bummer. Clean up and exit.
01828             DBG(cerr << e.get_error_message() << endl);
01829             REMOVE(entry->cachename.c_str());
01830             REMOVE(string(entry->cachename + CACHE_META).c_str());
01831             DBG(cerr << "Too big; deleting CacheEntry: " << entry << "(" << url
01832                 << ")" << endl);
01833             delete entry; entry = 0;
01834             UNLOCK(&d_cache_mutex);
01835             DBGN(cerr << "Unlocking interface." << endl);
01836             return false;
01837         }
01838 
01839         entry->range = false; // not used. 10/02/02 jhrg
01840 
01841         add_entry_to_cache_table(entry);
01842 
01843         if (++d_new_entries > DUMP_FREQUENCY) {
01844             if (startGC())
01845                 perform_garbage_collection();
01846 
01847             cache_index_write(); // resets d_new_entries
01848         }
01849     }
01850     catch (Error &e) {
01851         UNLOCK(&d_cache_mutex);
01852         DBGN(cerr << "Unlocking interface." << endl);
01853         throw e;
01854     }
01855 
01856     UNLOCK(&d_cache_mutex);
01857     DBGN(cerr << "Unlocking interface." << endl);
01858 
01859     return true;
01860 }
01861 
01880 vector<string>
01881 HTTPCache::get_conditional_request_headers(const string &url)
01882 {
01883     DBG(cerr << "Locking interface... ");
01884     LOCK(&d_cache_mutex);
01885     CacheEntry *entry;
01886     vector<string> headers;
01887 
01888     DBG(cerr << "Getting conditional request headers for " << url << endl);
01889 
01890     try {
01891         entry = get_entry_from_cache_table(url);
01892         if (!entry) {
01893             throw Error("There is no cache entry for the URL: " + url);
01894         }
01895 
01896         DBG(cerr << "Locking entry... ");
01897         LOCK(&entry->lock);
01898 
01899         if (entry->etag != "")
01900             headers.push_back(string("If-None-Match: ") + entry->etag);
01901 
01902         if (entry->lm > 0)
01903             headers.push_back(string("If-Modified-Since: ")
01904                               + date_time_str(&entry->lm));
01905         else if (entry->max_age > 0)
01906             headers.push_back(string("If-Modified-Since: ")
01907                               + date_time_str(&entry->max_age));
01908         else if (entry->expires > 0)
01909             headers.push_back(string("If-Modified-Since: ")
01910                               + date_time_str(&entry->expires));
01911     }
01912     catch (Error &e) {
01913         UNLOCK(&d_cache_mutex);
01914         DBGN(cerr << "Unlocking interface." << endl);
01915         UNLOCK(&entry->lock);
01916         DBGN(cerr << "Unlocking entry." << endl);
01917         throw e;
01918     }
01919 
01920     UNLOCK(&entry->lock);
01921     DBGN(cerr << "Unlocking entry" << endl);
01922     UNLOCK(&d_cache_mutex);
01923     DBGN(cerr << "Unlocking interface." << endl);
01924 
01925     return headers;
01926 }
01927 
/** Functor that orders header lines by header name only.

    Two lines compare by the text that precedes their first colon (the
    whole line when there is no colon), so headers with the same name but
    different values are equivalent. Used as the ordering of a std::set to
    drop duplicate headers.

    The nested typedefs are the ones std::binary_function used to supply;
    that base class was removed from the standard library in C++17. */
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    /** @return True if the name of \c s1 sorts before the name of \c s2. */
    bool operator()(const std::string &s1, const std::string &s2) const
    {
        // compare() clamps the lengths, so npos means "to the end"; this
        // compares the two name prefixes without copying them.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
01938 
01952 void
01953 HTTPCache::update_response(const string &url, time_t request_time,
01954                            const vector<string> &headers)
01955 {
01956     DBG(cerr << "Locking interface... ");
01957     LOCK(&d_cache_mutex);
01958     CacheEntry *entry;
01959 
01960     DBG(cerr << "Updating the response headers for: " << url << endl);
01961 
01962     try {
01963         entry = get_entry_from_cache_table(url);
01964         if (!entry)
01965             throw Error("There is no cache entry for the URL: " + url);
01966 
01967         DBG(cerr << "Locking entry... ");
01968         LOCK(&entry->lock);
01969 
01970         // Merge the new headers with the exiting CacheEntry object.
01971         parse_headers(entry, headers);
01972 
01973         // Update corrected_initial_age, freshness_lifetime, response_time.
01974         calculate_time(entry, request_time);
01975 
01976         // Merge the new headers with those in the persistent store. How:
01977         // Load the new headers into a set, then merge the old headers. Since
01978         // set<> ignores duplicates, old headers with the same name as a new
01979         // header will got into the bit bucket. Define a special compare
01980         // functor to make sure that headers are compared using only their
01981         // name and not their value too.
01982         set<string, HeaderLess> merged_headers;
01983 
01984         // Load in the new headers
01985         copy(headers.begin(), headers.end(),
01986              inserter(merged_headers, merged_headers.begin()));
01987 
01988         // Get the old headers and load them in.
01989         vector<string> old_headers;
01990         read_metadata(entry->cachename, old_headers);
01991         copy(old_headers.begin(), old_headers.end(),
01992              inserter(merged_headers, merged_headers.begin()));
01993 
01994         // Read the values back out. Use reverse iterators with back_inserter
01995         // to preserve header order. NB: vector<> does not support push_front
01996         // so we can't use front_inserter(). 01/09/03 jhrg
01997         vector<string> result;
01998         copy(merged_headers.rbegin(), merged_headers.rend(),
01999              back_inserter(result));
02000 
02001         // Store.
02002         write_metadata(entry->cachename, result);
02003     }
02004     catch (Error &e) {
02005         UNLOCK(&entry->lock);
02006         DBGN(cerr << "Unlocking entry." << endl);
02007         UNLOCK(&d_cache_mutex);
02008         DBGN(cerr << "Unlocking interface." << endl);
02009         throw e;
02010     }
02011 
02012     UNLOCK(&entry->lock);
02013     DBGN(cerr << "Unlocking entry" << endl);
02014     UNLOCK(&d_cache_mutex);
02015     DBGN(cerr << "Unlocking interface." << endl);
02016 }
02017 
02029 bool
02030 HTTPCache::is_url_valid(const string &url)
02031 {
02032     DBG(cerr << "Locking interface... ");
02033     LOCK(&d_cache_mutex);
02034     bool freshness;
02035     CacheEntry *entry = 0;
02036 
02037     DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
02038 
02039     try {
02040         if (d_always_validate) {
02041             UNLOCK(&d_cache_mutex);
02042             DBGN(cerr << "Unlocking interface." << endl);
02043             return false;  // force re-validation.
02044         }
02045 
02046         entry = get_entry_from_cache_table(url);
02047         if (!entry) {
02048             UNLOCK(&d_cache_mutex);
02049             DBGN(cerr << "Unlocking interface." << endl);
02050             throw Error("There is no cache entry for the URL: " + url);
02051         }
02052 
02053         DBG(cerr << "Locking entry... ");
02054         LOCK(&entry->lock);
02055 
02056         // If we supported range requests, we'd need code here to check if
02057         // there was only a partial response in the cache. 10/02/02 jhrg
02058 
02059         // In case this entry is of type "must-revalidate" then we consider it
02060         // invalid.
02061         if (entry->must_revalidate) {
02062             UNLOCK(&entry->lock);
02063             DBGN(cerr << "Unlocking entry" << endl);
02064             UNLOCK(&d_cache_mutex);
02065             DBGN(cerr << "Unlocking interface." << endl);
02066             return false;
02067         }
02068 
02069         time_t resident_time = time(NULL) - entry->response_time;
02070         time_t current_age = entry->corrected_initial_age + resident_time;
02071 
02072         // Check that the max-age, max-stale, and min-fresh directives
02073         // given in the request cache control header is followed.
02074         if (d_max_age >= 0 && current_age > d_max_age) {
02075             DBG(cerr << "Cache....... Max-age validation" << endl);
02076             UNLOCK(&entry->lock);
02077             DBGN(cerr << "Unlocking entry" << endl);
02078             UNLOCK(&d_cache_mutex);
02079             DBGN(cerr << "Unlocking interface." << endl);
02080             return false;
02081         }
02082         if (d_min_fresh >= 0
02083             && entry->freshness_lifetime < current_age + d_min_fresh) {
02084             DBG(cerr << "Cache....... Min-fresh validation" << endl);
02085             UNLOCK(&entry->lock);
02086             DBGN(cerr << "Unlocking entry" << endl);
02087             UNLOCK(&d_cache_mutex);
02088             DBGN(cerr << "Unlocking interface." << endl);
02089             return false;
02090         }
02091 
02092         freshness = (entry->freshness_lifetime
02093                      + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
02094     }
02095     catch (Error &e) {
02096         UNLOCK(&entry->lock);
02097         DBGN(cerr << "Unlocking entry." << endl);
02098         UNLOCK(&d_cache_mutex);
02099         DBGN(cerr << "Unlocking interface." << endl);
02100         throw e;
02101     }
02102 
02103     UNLOCK(&entry->lock);
02104     DBGN(cerr << "Unlocking entry" << endl);
02105     UNLOCK(&d_cache_mutex);
02106     DBGN(cerr << "Unlocking interface." << endl);
02107 
02108     return freshness;
02109 }
02110 
02136 FILE *
02137 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
02138 {
02139     DBG(cerr << "Locking interface... ");
02140     LOCK(&d_cache_mutex);
02141     FILE *body;
02142     CacheEntry *entry;
02143 
02144     DBG(cerr << "Getting the cached response for " << url << endl);
02145 
02146     try {
02147         entry = get_entry_from_cache_table(url);
02148         if (!entry)
02149             throw Error("There is no cache entry for the URL: " + url);
02150 
02151         read_metadata(entry->cachename, headers);
02152         DBG(cerr << "Headers just read from cache: " << endl);
02153         DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
02154 
02155         body = open_body(entry->cachename);
02156 
02157         DBG(cerr << "Returning: " << url << " from the cache." << endl);
02158 
02159         entry->hits++;  // Mark hit
02160         entry->locked++; // lock entry
02161         d_locked_entries[body] = entry; // record lock, see release_cached_r...
02162         DBG(cerr << "Locking entry (non-blocking lock)... ");
02163         TRYLOCK(&entry->lock); // Needed for blocking lock; locked counts
02164     }
02165     catch (Error &e) {
02166         UNLOCK(&entry->lock);
02167         DBGN(cerr << "Unlocking entry." << endl);
02168         UNLOCK(&d_cache_mutex);
02169         DBGN(cerr << "Unlocking interface." << endl);
02170         throw e;
02171     }
02172 
02173     UNLOCK(&d_cache_mutex);
02174     DBGN(cerr << "Unlocking interface." << endl);
02175 
02176     return body;
02177 }
02178 
02199 FILE *
02200 HTTPCache::get_cached_response_body(const string &url)
02201 {
02202     DBG(cerr << "Locking interface... ");
02203     LOCK(&d_cache_mutex);
02204     FILE *body;
02205     CacheEntry *entry;
02206 
02207     try {
02208         entry = get_entry_from_cache_table(url);
02209 
02210         if (!entry) {
02211             UNLOCK(&d_cache_mutex);
02212             DBGN(cerr << "Unlocking interface." << endl);
02213             throw Error("There is no cache entry for the URL: " + url);
02214         }
02215 
02216         body = open_body(entry->cachename); // throws InternalErr
02217 
02218         DBG(cerr << "Returning body for: " << url << " from the cache."
02219             << endl);
02220 
02221         entry->hits++;  // Mark hit
02222         entry->locked++;  // lock entry
02223         d_locked_entries[body] = entry; // record lock, see release_cached_r...
02224         DBG(cerr << "Locking entry (non-blocking lock)... ");
02225         TRYLOCK(&entry->lock);
02226     }
02227     catch (Error &e) {
02228         UNLOCK(&entry->lock);
02229         DBGN(cerr << "Unlocking entry." << endl);
02230         UNLOCK(&d_cache_mutex);
02231         DBGN(cerr << "Unlocking interface." << endl);
02232         throw e;
02233     }
02234 
02235     UNLOCK(&d_cache_mutex);
02236     DBGN(cerr << "Unlocking interface." << endl);
02237 
02238     return body;
02239 }
02240 
02253 void
02254 HTTPCache::release_cached_response(FILE *body)
02255 {
02256     DBG(cerr << "Locking interface... ");
02257     LOCK(&d_cache_mutex);
02258     CacheEntry *entry;
02259 
02260     try {
02261         entry = d_locked_entries[body];
02262         if (!entry)
02263             throw Error("There is no cache entry for the response given.");
02264 
02265         entry->locked--;
02266         if (entry->locked == 0) {
02267             d_locked_entries.erase(body);
02268             UNLOCK(&entry->lock);
02269             DBG(cerr << "Unlocking entry " << hex << entry << dec << endl);
02270         }
02271 
02272         if (entry->locked < 0)
02273             throw Error("An unlocked entry was released");
02274     }
02275     catch (Error &e) {
02276         UNLOCK(&d_cache_mutex);
02277         DBGN(cerr << "Unlocking interface." << endl);
02278         throw e;
02279     }
02280 
02281     UNLOCK(&d_cache_mutex);
02282     DBGN(cerr << "Unlocking interface." << endl);
02283 }
02284 
02287 class DeleteUnlockedCacheEntry :
02288             public unary_function<HTTPCache::CacheEntry *&, void>
02289 {
02290 
02291     HTTPCache *d_cache;
02292 
02293 public:
02294     DeleteUnlockedCacheEntry(HTTPCache *c) : d_cache(c)
02295     {}
02296     void operator()(HTTPCache::CacheEntry *&e)
02297     {
02298         if (e) {
02299             d_cache->remove_cache_entry(e);
02300             e = 0;
02301         }
02302     }
02303 };
02304 
02317 void
02318 HTTPCache::purge_cache()
02319 {
02320     DBG(cerr << "Locking interface... ");
02321     LOCK(&d_cache_mutex);
02322     DBG(cerr << "Purging the cache." << endl);
02323 
02324     try {
02325         if (!d_locked_entries.empty()) {
02326             throw Error("Attempt to purge the cache with entries in use.");
02327         }
02328 
02329         // Walk through the cache table and, for every entry in the cache, delete
02330         // it on disk and in the cache table.
02331         for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
02332             CachePointers *slot = d_cache_table[cnt];
02333             if (slot) {
02334                 for_each(slot->begin(), slot->end(),
02335                          DeleteUnlockedCacheEntry(this));
02336                 slot->erase(remove(slot->begin(), slot->end(),
02337                                    static_cast<CacheEntry *>(0)),
02338                             slot->end());
02339             }
02340         }
02341 
02342         // Now delete the index itself.
02343         cache_index_delete();
02344     }
02345     catch (Error &e) {
02346         UNLOCK(&d_cache_mutex);
02347         DBGN(cerr << "Unlocking interface." << endl);
02348         throw e;
02349     }
02350 
02351     UNLOCK(&d_cache_mutex);
02352     DBGN(cerr << "Unlocking interface." << endl);
02353 }
02354 

Generated on Wed Jun 27 12:56:39 2007 for libdap++ by  doxygen 1.4.7