HTTPCache.h

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #ifndef _http_cache_h
00027 #define _http_cache_h
00028 
00029 #include <pthread.h>
00030 
00031 #ifdef WIN32
00032 #include <io.h>   // stat for win32? 09/05/02 jhrg
00033 #endif
00034 
00035 #include <string>
00036 #include <vector>
00037 #include <map>
00038 
00039 #ifndef _error_h
00040 #include "Error.h"
00041 #endif
00042 
00043 #ifndef _internalerr_h
00044 #include "InternalErr.h"
00045 #endif
00046 
00047 // The private method HTTPCache::write_body() could, at one time, throw
00048 // ResponseTooBig to signal that while writing a response body it was found
00049 // to be bigger than the max_entry_size property. But I bagged that; the
00050 // garbage collection methods remove entries larger than max_entry_size. It
00051 // might be that a really big entry belongs in the cache so long as it
00052 // doesn't push other entries out. 10/07/02 jhrg
00053 #ifndef _response_too_big_err_h
00054 #include "ResponseTooBigErr.h"
00055 #endif
00056 
00057 #ifndef _http_cache_disconnected_mode_h
00058 #include "HTTPCacheDisconnectedMode.h"
00059 #endif
00060 
00061 #ifndef _signal_handler_registered_err_h
00062 #include "SignalHandlerRegisteredErr.h"
00063 #endif
00064 
// Number of slots in the cache's hash table: it is the array bound of the
// HTTPCache::CacheTable typedef declared below and, per the note next to
// that typedef, is also used by get_hash() in HTTPCache.cc.
00065 const int CACHE_TABLE_SIZE = 1499;
00066 
// NOTE(review): a using-directive at header scope applies to every file that
// includes this header. The declarations below rely on it (unqualified
// string, vector, map), so it cannot be dropped without qualifying those
// names first.
00067 using namespace std;
00068 
00069 namespace libdap
00070 {
00071 
// HTTPCache: cache of HTTP responses looked up by URL. As declared here, all
// constructors are private and the only public way to obtain an object is the
// static instance() method; the copy constructor and operator= are kept
// private as well. Most members are only declared in this header; their
// definitions are not visible here (presumably they live in HTTPCache.cc).
00129 class HTTPCache
00130 {
00131 public:
          // Bookkeeping record for one cached response. The default
          // constructor below gives every field a starting value except
          // `lock`, which it does not touch.
00143     struct CacheEntry
00144     {
00145         string url;  // Location
00146         int hash;
00147         int hits;  // Hit counts
00148 
00149         string cachename;
00150 
00151         string etag;
00152         time_t lm;  // Last modified
00153         time_t expires;
00154         time_t date;  // From the response header.
00155         time_t age;
00156         time_t max_age;  // From Cache-Control
00157 
00158         unsigned long size; // Size of cached entity body
00159         bool range;  // Range is not currently supported. 10/02/02
00160         // jhrg
00161 
              // NOTE(review): these three look like the derived values that
              // calculate_time() fills in (HTTP/1.1 age calculation) --
              // confirm in HTTPCache.cc.
00162         time_t freshness_lifetime;
00163         time_t response_time;
00164         time_t corrected_initial_age;
00165 
00166         bool must_revalidate;
00167         bool no_cache;  // This field is not saved in the index.
00168 
              // NOTE(review): `lock` is absent from the constructor's
              // initializer list, so it has to be set up (e.g. with
              // pthread_mutex_init) by whoever creates the entry -- confirm
              // in HTTPCache.cc.
00169         int locked;
00170         pthread_mutex_t lock ;
00171 
          // Default state: empty strings, hash/lm/expires/date/age/max_age
          // set to -1, counters and derived times set to 0, flags false.
00172     CacheEntry() : url(""), hash(-1), hits(0), cachename(""),
00173                 etag(""), lm(-1),
00174                 expires(-1), date(-1), age(-1), max_age(-1), size(0),
00175                 range(false), freshness_lifetime(0), response_time(0),
00176                 corrected_initial_age(0), must_revalidate(false),
00177                 no_cache(false), locked(0)
00178     {}
00179     };
00180 
00181 #ifdef WIN32
00182     //  Declared private below for gcc.  There appears to be a
00183     //  difference in public vs. private under gcc when objects
00184     //  share the same source file (??).
00185     //
00186     // My guess is that this was a bug in older versions of gcc. I've made
00187     // the functors classes (they were structs) and made them friends (so
00188     // they can access private stuff). We should not need this any longer,
00189     // but I'm hesitant to remove it since I cannot easily test with VC++.
00190     // 01/23/04 jhrg
          //
          // Net effect: on WIN32 these three members are public; on every
          // other platform the matching #ifndef WIN32 sections further down
          // declare them private.
00191     unsigned long d_max_entry_size; // Max individual entry size.
00192 
00193     void remove_cache_entry(CacheEntry *entry);
00194     bool stopGC() const;
00195 #endif
00196 
00197 private:
00198     string d_cache_root;
00199     string d_cache_index;
00200     FILE *d_locked_open_file; // Lock for single process use.
00201 
00202     bool d_cache_enabled;
00203     bool d_cache_protected;
00204     CacheDisconnectedMode d_cache_disconnected;
00205     bool d_expire_ignored;
00206     bool d_always_validate;
00207 
00208     unsigned long d_total_size; // How much can we store?
00209     unsigned long d_folder_size; // How much of that is meta data?
00210     unsigned long d_gc_buffer; // How much memory needed as buffer?
00211 #ifndef WIN32  //  Declared public above for win32
00212     unsigned long d_max_entry_size; // Max individual entry size.
00213 #endif
00214     unsigned long d_current_size;
00215     int d_default_expiration;
00216     unsigned int d_block_size; // File block size.
00217 
00218     vector<string> d_cache_control;
00219     // these are values read from a request-directive Cache-Control header.
00220     // Not to be confused with values read from the response or a cached
00221     // response (e.g., CacheEntry has a max_age field, too). These fields are
00222     // set when the set_cache_control method is called.
00223     time_t d_max_age;
00224     time_t d_max_stale;  // -1: not set, 0:any response, >0 max time.
00225     time_t d_min_fresh;
00226 
00227     int d_new_entries;  // How many entries since index write?
00228 
00229     // Lock non-const methods (also ones that use the STL).
00230     pthread_mutex_t d_cache_mutex;
00231 
00232     // Typedefs for CacheTable. A CacheTable is a vector of vectors of
00233     // CacheEntries. The outer vector is accessed using the hash value.
00234     // Entries with matching hashes occupy successive positions in the inner
00235     // vector (that's how hash collisions are resolved). Search the inner
00236     // vector for a specific match.
          //
          // As declared, the "outer vector" is a fixed-size C array of
          // CACHE_TABLE_SIZE pointers to CachePointers, and each
          // CachePointers holds CacheEntry pointers rather than entries.
00237     typedef vector<CacheEntry *> CachePointers;
00238     typedef CachePointers::iterator CachePointersIter;
00239 
00240     // CACHE_TABLE_SIZE is used by the static function get_hash defined in
00241     // HTTPCache.cc. The table is indexed by the various cache entries' hash
00242     // code. 10/01/02 jhrg
00243     typedef CachePointers *CacheTable[CACHE_TABLE_SIZE];
00244 
00245     CacheTable d_cache_table;
00246 
          // NOTE(review): presumably maps each FILE* handed out by the
          // get_cached_response*() methods to its entry so that
          // release_cached_response() can find it again -- confirm.
00247     map<FILE *, CacheEntry *> d_locked_entries;
00248     vector<string> d_open_files;
00249 
          // Backing pointer for instance(); presumably the one shared
          // object -- confirm in HTTPCache.cc.
00250     static HTTPCache *_instance;
00251 
00252     friend class HTTPCacheTest; // Unit tests
00253     friend class HTTPCacheInterruptHandler;
00254 
00255     // Functors used with STL algorithms
00256 
00257     friend class DeleteExpired;
00258     friend class DeleteByHits;
00259     friend class DeleteCacheEntry;
00260     friend class DeleteUnlockedCacheEntry;
00261     friend class WriteOneCacheEntry;
00262 
00263     // Private methods
00264 
          // Intentionally empty: copies nothing from its argument. The
          // private copy constructor and operator= below both route
          // through it, so neither transfers any state.
00265     void clone(const HTTPCache &)
00266     {}
00267 
          // Private copy constructor. Because clone() is a no-op, the new
          // object's members are default-initialized only; built-in members
          // (sizes, flags, pointers, the table) are left unset.
00268     HTTPCache(const HTTPCache &cache)
00269     {
00270         clone(cache);
00271     }
00272 
          // Private default constructor with an empty body and no
          // initializer list: built-in members are left unset.
00273     HTTPCache()
00274     {}
00275 
          // The constructor that takes the cache root; defined elsewhere.
          // NOTE(review): throw(Error) is a dynamic exception specification;
          // the out-of-line definition must repeat it, and the form was
          // removed from the language in C++17.
00276     HTTPCache(string cache_root, bool force) throw(Error);
00277 
          // Private assignment: guards against self-assignment, then calls
          // the no-op clone(), so *this is returned unchanged.
00278     HTTPCache &operator=(const HTTPCache &rhs)
00279     {
00280         if (this != &rhs)
00281             clone(rhs);
00282         return *this;
00283     }
00284 
00285     static void delete_instance(); // Run by atexit (hence static)
00286 
00287     CacheEntry *cache_index_parse_line(const char *line);
00288     bool cache_index_read();
00289     bool cache_index_delete();
00290 
00291     void set_cache_root(const string &root = "");
00292 
00293     bool get_single_user_lock(bool force = false);
00294     void release_single_user_lock();
00295 
00296     void add_entry_to_cache_table(CacheEntry *e);
00297     void remove_entry_from_cache_table(const string &url);
00298     void parse_headers(CacheEntry *entry, const vector<string> &headers);
00299     void calculate_time(CacheEntry *entry, time_t request_time);
00300 #ifndef WIN32  //  Declared public above for win32
00301     void remove_cache_entry(CacheEntry *entry);
00302 #endif
00303     CacheEntry *get_entry_from_cache_table(const string &url) const;
00304     CacheEntry *get_entry_from_cache_table(int hash, const string &url) const;
00305 
00306     // I made these four methods so they could be tested by HTTPCacheTest.
00307     // Otherwise they would be static functions in HTTPCache.cc. 10/01/02
00308     // jhrg
          //
          // NOTE(review): write_body() takes `const FILE *`, but the stdio
          // read functions take a non-const FILE*, so the implementation
          // presumably casts the const away -- confirm.
00309     void write_metadata(const string &cachename, const vector<string> &headers);
00310     void read_metadata(const string &cachename, vector<string> &headers);
00311     int write_body(const string &cachename, const FILE *src);
00312     FILE *open_body(const string &cachename);
00313 
00314     void create_cache_root(const string &cache_root);
00315 
00316     string create_hash_directory(int hash);
00317     void create_location(CacheEntry *entry);
00318 
00319 #ifndef WIN32  //  Declared public above for win32
00320     bool stopGC() const;
00321 #endif
00322     bool startGC() const;
00323 
00324     void cache_index_write();
00325 
00326     void perform_garbage_collection();
00327     void expired_gc();
00328     void hits_gc();
00329 
00330 public:
          // Sole public entry point for obtaining an HTTPCache (the
          // constructors above are private). `force` defaults to false.
00331     static HTTPCache *instance(const string &cache_root, bool force = false);
00332     virtual ~HTTPCache();
00333 
00334     string get_cache_root() const;
00335 
          // Accessor pairs for the configuration members declared in the
          // private section above.
00336     void set_cache_enabled(bool mode);
00337     bool is_cache_enabled() const;
00338 
00339     void set_cache_protected(bool mode);
00340     bool is_cache_protected() const;
00341 
00342     void set_cache_disconnected(CacheDisconnectedMode mode);
00343     CacheDisconnectedMode get_cache_disconnected() const;
00344 
00345     void set_expire_ignored(bool mode);
00346     bool is_expire_ignored() const;
00347 
00348     void set_max_size(unsigned long size);
00349     unsigned long get_max_size() const;
00350 
00351     void set_max_entry_size(unsigned long size);
00352     unsigned long get_max_entry_size() const;
00353 
00354     void set_default_expiration(int exp_time);
00355     int get_default_expiration() const;
00356 
00357     void set_always_validate(bool validate);
00358     bool get_always_validate() const;
00359 
          // Unlike the other getters, get_cache_control() is not declared
          // const.
00360     void set_cache_control(const vector<string> &cc);
00361     vector<string> get_cache_control();
00362 
          // Storing and refreshing responses. `body` is a `const FILE *`,
          // matching write_body() above.
00363     bool cache_response(const string &url, time_t request_time,
00364                         const vector<string> &headers, const FILE *body);
00365     vector<string> get_conditional_request_headers(const string &url);
00366     void update_response(const string &url, time_t request_time,
00367                          const vector<string> &headers);
00368 
          // Lookup and retrieval. NOTE(review): presumably every FILE*
          // returned by the get_cached_response*() methods must be handed
          // back through release_cached_response() -- confirm in
          // HTTPCache.cc.
00369     bool is_url_in_cache(const string &url);
00370     bool is_url_valid(const string &url);
00371     FILE *get_cached_response(const string &url, vector<string> &headers);
00372     FILE *get_cached_response(const string &url, vector<string> &headers,
00373                               string &cacheName);
00374     FILE *get_cached_response_body(const string &url);
00375     void release_cached_response(FILE *response);
00376 
00377     void purge_cache();
00378 };
00379 
00380 } // namespace libdap
00381 
00382 #endif // _http_cache_h

Generated on Tue Jun 10 18:00:31 2008 for libdap++ by  doxygen 1.5.4