HTTPCache.h

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #ifndef _http_cache_h
00027 #define _http_cache_h
00028 
00029 #include <stdio.h>
00030 #include <pthread.h>
00031 #include <sys/types.h>
00032 #include <sys/stat.h>
00033 
00034 #ifdef WIN32
00035 #include <io.h>   // stat for win32? 09/05/02 jhrg
00036 #else
00037 #include <unistd.h>
00038 #endif
00039 
00040 #include <string>
00041 #include <vector>
00042 #include <map>
00043 
00044 #ifndef _error_h
00045 #include "Error.h"
00046 #endif
00047 
00048 #ifndef _internalerr_h
00049 #include "InternalErr.h"
00050 #endif
00051 
00052 // The private method HTTPCache::write_body() could, at one time, throw
00053 // ResponseTooBig to signal that, while writing a response body, the body was
00054 // found to be bigger than the max_entry_size property. That behavior was
00055 // dropped; the garbage collection methods remove entries larger than
00056 // max_entry_size instead. It might be that a really big entry belongs in the
00057 // cache so long as it doesn't push other entries out. 10/07/02 jhrg
00058 #ifndef _response_too_big_err_h
00059 #include "ResponseTooBigErr.h"
00060 #endif
00061 
00062 #ifndef _http_cache_disconnected_mode_h
00063 #include "HTTPCacheDisconnectedMode.h"
00064 #endif
00065 
00066 #ifndef _signal_handler_registered_err_h
00067 #include "SignalHandlerRegisteredErr.h"
00068 #endif
00069 
// Number of slots in HTTPCache::CacheTable (an array of CACHE_TABLE_SIZE
// pointers to per-hash entry vectors). 1499 is prime. Being a namespace-scope
// `const int`, the constant has internal linkage, so each translation unit
// that includes this header gets its own copy.
00070 const int CACHE_TABLE_SIZE = 1499;
00071 
// NOTE(review): this using-directive is at header scope, so it applies to
// every file that includes this header. The declarations below rely on it
// for the unqualified names string, vector and map; removing it would require
// qualifying those names here and, possibly, in code that includes this file.
00072 using namespace std;
00073 
// HTTPCache: cache of HTTP responses keyed by URL.
//
// All constructors, the copy constructor and the assignment operator are
// private; client code obtains the object through the static instance()
// method, and a static _instance pointer is kept by the class (presumably a
// singleton -- the implementation is not part of this header, confirm there).
// Only a handful of trivial members are defined inline; everything else is
// declared here and defined elsewhere (the comments below name HTTPCache.cc).
00131 class HTTPCache
00132 {
00133 public:
          // Record describing one cached response. Pointers to CacheEntry
          // objects are what the cache table (d_cache_table) stores. Several
          // fields hold values taken from the response headers, as noted on
          // the individual members.
00145     struct CacheEntry
00146     {
00147         string url;  // Location
00148         int hash;
00149         int hits;  // Hit counts
00150 
00151         string cachename;
00152 
00153         string etag;
00154         time_t lm;  // Last modified
00155         time_t expires;
00156         time_t date;  // From the response header.
00157         time_t age;
00158         time_t max_age;  // From Cache-Control
00159 
00160         unsigned long size; // Size of cached entity body
00161         bool range;  // Range is not currently supported. 10/02/02
00162         // jhrg
00163 
00164         time_t freshness_lifetime;
00165         time_t response_time;
00166         time_t corrected_initial_age;
00167 
00168         bool must_revalidate;
00169         bool no_cache;  // This field is not saved in the index.
00170 
00171         int locked;
00172         pthread_mutex_t lock ;
00173 
          // Default constructor. Gives every data member an initial value
          // (empty strings, -1 for hash and for the lm/expires/date/age/
          // max_age times -- presumably meaning "not set" -- and 0/false for
          // the rest) with one exception: `lock` does not appear in the
          // initializer list.
          // NOTE(review): `lock` is therefore not initialized here;
          // presumably the code that creates entries initializes the mutex
          // before use -- confirm in HTTPCache.cc.
00174     CacheEntry() : url(""), hash(-1), hits(0), cachename(""),
00175                 etag(""), lm(-1),
00176                 expires(-1), date(-1), age(-1), max_age(-1), size(0),
00177                 range(false), freshness_lifetime(0), response_time(0),
00178                 corrected_initial_age(0), must_revalidate(false),
00179                 no_cache(false), locked(0)
00180     {}
00181     };
00182 
00183 #ifdef WIN32
00184     //  Declared private below for gcc.  There appears to be a
00185     //  difference in public vs. private under gcc when objects
00186     //  share the same source file (??).
00187     //
00188     // My guess is that this was a bug in older versions of gcc. I've made
00189     // the functors classes (they were structs) and made them friends (so
00190     // they can access private stuff). We should not need this any longer,
00191     // but I'm hesitant to remove it since I cannot easily test with VC++.
00192     // 01/23/04 jhrg
00193     unsigned long d_max_entry_size; // Max individual entry size.
00194 
00195     void remove_cache_entry(CacheEntry *entry);
00196     bool stopGC() const;
00197 #endif
00198 
00199 private:
00200     string d_cache_root;
00201     string d_cache_index;
00202     FILE *d_locked_open_file; // Lock for single process use.
00203 
00204     bool d_cache_enabled;
00205     bool d_cache_protected;
00206     CacheDisconnectedMode d_cache_disconnected;
00207     bool d_expire_ignored;
00208     bool d_always_validate;
00209 
00210     unsigned long d_total_size; // How much can we store?
00211     unsigned long d_folder_size; // How much of that is meta data?
00212     unsigned long d_gc_buffer; // How much memory needed as buffer?
00213 #ifndef WIN32  //  Declared public above for win32
00214     unsigned long d_max_entry_size; // Max individual entry size.
00215 #endif
00216     unsigned long d_current_size;
00217     int d_default_expiration;
00218     unsigned int d_block_size; // File block size.
00219 
00220     vector<string> d_cache_control;
00221     // these are values read from a request-directive Cache-Control header.
00222     // Not to be confused with values read from the response or a cached
00223     // response (e.g., CacheEntry has a max_age field, too). These fields are
00224     // set when the set_cache_control method is called.
00225     time_t d_max_age;
00226     time_t d_max_stale;  // -1: not set, 0:any response, >0 max time.
00227     time_t d_min_fresh;
00228 
00229     int d_new_entries;  // How many entries since index write?
00230 
00231     // Lock non-const methods (also ones that use the STL).
00232     pthread_mutex_t d_cache_mutex;
00233 
00234     // Typedefs for CacheTable. A CacheTable is a vector of vectors of
00235     // CacheEntries. The outer vector is accessed using the hash value.
00236     // Entries with matching hashes occupy successive positions in the inner
00237     // vector (that's how hash collisions are resolved). Search the inner
00238     // vector for a specific match.
          // Note: as declared, the outer container is a fixed-size C array of
          // CACHE_TABLE_SIZE pointers to CachePointers (not a std::vector),
          // and the inner vectors hold CacheEntry pointers.
00239     typedef vector<CacheEntry *> CachePointers;
00240     typedef CachePointers::iterator CachePointersIter;
00241 
00242     // CACHE_TABLE_SIZE is used by the static function get_hash defined in
00243     // HTTPCache.cc. The table is indexed by the various cache entries' hash
00244     // code. 10/01/02 jhrg
00245     typedef CachePointers *CacheTable[CACHE_TABLE_SIZE];
00246 
00247     CacheTable d_cache_table;
00248 
          // Keyed by FILE pointer; presumably associates each open response
          // stream handed to a caller with the entry it came from -- confirm
          // against get_cached_response()/release_cached_response().
00249     map<FILE *, CacheEntry *> d_locked_entries;
00250     vector<string> d_open_files;
00251 
00252     static HTTPCache *_instance;
00253 
00254     friend class HTTPCacheTest; // Unit tests
00255     friend class HTTPCacheInterruptHandler;
00256 
00257     // Functors used with STL algorithms
00258 
00259     friend class DeleteExpired;
00260     friend class DeleteByHits;
00261     friend class DeleteCacheEntry;
00262     friend class DeleteUnlockedCacheEntry;
00263     friend class WriteOneCacheEntry;
00264 
00265     // Private methods
00266 
          // Intentionally empty: nothing is copied from the argument. Because
          // the copy constructor and operator= below only call clone(), a
          // "copy" of an HTTPCache carries over no state from its source.
          // Both are private, so only members and friends can invoke them.
00267     void clone(const HTTPCache &)
00268     {}
00269 
00270     HTTPCache(const HTTPCache &cache)
00271     {
00272         clone(cache);
00273     }
00274 
          // Empty body and no initializer list: the string/vector/map members
          // are default-constructed, but the built-in-typed members (bools,
          // sizes, time_t values, d_locked_open_file, the d_cache_table
          // slots) are left uninitialized by this constructor.
00275     HTTPCache()
00276     {}
00277 
          // NOTE(review): throw(Error) is a dynamic exception specification,
          // which was deprecated in C++11 and removed in C++17; it has to be
          // changed together with the out-of-line definition.
00278     HTTPCache(string cache_root, bool force) throw(Error);
00279 
          // Self-assignment is checked, but since clone() is empty the
          // assignment has no effect either way.
00280     HTTPCache &operator=(const HTTPCache &rhs)
00281     {
00282         if (this != &rhs)
00283             clone(rhs);
00284         return *this;
00285     }
00286 
00287     static void delete_instance(); // Run by atexit (hence static)
00288 
          // Cache index handling (declarations only; defined outside this
          // header).
00289     CacheEntry *cache_index_parse_line(const char *line);
00290     bool cache_index_read();
00291     bool cache_index_delete();
00292 
00293     void set_cache_root(const string &root = "");
00294 
00295     bool get_single_user_lock(bool force = false);
00296     void release_single_user_lock();
00297 
00298     void add_entry_to_cache_table(CacheEntry *e);
00299     void remove_entry_from_cache_table(const string &url);
00300     void parse_headers(CacheEntry *entry, const vector<string> &headers);
00301     void calculate_time(CacheEntry *entry, time_t request_time);
00302 #ifndef WIN32  //  Declared public above for win32
00303     void remove_cache_entry(CacheEntry *entry);
00304 #endif
          // Two lookup overloads: by URL alone, or by a hash value plus URL.
00305     CacheEntry *get_entry_from_cache_table(const string &url) const;
00306     CacheEntry *get_entry_from_cache_table(int hash, const string &url) const;
00307 
00308     // I made these four methods so they could be tested by HTTPCacheTest.
00309     // Otherwise they would be static functions in HTTPCache.cc. 10/01/02
00310     // jhrg
00311     void write_metadata(const string &cachename, const vector<string> &headers);
00312     void read_metadata(const string &cachename, vector<string> &headers);
          // NOTE(review): `src` is a pointer to const FILE, while the stdio
          // functions take a non-const FILE *; the implementation presumably
          // casts the const away -- confirm.
00313     int write_body(const string &cachename, const FILE *src);
00314     FILE *open_body(const string &cachename);
00315 
00316     void create_cache_root(const string &cache_root);
00317 
00318     string create_hash_directory(int hash);
00319     void create_location(CacheEntry *entry);
00320 
00321 #ifndef WIN32  //  Declared public above for win32
00322     bool stopGC() const;
00323 #endif
00324     bool startGC() const;
00325 
00326     void cache_index_write();
00327 
00328     void perform_garbage_collection();
00329     void expired_gc();
00330     void hits_gc();
00331 
00332 public:
          // Sole public way to obtain an HTTPCache (the constructors are
          // private). `force` defaults to false; the private constructor and
          // get_single_user_lock() take a parameter of the same name, so it
          // is presumably passed through to them -- confirm.
00333     static HTTPCache *instance(const string &cache_root, bool force = false);
00334     virtual ~HTTPCache();
00335 
00336     string get_cache_root() const;
00337 
          // Setter/getter pairs for the cache's configuration. The getters
          // are const member functions, with the exception of
          // get_cache_control() noted below.
00338     void set_cache_enabled(bool mode);
00339     bool is_cache_enabled() const;
00340 
00341     void set_cache_protected(bool mode);
00342     bool is_cache_protected() const;
00343 
00344     void set_cache_disconnected(CacheDisconnectedMode mode);
00345     CacheDisconnectedMode get_cache_disconnected() const;
00346 
00347     void set_expire_ignored(bool mode);
00348     bool is_expire_ignored() const;
00349 
00350     void set_max_size(unsigned long size);
00351     unsigned long get_max_size() const;
00352 
00353     void set_max_entry_size(unsigned long size);
00354     unsigned long get_max_entry_size() const;
00355 
00356     void set_default_expiration(int exp_time);
00357     int get_default_expiration() const;
00358 
00359     void set_always_validate(bool validate);
00360     bool get_always_validate() const;
00361 
          // Per the comment on d_max_age/d_max_stale/d_min_fresh above,
          // calling set_cache_control() is what sets those request-directive
          // fields. get_cache_control() returns a vector by value and is the
          // only getter in this group not declared const.
00362     void set_cache_control(const vector<string> &cc);
00363     vector<string> get_cache_control();
00364 
          // Storing and refreshing responses. `headers` is passed as a vector
          // of strings; `body` is a pointer to const FILE (see the note on
          // write_body() about constness).
00365     bool cache_response(const string &url, time_t request_time,
00366                         const vector<string> &headers, const FILE *body);
00367     vector<string> get_conditional_request_headers(const string &url);
00368     void update_response(const string &url, time_t request_time,
00369                          const vector<string> &headers);
00370 
          // Lookup and retrieval. The get_cached_response* methods return a
          // FILE pointer and release_cached_response() accepts one;
          // presumably every stream obtained from the former must be handed
          // back to the latter -- confirm in the implementation.
00371     bool is_url_in_cache(const string &url);
00372     bool is_url_valid(const string &url);
00373     FILE *get_cached_response(const string &url, vector<string> &headers);
00374     FILE *get_cached_response_body(const string &url);
00375     void release_cached_response(FILE *response);
00376 
00377     void purge_cache();
00378 };
00379 
00380 #endif // _http_cache_h

Generated on Wed Jun 27 12:56:39 2007 for libdap++ by  doxygen 1.4.7