00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "config.h"
00027
00028
00029
00030
00031
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037
00038 #include <cstring>
00039 #include <iostream>
00040 #include <sstream>
00041 #include <algorithm>
00042 #include <iterator>
00043 #include <set>
00044
00045 #include "Error.h"
00046 #include "InternalErr.h"
00047 #include "ResponseTooBigErr.h"
00048 #ifndef WIN32
00049 #include "SignalHandler.h"
00050 #endif
00051 #include "HTTPCacheInterruptHandler.h"
00052 #include "HTTPCacheTable.h"
00053
00054 #include "util_mit.h"
00055 #include "debug.h"
00056
00057 #ifdef WIN32
00058 #include <direct.h>
00059 #include <time.h>
00060 #include <fcntl.h>
00061 #define MKDIR(a,b) _mkdir((a))
00062 #define REMOVE(a) remove((a))
00063 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
00064 #define DIR_SEPARATOR_CHAR '\\'
00065 #define DIR_SEPARATOR_STR "\\"
00066 #else
00067 #define MKDIR(a,b) mkdir((a), (b))
00068 #define REMOVE(a) remove((a))
00069 #define MKSTEMP(a) mkstemp((a))
00070 #define DIR_SEPARATOR_CHAR '/'
00071 #define DIR_SEPARATOR_STR "/"
00072 #endif
00073
00074 #define CACHE_META ".meta"
00075 #define CACHE_INDEX ".index"
00076 #define CACHE_EMPTY_ETAG "@cache@"
// 24 hours, expressed in seconds. Parenthesized so the macro expands as a
// single value inside larger expressions (e.g. x / NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600)
// 48 hours, expressed in seconds: max expiration derived from LM.
#define MAX_LM_EXPIRATION (48*3600)
// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif
const int CACHE_TABLE_SIZE = 1499;
using namespace std;
namespace libdap {
00080 int
get_hash(const string &url)
{
int hash = 0;
for (const char *ptr = url.c_str(); *ptr; ptr++)
hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
return hash;
}
/**
 * Build a cache table for the cache stored under \c cache_root.
 *
 * The index file name is \c cache_root with CACHE_INDEX appended. The table
 * starts with CACHE_TABLE_SIZE empty buckets and is then populated by
 * cache_index_read().
 *
 * @param cache_root Path prefix of the on-disk cache.
 * @param block_size Block size used when accounting for disk space.
 */
HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
    d_cache_root(cache_root),
    d_block_size(block_size),
    d_current_size(0),
    d_new_entries(0)
{
    d_cache_index = cache_root + CACHE_INDEX;

    // Every bucket starts out empty (null); buckets are allocated on demand.
    d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
    std::fill(d_cache_table, d_cache_table + CACHE_TABLE_SIZE,
              static_cast<CacheEntries *>(0));

    cache_index_read();
}
00084 static inline void
delete_cache_entry(HTTPCacheTable::CacheEntry *e)
{
DBG2(cerr << "Deleting CacheEntry: " << e << endl);
00085 #if 0
00086 DESTROY(&e->get_lock());
00087 #endif
00088 delete e;
00089 }
00090
00091 HTTPCacheTable::~HTTPCacheTable() {
00092 for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
00093 HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
00094 if (cp) {
00095
00096 for_each(cp->begin(), cp->end(), delete_cache_entry);
00097
00098
00099 delete get_cache_table()[i];
00100 get_cache_table()[i] = 0;
00101 }
00102 }
00103
00104 delete[] d_cache_table;
00105 }
00106
00114 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00115 time_t d_time;
00116 HTTPCacheTable &d_table;
00117
00118 public:
00119 DeleteExpired(HTTPCacheTable &table, time_t t) :
00120 d_time(t), d_table(table) {
00121 if (!t)
00122 d_time = time(0);
00123 }
00124
00125 void operator()(HTTPCacheTable::CacheEntry *&e) {
00126 if (e && !e->readers && (e->freshness_lifetime
00127 < (e->corrected_initial_age + (d_time - e->response_time)))) {
00128 DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
00129 d_table.remove_cache_entry(e);
00130 delete e; e = 0;
00131 }
00132 }
00133 };
00134
00135
00136 void HTTPCacheTable::delete_expired_entries(time_t time) {
00137
00138 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00139 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00140 if (slot) {
00141 for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
00142 slot->erase(remove(slot->begin(), slot->end(),
00143 static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
00144 }
00145 }
00146 }
00147
00154 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00155 HTTPCacheTable &d_table;
00156 int d_hits;
00157
00158 public:
00159 DeleteByHits(HTTPCacheTable &table, int hits) :
00160 d_table(table), d_hits(hits) {
00161 }
00162
00163 void operator()(HTTPCacheTable::CacheEntry *&e) {
00164 if (e && !e->readers && e->hits <= d_hits) {
00165 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00166 d_table.remove_cache_entry(e);
00167 delete e; e = 0;
00168 }
00169 }
00170 };
00171
00172 void
00173 HTTPCacheTable::delete_by_hits(int hits) {
00174 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00175 if (get_cache_table()[cnt]) {
00176 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00177 for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
00178 slot->erase(remove(slot->begin(), slot->end(),
00179 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00180 slot->end());
00181
00182 }
00183 }
00184 }
00185
00190 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00191 HTTPCacheTable &d_table;
00192 unsigned int d_size;
00193
00194 public:
00195 DeleteBySize(HTTPCacheTable &table, unsigned int size) :
00196 d_table(table), d_size(size) {
00197 }
00198
00199 void operator()(HTTPCacheTable::CacheEntry *&e) {
00200 if (e && !e->readers && e->size > d_size) {
00201 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00202 d_table.remove_cache_entry(e);
00203 delete e; e = 0;
00204 }
00205 }
00206 };
00207
00208 void HTTPCacheTable::delete_by_size(unsigned int size) {
00209 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00210 if (get_cache_table()[cnt]) {
00211 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00212 for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
00213 slot->erase(remove(slot->begin(), slot->end(),
00214 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00215 slot->end());
00216
00217 }
00218 }
00219 }
00220
00227
00234 bool
00235 HTTPCacheTable::cache_index_delete()
00236 {
00237 d_new_entries = 0;
00238
00239 return (REMOVE(d_cache_index.c_str()) == 0);
00240 }
00241
00250 bool
00251 HTTPCacheTable::cache_index_read()
00252 {
00253 FILE *fp = fopen(d_cache_index.c_str(), "r");
00254
00255
00256 if (!fp) {
00257 return false;
00258 }
00259
00260 char line[1024];
00261 while (!feof(fp) && fgets(line, 1024, fp)) {
00262 add_entry_to_cache_table(cache_index_parse_line(line));
00263 DBG2(cerr << line << endl);
00264 }
00265
00266 int res = fclose(fp) ;
00267 if (res) {
00268 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
00269 }
00270
00271 d_new_entries = 0;
00272
00273 return true;
00274 }
00275
00283 HTTPCacheTable::CacheEntry *
00284 HTTPCacheTable::cache_index_parse_line(const char *line)
00285 {
00286
00287 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
00288 #if 0
00289 INIT(&entry->d_lock);
00290 #endif
00291 istringstream iss(line);
00292 iss >> entry->url;
00293 iss >> entry->cachename;
00294
00295 iss >> entry->etag;
00296 if (entry->etag == CACHE_EMPTY_ETAG)
00297 entry->etag = "";
00298
00299 iss >> entry->lm;
00300 iss >> entry->expires;
00301 iss >> entry->size;
00302 iss >> entry->range;
00303
00304 iss >> entry->hash;
00305 iss >> entry->hits;
00306 iss >> entry->freshness_lifetime;
00307 iss >> entry->response_time;
00308 iss >> entry->corrected_initial_age;
00309
00310 iss >> entry->must_revalidate;
00311
00312 return entry;
00313 }
00314
00317 class WriteOneCacheEntry :
00318 public unary_function<HTTPCacheTable::CacheEntry *, void>
00319 {
00320
00321 FILE *d_fp;
00322
00323 public:
00324 WriteOneCacheEntry(FILE *fp) : d_fp(fp)
00325 {}
00326
00327 void operator()(HTTPCacheTable::CacheEntry *e)
00328 {
00329 if (e && fprintf(d_fp,
00330 "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
00331 e->url.c_str(),
00332 e->cachename.c_str(),
00333 e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
00334 (long)(e->lm),
00335 (long)(e->expires),
00336 e->size,
00337 e->range ? '1' : '0',
00338 e->hash,
00339 e->hits,
00340 (long)(e->freshness_lifetime),
00341 (long)(e->response_time),
00342 (long)(e->corrected_initial_age),
00343 e->must_revalidate ? '1' : '0') < 0)
00344 throw Error("Cache Index. Error writing cache index\n");
00345 }
00346 };
00347
00357 void
00358 HTTPCacheTable::cache_index_write()
00359 {
00360 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
00361
00362
00363 FILE * fp = NULL;
00364 if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
00365 throw Error(string("Cache Index. Can't open `") + d_cache_index
00366 + string("' for writing"));
00367 }
00368
00369
00370
00371
00372 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00373 HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
00374 if (cp)
00375 for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
00376 }
00377
00378
00379 int res = fclose(fp);
00380 if (res) {
00381 DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
00382 << (void *)fp << endl);
00383 }
00384
00385 d_new_entries = 0;
00386 }
00387
00389
00402 string
00403 HTTPCacheTable::create_hash_directory(int hash)
00404 {
00405 struct stat stat_info;
00406 ostringstream path;
00407
00408 path << d_cache_root << hash;
00409 string p = path.str();
00410
00411 if (stat(p.c_str(), &stat_info) == -1) {
00412 DBG2(cerr << "Cache....... Create dir " << p << endl);
00413 if (MKDIR(p.c_str(), 0777) < 0) {
00414 DBG2(cerr << "Cache....... Can't create..." << endl);
00415 throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
00416 }
00417 }
00418 else {
00419 DBG2(cerr << "Cache....... Directory " << p << " already exists"
00420 << endl);
00421 }
00422
00423 return p;
00424 }
00425
00440 void
00441 HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
00442 {
00443 string hash_dir = create_hash_directory(entry->hash);
00444 #ifdef WIN32
00445 hash_dir += "\\dodsXXXXXX";
00446 #else
00447 hash_dir += "/dodsXXXXXX";
00448 #endif
00449
00450
00451 char *templat = new char[hash_dir.size() + 1];
00452 strcpy(templat, hash_dir.c_str());
00453
00454
00455
00456
00457
00458 int fd = MKSTEMP(templat);
00459 if (fd < 0) {
00460 delete[] templat; templat = 0;
00461 close(fd);
00462 throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
00463 }
00464
00465 entry->cachename = templat;
00466 delete[] templat; templat = 0;
00467 close(fd);
00468 }
00469
00470
00472 static inline int
00473 entry_disk_space(int size, unsigned int block_size)
00474 {
00475 unsigned int num_of_blocks = (size + block_size) / block_size;
00476
00477 DBG(cerr << "size: " << size << ", block_size: " << block_size
00478 << ", num_of_blocks: " << num_of_blocks << endl);
00479
00480 return num_of_blocks * block_size;
00481 }
00482
00486
00492 void
00493 HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
00494 {
00495 int hash = entry->hash;
00496
00497 if (!d_cache_table[hash])
00498 d_cache_table[hash] = new CacheEntries;
00499
00500 d_cache_table[hash]->push_back(entry);
00501
00502 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
00503 << ", entry->size: " << entry->size << ", block size: " << d_block_size
00504 << endl);
00505
00506 d_current_size += entry_disk_space(entry->size, d_block_size);
00507
00508 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
00509
00510 increment_new_entries();
00511 }
00512
00516 HTTPCacheTable::CacheEntry *
00517 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url)
00518 {
00519 return get_locked_entry_from_cache_table(get_hash(url), url);
00520 }
00521
00529 HTTPCacheTable::CacheEntry *
00530 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url)
00531 {
00532 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
00533 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
00534 if (d_cache_table[hash]) {
00535 CacheEntries *cp = d_cache_table[hash];
00536 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00537
00538
00539 if ((*i) && (*i)->url == url) {
00540 (*i)->lock_read_response();
00541 #if 0
00542 (*i)->lock();
00543 #endif
00544 return *i;
00545 }
00546 }
00547 }
00548
00549 return 0;
00550 }
00551
00558 HTTPCacheTable::CacheEntry *
00559 HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
00560 {
00561 int hash = get_hash(url);
00562 if (d_cache_table[hash]) {
00563 CacheEntries *cp = d_cache_table[hash];
00564 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00565
00566
00567 if ((*i) && (*i)->url == url) {
00568 (*i)->lock_write_response();
00569 #if 0
00570 (*i)->lock();
00571 #endif
00572 return *i;
00573 }
00574 }
00575 }
00576
00577 return 0;
00578 }
00579
00587 void
00588 HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
00589 {
00590
00591
00592 if (entry->readers)
00593 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
00594
00595 REMOVE(entry->cachename.c_str());
00596 REMOVE(string(entry->cachename + CACHE_META).c_str());
00597
00598 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00599
00600 unsigned int eds = entry_disk_space(entry->size, get_block_size());
00601 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
00602
00603 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00604 }
00605
00608 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
00609 {
00610 string d_url;
00611 HTTPCacheTable *d_cache_table;
00612
00613 public:
00614 DeleteCacheEntry(HTTPCacheTable *c, const string &url)
00615 : d_url(url), d_cache_table(c)
00616 {}
00617
00618 void operator()(HTTPCacheTable::CacheEntry *&e)
00619 {
00620 if (e && e->url == d_url) {
00621 e->lock_write_response();
00622 d_cache_table->remove_cache_entry(e);
00623 e->unlock_write_response();
00624 delete e; e = 0;
00625 }
00626 }
00627 };
00628
00635 void
00636 HTTPCacheTable::remove_entry_from_cache_table(const string &url)
00637 {
00638 int hash = get_hash(url);
00639 if (d_cache_table[hash]) {
00640 CacheEntries *cp = d_cache_table[hash];
00641 for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
00642 cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
00643 cp->end());
00644 }
00645 }
00646
00649 class DeleteUnlockedCacheEntry :
00650 public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00651 HTTPCacheTable &d_table;
00652
00653 public:
00654 DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
00655 d_table(t) {
00656 }
00657 void operator()(HTTPCacheTable::CacheEntry *&e) {
00658 if (e) {
00659 d_table.remove_cache_entry(e);
00660 delete e; e = 0;
00661 }
00662 }
00663 };
00664
00665 void HTTPCacheTable::delete_all_entries() {
00666
00667
00668 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00669 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00670 if (slot) {
00671 for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
00672 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)),
00673 slot->end());
00674 }
00675 }
00676
00677 cache_index_delete();
00678 }
00679
00693 void
00694 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
00695 {
00696 entry->response_time = time(NULL);
00697 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
00698 time_t corrected_received_age = max(apparent_age, entry->age);
00699 time_t response_delay = entry->response_time - request_time;
00700 entry->corrected_initial_age = corrected_received_age + response_delay;
00701
00702
00703
00704
00705 time_t freshness_lifetime = entry->max_age;
00706 if (freshness_lifetime < 0) {
00707 if (entry->expires < 0) {
00708 if (entry->lm < 0) {
00709 freshness_lifetime = default_expiration;
00710 }
00711 else {
00712 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
00713 }
00714 }
00715 else
00716 freshness_lifetime = entry->expires - entry->date;
00717 }
00718
00719 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
00720
00721 DBG2(cerr << "Cache....... Received Age " << entry->age
00722 << ", corrected " << entry->corrected_initial_age
00723 << ", freshness lifetime " << entry->freshness_lifetime << endl);
00724 }
00725
00737 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry,
00738 unsigned long max_entry_size, const vector<string> &headers) {
00739 vector<string>::const_iterator i;
00740 for (i = headers.begin(); i != headers.end(); ++i) {
00741
00742 if ((*i).empty())
00743 continue;
00744
00745 string::size_type colon = (*i).find(':');
00746
00747
00748 if (colon == string::npos)
00749 continue;
00750
00751 string header = (*i).substr(0, (*i).find(':'));
00752 string value = (*i).substr((*i).find(": ") + 2);
00753 DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
00754
00755 if (header == "ETag") {
00756 entry->etag = value;
00757 } else if (header == "Last-Modified") {
00758 entry->lm = parse_time(value.c_str());
00759 } else if (header == "Expires") {
00760 entry->expires = parse_time(value.c_str());
00761 } else if (header == "Date") {
00762 entry->date = parse_time(value.c_str());
00763 } else if (header == "Age") {
00764 entry->age = parse_time(value.c_str());
00765 } else if (header == "Content-Length") {
00766 unsigned long clength = strtoul(value.c_str(), 0, 0);
00767 if (clength > max_entry_size)
00768 entry->set_no_cache(true);
00769 } else if (header == "Cache-Control") {
00770
00771
00772
00773 if (value == "no-cache" || value == "no-store")
00774
00775
00776
00777 entry->set_no_cache(true);
00778 else if (value == "must-revalidate")
00779 entry->must_revalidate = true;
00780 else if (value.find("max-age") != string::npos) {
00781 string max_age = value.substr(value.find("=" + 1));
00782 entry->max_age = parse_time(max_age.c_str());
00783 }
00784 }
00785 }
00786 }
00787
00789
00790
00791 void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
00792 entry->hits++;
00793 d_locked_entries[body] = entry;
00794 #if 0
00795 entry->unlock();
00796 #endif
00797 }
00798
00799 void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
00800 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
00801 if (!entry)
00802 throw InternalErr("There is no cache entry for the response given.");
00803
00804 d_locked_entries.erase(body);
00805 entry->unlock_read_response();
00806
00807 if (entry->readers < 0)
00808 throw InternalErr("An unlocked entry was released");
00809 }
00810
00811 bool HTTPCacheTable::is_locked_read_responses() {
00812 return !d_locked_entries.empty();
00813 }
00814
00815 }
00816