cgi_util.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //         Reza Nekovei <rnekovei@intcomm.net>
00010 //
00011 // This library is free software; you can redistribute it and/or
00012 // modify it under the terms of the GNU Lesser General Public
00013 // License as published by the Free Software Foundation; either
00014 // version 2.1 of the License, or (at your option) any later version.
00015 //
00016 // This library is distributed in the hope that it will be useful,
00017 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00019 // Lesser General Public License for more details.
00020 //
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License along with this library; if not, write to the Free Software
00023 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00024 //
00025 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00026 
00027 // (c) COPYRIGHT URI/MIT 1994-2001
00028 // Please read the full copyright statement in the file COPYRIGHT_URI.
00029 //
00030 // Authors:
00031 //      jhrg,jimg       James Gallagher <jgallagher@gso.uri.edu>
00032 //      reza            Reza Nekovei <rnekovei@intcomm.net>
00033 
00034 // A few useful routines which are used in CGI programs.
00035 //
00036 // ReZa 9/30/94
00037 
00038 #include "config.h"
00039 
00040 static char rcsid[] not_used =
00041     {"$Id: cgi_util.cc 16612 2007-06-04 22:08:57Z jimg $"
00042     };
00043 
00044 #include <stdio.h>
00045 #include <stdlib.h>
00046 #include <ctype.h>
00047 
00048 #ifndef TM_IN_SYS_TIME
00049 #include <time.h>
00050 #else
00051 #include <sys/time.h>
00052 #endif
00053 
00054 #include <sys/types.h>
00055 #include <sys/stat.h>
00056 
00057 #ifndef WIN32
00058 #include <unistd.h>
00059 #include <sys/wait.h>
00060 #else
00061 #include <io.h>
00062 #include <fcntl.h>
00063 #include <process.h>
00064 // Win32 does not define this. 08/21/02 jhrg
00065 #define F_OK 0
00066 #endif
00067 
00068 #include <iostream>
00069 #include <sstream>
00070 #include <fstream>
00071 #include <string>
00072 
00073 #include "cgi_util.h"
00074 #include "util.h"  // This supplies flush_stream for WIN32.
00075 #include "debug.h"
00076 
00077 
00078 #ifdef WIN32
00079 #define FILE_DELIMITER '\\'
00080 #else  //  default to unix
00081 #define FILE_DELIMITER '/'
00082 #endif
00083 
00084 // ...not using a const string here to avoid global objects. jhrg 12/23/05
00085 #define CRLF "\r\n"             // Change here and in expr-test.cc.
00086 
00087 using namespace std;
00088 
// Length of the string produced by asctime().
static const int TimLen = 26;

// Size of the clumps to new in fmakeword().
static const int CLUMP_SIZE = 1024;
00091 
00105 bool
00106 do_version(const string &script_ver, const string &dataset_ver)
00107 {
00108     fprintf(stdout, "HTTP/1.0 200 OK%s", CRLF) ;
00109     fprintf(stdout, "XDODS-Server: %s%s", DVR, CRLF) ;
00110     fprintf(stdout, "XOPeNDAP-Server: %s%s", DVR, CRLF) ;
00111     fprintf(stdout, "XDAP: %s%s", DAP_PROTOCOL_VERSION, CRLF) ;
00112     fprintf(stdout, "Content-Type: text/plain%s", CRLF) ;
00113     fprintf(stdout, CRLF) ;
00114 
00115     fprintf(stdout, "Core software version: %s%s", DVR, CRLF) ;
00116 
00117     if (script_ver != "")
00118         fprintf(stdout, "Server Script Revision: %s%s", script_ver.c_str(), CRLF) ;
00119 
00120     if (dataset_ver != "")
00121         fprintf(stdout,  "Dataset version: %s%s", dataset_ver.c_str(), CRLF) ;
00122 
00123     fflush(stdout) ;            // Not sure this is needed. jhrg 12/23/05
00124 
00125     return true;
00126 }
00127 
00172 string
00173 find_ancillary_file(const string &pathname, const string &ext,
00174                     const string &dir, const string &file)
00175 {
00176     string::size_type slash = pathname.rfind('/') + 1;
00177     string directory = pathname.substr(0, slash);
00178     string filename = pathname.substr(slash);
00179     string basename = pathname.substr(slash, pathname.rfind('.') - slash);
00180 
00181     DBG(cerr << "find ancillary file params: " << pathname << ", " << ext
00182         << ", " << dir << ", " << file << endl);
00183     DBG(cerr << "find ancillary file comp: " << directory << ", " << filename
00184         << ", " << basename << endl);
00185 
00186     string dot_ext = "." + ext;
00187 
00188     string name = directory + basename + dot_ext;
00189     if (access(name.c_str(), F_OK) == 0)
00190         return name;
00191 
00192     name = pathname + dot_ext;
00193     if (access(name.c_str(), F_OK) == 0)
00194         return name;
00195 
00196     name = directory + ext;
00197     if (access(name.c_str(), F_OK) == 0)
00198         return name;
00199 
00200     name = dir + basename + dot_ext;
00201     if (access(name.c_str(), F_OK) == 0)
00202         return name;
00203 
00204     name = directory + file + dot_ext;
00205     if (access(name.c_str(), F_OK) == 0)
00206         return name;
00207 
00208     name = dir + file + dot_ext;
00209     if (access(name.c_str(), F_OK) == 0)
00210         return name;
00211 
00212     name = dir + ext;
00213     if (access(name.c_str(), F_OK) == 0)
00214         return name;
00215 
00216     return "";
00217 }
00218 
00219 // Given a pathname to a datafile, take that pathname apart and look for an
00220 // ancillary file that describes a group of datafiles of which this datafile
00221 // is a member. Assume that groups follow a simple naming convention where
00222 // files use either leading or trailing digits and a common basename to name
00223 // group members. For example, 00stuff.hdf, 01stuff.hdf, 02stuff.hdf, ..., is
00224 // a group and has `stuff' as its basename.
00225 
00239 string
00240 find_group_ancillary_file(const string &name, const string &ext)
00241 {
00242     // Given /usr/local/data/stuff.01.nc
00243     // pathname = /usr/local/data, filename = stuff.01.nc and
00244     // rootname = stuff.01
00245     string::size_type slash = name.find_last_of('/');
00246     string dirname = name.substr(0, slash);
00247     string filename = name.substr(slash + 1);
00248     string rootname = filename.substr(0, filename.find_last_of('.'));
00249 
00250     // Instead of using regexs, scan the filename for leading and then
00251     // trailing digits.
00252     string::iterator rootname_iter = rootname.begin();
00253     string::iterator rootname_end_iter = rootname.end();
00254     if (isdigit(*rootname_iter)) {
00255         while (rootname_iter != rootname_end_iter
00256                && isdigit(*++rootname_iter))
00257             ;
00258 
00259         // We want: new_name = dirname + "/" + <base> + ext but without
00260         // creating a bunch of temp objects.
00261         string new_name = dirname;
00262         new_name.append("/");
00263         new_name.append(rootname_iter, rootname_end_iter);
00264         new_name.append(ext);
00265         DBG(cerr << "New Name (iter): " << new_name << endl);
00266         if (access(new_name.c_str(), F_OK) == 0) {
00267             return new_name;
00268         }
00269     }
00270 
00271     string::reverse_iterator rootname_riter = rootname.rbegin();
00272     string::reverse_iterator rootname_end_riter = rootname.rend();
00273     if (isdigit(*rootname_riter)) {
00274         while (rootname_riter != rootname_end_riter
00275                && isdigit(*++rootname_riter))
00276             ;
00277         string new_name = dirname;
00278         new_name.append("/");
00279         // I used reverse iters to scan rootname backwards. To avoid
00280         // reversing the fragment between end_riter and riter, pass append
00281         // regular iters obtained using reverse_iterator::base(). See Meyers
00282         // p. 123. 1/22/2002 jhrg
00283         new_name.append(rootname_end_riter.base(), rootname_riter.base());
00284         new_name.append(ext);
00285         DBG(cerr << "New Name (riter): " << new_name << endl);
00286         if (access(new_name.c_str(), F_OK) == 0) {
00287             return new_name;
00288         }
00289     }
00290 
00291     // If we're here either the file does not begin with leading digits or a
00292     // template made by removing those digits was not found.
00293 
00294     return "";
00295 }
00296 
// Write a time-stamped failure message to the standard error stream.
//
// The line has the form
//     [<time>] CGI: <script> failed for <host>: <Msgt>
// where <script> is taken from the SCRIPT_NAME environment variable and
// <host> from REMOTE_HOST or, failing that, REMOTE_ADDR. Fixed placeholder
// texts are used for values that are not available.
//
// Msgt: the text of the message.
void
ErrMsgT(const string &Msgt)
{
    // Placeholder that is shown when the current time cannot be obtained or
    // formatted.
    string stamp = "time() error           ";

    time_t now;
    if (time(&now) != (time_t) - 1) {
        // ctime() may return a null pointer and the length of its result is
        // not guaranteed, so the text is copied into a string rather than
        // into a fixed-size buffer.
        const char *text = ctime(&now);
        if (text) {
            stamp = text;
            // Drop the newline that ctime() puts at the end.
            const string::size_type newline = stamp.find('\n');
            if (newline != string::npos)
                stamp.erase(newline);
        }
    }

    // Each value is copied before getenv() is called again because a later
    // call is allowed to overwrite the storage of an earlier result.
    const char *value = getenv("REMOTE_HOST");
    if (!value)
        value = getenv("REMOTE_ADDR");
    const string host_or_addr = value ? value : "local (a non-CGI run)";

    value = getenv("SCRIPT_NAME");
    const string script = value ? value : "OPeNDAP server";

    cerr << "[" << stamp << "] CGI: " << script << " failed for "
         << host_or_addr << ": " << Msgt << endl;
}
00327 
00328 // Given a pathname, return just the filename component; an extension, if
00329 // any, is kept. The result is returned by value as a string, so there is
00330 // nothing for the caller to delete.
00331 // Originally from the netcdf distribution (ver 2.3.2).
00332 //
00333 // *** Change to string class argument and return type. jhrg
00334 // *** Changed so it also removes the#path#of#the#file# from decompressed
00335 //     files.  rph.
00336 // Returns: A filename with the path information removed (extension kept). If
00337 // memory for the new name cannot be allocated, does not return!
00338 
00349 string
00350 name_path(const string &path)
00351 {
00352     if (path == "")
00353         return string("");
00354 
00355     string::size_type delim = path.find_last_of(FILE_DELIMITER);
00356     string::size_type pound = path.find_last_of("#");
00357     string new_path;
00358 
00359     if (pound != string::npos)
00360         new_path = path.substr(pound + 1);
00361     else
00362         new_path = path.substr(delim + 1);
00363 
00364     return new_path;
00365 }
00366 
00367 // Return a MIME rfc-822 date. The grammar for this is:
00368 //       date-time   =  [ day "," ] date time        ; dd mm yy
00369 //                                                   ;  hh:mm:ss zzz
00370 //
00371 //       day         =  "Mon"  / "Tue" /  "Wed"  / "Thu"
00372 //                   /  "Fri"  / "Sat" /  "Sun"
00373 //
00374 //       date        =  1*2DIGIT month 2DIGIT        ; day month year
00375 //                                                   ;  e.g. 20 Jun 82
00376 //                   NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
00377 //
00378 //       month       =  "Jan"  /  "Feb" /  "Mar"  /  "Apr"
00379 //                   /  "May"  /  "Jun" /  "Jul"  /  "Aug"
00380 //                   /  "Sep"  /  "Oct" /  "Nov"  /  "Dec"
00381 //
00382 //       time        =  hour zone                    ; ANSI and Military
00383 //
00384 //       hour        =  2DIGIT ":" 2DIGIT [":" 2DIGIT]
00385 //                                                   ; 00:00:00 - 23:59:59
00386 //
00387 //       zone        =  "UT"  / "GMT"                ; Universal Time
00388 //                                                   ; North American : UT
00389 //                   /  "EST" / "EDT"                ;  Eastern:  - 5/ - 4
00390 //                   /  "CST" / "CDT"                ;  Central:  - 6/ - 5
00391 //                   /  "MST" / "MDT"                ;  Mountain: - 7/ - 6
00392 //                   /  "PST" / "PDT"                ;  Pacific:  - 8/ - 7
00393 //                   /  1ALPHA                       ; Military: Z = UT;
00394 //                                                   ;  A:-1; (J not used)
00395 //                                                   ;  M:-12; N:+1; Y:+12
00396 //                   / ( ("+" / "-") 4DIGIT )        ; Local differential
00397 //                                                   ;  hours+min. (HHMM)
00398 
// Abbreviated day names, indexed by tm_wday (0 is Sunday).
static const char *days[] =
    {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
    };
// Abbreviated month names, indexed by tm_mon (0 is January).
static const char *months[] =
    {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
     "Aug", "Sep", "Oct", "Nov", "Dec"
    };

// Format a time as a date string for a MIME/HTTP header, e.g.
// "Thu, 01 Jan 1970 00:00:00 GMT". The time is always given in GMT and the
// year is written with four digits.
//
// t: the time to format.
// Returns: the formatted date, or "" when gmtime() cannot convert `t' to a
// broken-down time.
string
rfc822_date(const time_t t)
{
    // gmtime() returns a null pointer for a time it cannot represent; do not
    // dereference it in that case.
    const struct tm *stm = gmtime(&t);
    if (!stm)
        return string("");

    // The longest possible result is far shorter than this buffer: every
    // field is either a three letter name or a formatted int.
    char d[256];

    // tm_year holds the number of years since 1900.
    sprintf(d, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm->tm_wday],
            stm->tm_mday, months[stm->tm_mon],
            1900 + stm->tm_year,
            stm->tm_hour, stm->tm_min, stm->tm_sec);
    return string(d);
}
00432 
// Return the time of the last modification of a file.
//
// name: pathname of the file.
// Returns: the st_mtime reported by stat() when `name' is a regular file;
// the current time when stat() fails or `name' is anything else (directory,
// socket, device, ...).
time_t
last_modified_time(const string &name)
{
    struct stat m;

    // The file type is a multi-bit field of st_mode. It has to be isolated
    // with S_IFMT and compared as a whole; testing the S_IFREG bit alone
    // also accepts other types whose code includes that bit (e.g. sockets).
    if (stat(name.c_str(), &m) == 0 && (m.st_mode & S_IFMT) == S_IFREG)
        return m.st_mtime;
    else
        return time(0);
}
00448 
00449 // Send string to set the transfer (mime) type and server version
00450 // Note that the Content-Description field is used to indicate whether valid
00451 // information or an error message is contained in the document and the
00452 // content-encoding field is used to indicate whether the data is compressed.
00453 // If the data stream is to be compressed, arrange for a compression output
00454 // filter so that all information sent after the header will be compressed.
00455 //
00456 // Returns: false if the compression output filter was to be used but could
00457 // not be started, true otherwise.
00458 
// Values of the Content-Description header; the set_mime_*() functions index
// this table with their ObjectType argument.
static const char *descrip[] = {
    "unknown",
    "dods_das",
    "dods_dds",
    "dods_data",
    "dods_error",
    "web_error",
    "dap4_ddx"
};

// Values of the Content-Encoding header; the set_mime_*() functions index
// this table with their EncodingType argument.
static const char *encoding[] = {
    "unknown",
    "deflate",
    "x-plain"
};
00466 
00479 void
00480 set_mime_text(FILE *out, ObjectType type, const string &ver,
00481               EncodingType enc, const time_t last_modified)
00482 {
00483     fprintf(out, "HTTP/1.0 200 OK%s", CRLF) ;
00484     if (ver == "") {
00485         fprintf(out, "XDODS-Server: %s%s", DVR, CRLF) ;
00486         fprintf(out, "XOPeNDAP-Server: %s%s", DVR, CRLF) ;
00487     }
00488     else {
00489         fprintf(out, "XDODS-Server: %s%s", ver.c_str(), CRLF) ;
00490         fprintf(out, "XOPeNDAP-Server: %s%s", ver.c_str(), CRLF) ;
00491     }
00492     fprintf(out, "XDAP: %s%s", DAP_PROTOCOL_VERSION, CRLF) ;
00493 
00494     const time_t t = time(0);
00495     fprintf(out, "Date: %s%s", rfc822_date(t).c_str(), CRLF) ;
00496 
00497     fprintf(out, "Last-Modified: ") ;
00498     if (last_modified > 0)
00499         fprintf(out, "%s%s", rfc822_date(last_modified).c_str(), CRLF) ;
00500     else
00501         fprintf(out, "%s%s", rfc822_date(t).c_str(), CRLF) ;
00502 
00503     if (type == dap4_ddx)
00504         fprintf(out, "Content-Type: text/xml%s", CRLF) ;
00505     else
00506         fprintf(out, "Content-Type: text/plain%s", CRLF) ;
00507 
00508     // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
00509     // jhrg 12/23/05
00510     fprintf(out, "Content-Description: %s%s", descrip[type], CRLF) ;
00511     if (type == dods_error) // don't cache our error responses.
00512         fprintf(out, "Cache-Control: no-cache%s", CRLF) ;
00513     // Don't write a Content-Encoding header for x-plain since that breaks
00514     // Netscape on NT. jhrg 3/23/97
00515     if (enc != x_plain)
00516         fprintf(out, "Content-Encoding: %s%s", encoding[enc], CRLF) ;
00517     fprintf(out, CRLF) ;
00518 }
00519 
00530 void
00531 set_mime_html(FILE *out, ObjectType type, const string &ver,
00532               EncodingType enc, const time_t last_modified)
00533 {
00534     fprintf(out, "HTTP/1.0 200 OK%s", CRLF) ;
00535     if (ver == "") {
00536         fprintf(out, "XDODS-Server: %s%s", DVR, CRLF) ;
00537         fprintf(out, "XOPeNDAP-Server: %s%s", DVR, CRLF) ;
00538     }
00539     else {
00540         fprintf(out, "XDODS-Server: %s%s", ver.c_str(), CRLF) ;
00541         fprintf(out, "XOPeNDAP-Server: %s%s", ver.c_str(), CRLF) ;
00542     }
00543     fprintf(out, "XDAP: %s%s", DAP_PROTOCOL_VERSION, CRLF) ;
00544 
00545     const time_t t = time(0);
00546     fprintf(out, "Date: %s%s", rfc822_date(t).c_str(), CRLF) ;
00547 
00548     fprintf(out, "Last-Modified: ") ;
00549     if (last_modified > 0)
00550         fprintf(out, "%s%s", rfc822_date(last_modified).c_str(), CRLF) ;
00551     else
00552         fprintf(out, "%s%s", rfc822_date(t).c_str(), CRLF) ;
00553 
00554     fprintf(out, "Content-type: text/html%s", CRLF) ;
00555     // See note above about Content-Description header. jhrg 12/23/05
00556     fprintf(out, "Content-Description: %s%s", descrip[type], CRLF) ;
00557     if (type == dods_error) // don't cache our error responses.
00558         fprintf(out, "Cache-Control: no-cache%s", CRLF) ;
00559     // Don't write a Content-Encoding header for x-plain since that breaks
00560     // Netscape on NT. jhrg 3/23/97
00561     if (enc != x_plain)
00562         fprintf(out, "Content-Encoding: %s%s", encoding[enc], CRLF) ;
00563     fprintf(out, CRLF) ;
00564 }
00565 
00579 void
00580 set_mime_binary(FILE *out, ObjectType type, const string &ver,
00581                 EncodingType enc, const time_t last_modified)
00582 {
00583     fprintf(out, "HTTP/1.0 200 OK%s", CRLF) ;
00584     if (ver == "") {
00585         fprintf(out, "XDODS-Server: %s%s", DVR, CRLF) ;
00586         fprintf(out, "XOPeNDAP-Server: %s%s", DVR, CRLF) ;
00587     }
00588     else {
00589         fprintf(out, "XDODS-Server: %s%s", ver.c_str(), CRLF) ;
00590         fprintf(out, "XOPeNDAP-Server: %s%s", ver.c_str(), CRLF) ;
00591     }
00592     fprintf(out, "XDAP: %s%s", DAP_PROTOCOL_VERSION, CRLF) ;
00593 
00594     const time_t t = time(0);
00595     fprintf(out, "Date: %s%s", rfc822_date(t).c_str(), CRLF) ;
00596 
00597     fprintf(out, "Last-Modified: ") ;
00598     if (last_modified > 0)
00599         fprintf(out, "%s%s", rfc822_date(last_modified).c_str(), CRLF) ;
00600     else
00601         fprintf(out, "%s%s", rfc822_date(t).c_str(), CRLF) ;
00602 
00603     fprintf(out, "Content-Type: application/octet-stream%s", CRLF) ;
00604     fprintf(out, "Content-Description: %s%s", descrip[type], CRLF) ;
00605     if (enc != x_plain)
00606         fprintf(out, "Content-Encoding: %s%s", encoding[enc], CRLF) ;
00607 
00608     fprintf(out, CRLF) ;
00609 }
00610 
00611 
00618 void
00619 set_mime_error(FILE *out, int code, const string &reason,
00620                const string &version)
00621 {
00622     fprintf(out, "HTTP/1.0 %d %s%s", code, reason.c_str(), CRLF) ;
00623     if (version == "") {
00624         fprintf(out, "XDODS-Server: %s%s", DVR, CRLF) ;
00625         fprintf(out, "XOPeNDAP-Server: %s%s", DVR, CRLF) ;
00626     }
00627     else {
00628         fprintf(out, "XDODS-Server: %s%s", version.c_str(), CRLF) ;
00629         fprintf(out, "XOPeNDAP-Server: %s%s", version.c_str(), CRLF) ;
00630     }
00631     fprintf(out, "XDAP: %s%s", DAP_PROTOCOL_VERSION, CRLF) ;
00632 
00633     const time_t t = time(0);
00634     fprintf(out, "Date: %s%s", rfc822_date(t).c_str(), CRLF) ;
00635     fprintf(out, "Cache-Control: no-cache%s", CRLF) ;
00636     fprintf(out, CRLF) ;
00637 }
00638 
00639 
00646 void
00647 set_mime_not_modified(FILE *out)
00648 {
00649     fprintf(out, "HTTP/1.0 304 NOT MODIFIED%s", CRLF) ;
00650     const time_t t = time(0);
00651     fprintf(out, "Date: %s%s", rfc822_date(t).c_str(), CRLF) ;
00652     fprintf(out, CRLF) ;
00653 }
00654 
// Read the override document that belongs to `name'.
//
// name: pathname without extension; the file that is read is <name>.ovr.
// doc:  receives the text of the file. Every line is terminated with "\n"
//       and, as before, one more "\n" follows the text when the file ends
//       with a newline (or is empty). `doc' is left untouched when the file
//       cannot be opened.
//
// Returns: true when <name>.ovr could be opened, false otherwise.
bool
found_override(string name, string &doc)
{
    ifstream ifs((name + ".ovr").c_str());
    if (!ifs)
        return false;

    doc = "";
    while (!ifs.eof()) {
        // Read into a string so that lines of any length are handled. With
        // a fixed-size buffer an over-long line sets failbit, after which
        // nothing more is extracted and end-of-file is never reached.
        string line;
        if (!getline(ifs, line) && !ifs.eof())
            break;              // read error; the loop could not end otherwise

        doc += line;
        doc += "\n";
    }

    return true;
}
00680 
00689 bool
00690 remove_mime_header(FILE *in)
00691 {
00692     char tmp[256];
00693     while (!feof(in)) {
00694         fgets(tmp, 255, in);
00695         if (strncmp(&tmp[0], CRLF, 2) == 0)
00696             return true;
00697     }
00698 
00699     return false;
00700 }
00701 
00702 
00725 string
00726 get_user_supplied_docs(string name, string cgi)
00727 {
00728     char tmp[256];
00729     ostringstream oss;
00730     ifstream ifs((cgi + ".html").c_str());
00731 
00732     if (ifs) {
00733         while (!ifs.eof()) {
00734             ifs.getline(tmp, 255);
00735             oss << tmp << "\n";
00736         }
00737         ifs.close();
00738 
00739         oss << "<hr>";
00740     }
00741 
00742     // Problem: This code is run with the CWD as the CGI-BIN directory but
00743     // the data are in DocumentRoot (and we don't have the pathname of the
00744     // data relative to DocumentRoot). So the only time this will work is
00745     // when the server is in the same directory as the data. See bug 815.
00746     // 10/08/04 jhrg
00747     ifs.open((name + ".html").c_str());
00748 
00749     // If name.html cannot be opened, look for basename.html
00750     if (!ifs) {
00751         string new_name = find_group_ancillary_file(name, ".html");
00752         if (new_name != "")
00753             ifs.open(new_name.c_str());
00754     }
00755 
00756     if (ifs) {
00757         while (!ifs.eof()) {
00758             ifs.getline(tmp, 255);
00759             oss << tmp << "\n";
00760         }
00761         ifs.close();
00762     }
00763 
00764     return oss.str();
00765 }
00766 

Generated on Wed Jun 27 12:56:38 2007 for libdap++ by  doxygen 1.4.7