util.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 // (c) COPYRIGHT URI/MIT 1994-1999
00027 // Please read the full copyright statement in the file COPYRIGHT_URI.
00028 //
00029 // Authors:
00030 //      jhrg,jimg       James Gallagher <jgallagher@gso.uri.edu>
00031 
00032 // Utility functions used by the api.
00033 //
00034 // jhrg 9/21/94
00035 
00036 #include "config.h"
00037 
00038 static char rcsid[] not_used =
00039     {"$Id: util.cc 18315 2008-03-03 20:14:44Z jimg $"
00040     };
00041 
00042 #include <cassert>
00043 #include <cstring>
00044 
00045 #include <ctype.h>
00046 #ifndef TM_IN_SYS_TIME
00047 #include <time.h>
00048 #else
00049 #include <sys/time.h>
00050 #endif
00051 
00052 #ifndef WIN32
00053 #include <unistd.h>    // for stat
00054 #else
00055 #include <io.h>
00056 #include <fcntl.h>
00057 #include <process.h>
00058 #endif
00059 
00060 #include <sys/types.h>
00061 #include <sys/stat.h>
00062 
00063 #include <string>
00064 #include <sstream>
00065 #include <vector>
00066 #include <algorithm>
00067 #include <stdexcept>
00068 
00069 #include "BaseType.h"
00070 #include "Str.h"
00071 #include "Url.h"
00072 #include "Sequence.h"
00073 #include "Error.h"
00074 #include "parser.h"
00075 #include "util.h"
00076 #include "GNURegex.h"
00077 #include "debug.h"
00078 
00079 
00080 using namespace std;
00081 
00082 namespace libdap {
00083 
// Remove spaces from the start of a URL and from the start of any constraint
// expression it contains. 4/7/98 jhrg
//
// name: the URL, optionally followed by `?' and a constraint expression.
// Returns a copy of NAME without its leading spaces and without the spaces
// that immediately follow the first `?'. A string that contains no spaces is
// returned unchanged; a string made up only of spaces yields "".
std::string
prune_spaces(const std::string &name)
{
    // If the URL does not even have white space return.
    if (name.find(' ') == std::string::npos)
        return name;

    // Positions are kept as size_type: narrowing npos to unsigned int made
    // the all-spaces case call substr() with an out-of-range position.
    const std::string::size_type url_start = name.find_first_not_of(' ');
    if (url_start == std::string::npos)
        return std::string();

    // Strip leading spaces from http://...
    std::string tmp_name = name.substr(url_start);

    // Without a `?' there is no constraint part to clean up.
    const std::string::size_type query = tmp_name.find('?');
    if (query == std::string::npos)
        return tmp_name;

    // Strip leading spaces from constraint part (following `?').
    const std::string::size_type ce_start = query + 1;
    const std::string::size_type ce_text = tmp_name.find_first_not_of(' ', ce_start);
    if (ce_text == std::string::npos)
        tmp_name.erase(ce_start);   // nothing but spaces after the `?'
    else
        tmp_name.erase(ce_start, ce_text - ce_start);

    return tmp_name;
}
00114 
00115 // Compare elements in a list of (BaseType *)s and return true if there are
00116 // no duplicate elements, otherwise return false.
00117 
00118 bool
00119 unique_names(vector<BaseType *> l, const string &var_name,
00120              const string &type_name, string &msg)
00121 {
00122     // copy the identifier names to a vector
00123     vector<string> names(l.size());
00124 
00125     int nelem = 0;
00126     typedef std::vector<BaseType *>::const_iterator citer ;
00127     for (citer i = l.begin(); i != l.end(); i++) {
00128         assert(*i);
00129         names[nelem++] = (*i)->name();
00130         DBG(cerr << "NAMES[" << nelem - 1 << "]=" << names[nelem-1] << endl);
00131     }
00132 
00133     // sort the array of names
00134     sort(names.begin(), names.end());
00135 
00136 #ifdef DODS_DEBUG2
00137     cout << "unique:" << endl;
00138     for (int ii = 0; ii < nelem; ++ii)
00139         cout << "NAMES[" << ii << "]=" << names[ii] << endl;
00140 #endif
00141 
00142     // sort the array of names
00143     sort(names.begin(), names.end());
00144 
00145 #ifdef DODS_DEBUG2
00146     cout << "unique:" << endl;
00147     for (int ii = 0; ii < nelem; ++ii)
00148         cout << "NAMES[" << ii << "]=" << names[ii] << endl;
00149 #endif
00150 
00151     // look for any instance of consecutive names that are ==
00152     for (int j = 1; j < nelem; ++j) {
00153         if (names[j-1] == names[j]) {
00154             ostringstream oss;
00155             oss << "The variable `" << names[j]
00156             << "' is used more than once in " << type_name << " `"
00157             << var_name << "'";
00158             msg = oss.str();
00159 
00160             return false;
00161         }
00162     }
00163 
00164     return true;
00165 }
00166 
00167 const char *
00168 libdap_root()
00169 {
00170     return LIBDAP_ROOT;
00171 #if 0
00172     // I've changed this because this could be used to get the library to
00173     // use a different compression function when it builds compressed 
00174     // responses. The use of 'deflate' to compress responses should be
00175     // removed since Hyrax now uses Tomcat to perform this function.
00176     char *libdap_root = 0;
00177     return ((libdap_root = getenv("LIBDAP_ROOT")) ? libdap_root : LIBDAP_ROOT);
00178 #endif
00179 }
00180 
00181 extern "C"
00182     const char *
00183     libdap_version()
00184 {
00185     return PACKAGE_VERSION;
00186 }
00187 
00188 extern "C"
00189     const char *
00190     libdap_name()
00191 {
00192     return PACKAGE_NAME;
00193 }
00194 
00195 // Since Server4 can get compressed responses using Tomcat, bail on this
00196 // software (which complicates building under Win32). It can be turned on
00197 // for use with Server3 in configure.ac.
00198 
00199 #if COMPRESSION_FOR_SERVER3
00200 
00201 // Return true if the program deflate exists and is executable by user, group
00202 // and world. If this returns false the caller should assume that server
00203 // filter programs won't be able to find the deflate program and thus won't
00204 // be able to compress the return document.
00205 // NB: this works because this function uses the same rules as compressor()
00206 // (which follows) to look for deflate. 2/11/98 jhrg
00207 
00208 bool
00209 deflate_exists()
00210 {
00211     DBG(cerr << "Entering deflate_exists...");
00212 
00213     int status = false;
00214     struct stat buf;
00215 
00216 #ifdef WIN32
00217     string deflate = (string)libdap_root() + "\\bin\\deflate";
00218 #else
00219     string deflate = (string)libdap_root() + "/sbin/deflate";
00220 #endif
00221 
00222     // Check that the file exists...
00223     // First look for deflate using DODS_ROOT (compile-time constant subsumed
00224     // by an environment variable) and if that fails in the CWD which finds
00225     // the program when it is in the same directory as the dispatch script
00226     // and other server components. 2/11/98 jhrg
00227     status = (stat(deflate.c_str(), &buf) == 0)
00228 #ifdef WIN32
00229              || (stat(".\\deflate", &buf) == 0);
00230 #else
00231              || (stat("./deflate", &buf) == 0);
00232 #endif
00233 
00234     // and that it can be executed.
00235 #ifdef WIN32
00236     status &= (buf.st_mode & _S_IEXEC);
00237 #else
00238     status &= buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH);
00239 #endif
00240     DBG(cerr << " returning " << (status ? "true." : "false.") << endl);
00241     return (status != 0);
00242 }
00243 
00244 FILE *
00245 compressor(FILE *output, int &childpid)
00246 {
00247 #ifdef WIN32
00248     //  There is no such thing as a "fork" under win32. This makes it so that
00249     //  we have to juggle handles more aggressively. This code hasn't been
00250     //  tested and shown to work as of 07/2000.
00251     int pid, data[2];
00252     int hStdIn, hStdOut;
00253 
00254     if (_pipe(data, 512, O_BINARY | O_NOINHERIT) < 0) {
00255         cerr << "Could not create IPC channel for compressor process"
00256         << endl;
00257         return NULL;
00258     }
00259 
00260 
00261     // This sets up for the child process, but it has to be reversed for the
00262     // parent after the spawn takes place.
00263 
00264     // Store stdin, stdout so we have something to restore to
00265     hStdIn  = _dup(_fileno(stdin));
00266     hStdOut = _dup(_fileno(stdout));
00267 
00268     // Child is to read from read end of pipe
00269     if (_dup2(data[0], _fileno(stdin)) != 0) {
00270         cerr << "dup of child stdin failed" << endl;
00271         return NULL;
00272     }
00273     // Child is to write its's stdout to file
00274     if (_dup2(_fileno(output), _fileno(stdout)) != 0) {
00275         cerr << "dup of child stdout failed" << endl;
00276         return NULL;
00277     }
00278 
00279     // Spawn child process
00280     string deflate = "deflate.exe";
00281     if ((pid = _spawnlp(_P_NOWAIT, deflate.c_str(), deflate.c_str(),
00282                         "-c", "5", "-s", NULL)) < 0) {
00283         cerr << "Could not spawn to create compressor process" << endl;
00284         return NULL;
00285     }
00286 
00287     // Restore stdin, stdout for parent and close duplicate copies
00288     if (_dup2(hStdIn, _fileno(stdin)) != 0) {
00289         cerr << "dup of stdin failed" << endl;
00290         return NULL;
00291     }
00292     if (_dup2(hStdOut, _fileno(stdout)) != 0) {
00293         cerr << "dup of stdout failed" << endl;
00294         return NULL;
00295     }
00296     close(hStdIn);
00297     close(hStdOut);
00298 
00299     // Tell the parent that it reads from the opposite end of the
00300     // place where the child writes.
00301     close(data[0]);
00302     FILE *input = fdopen(data[1], "w");
00303     setbuf(input, 0);
00304     childpid = pid;
00305     return input;
00306 
00307 #else
00308     FILE *ret_file = NULL ;
00309 
00310     int pid, data[2];
00311 
00312     if (pipe(data) < 0) {
00313         cerr << "Could not create IPC channel for compressor process"
00314         << endl;
00315         return NULL;
00316     }
00317 
00318     if ((pid = fork()) < 0) {
00319         cerr << "Could not fork to create compressor process" << endl;
00320         return NULL;
00321     }
00322 
00323     // The parent process closes the write end of the Pipe, and creates a
00324     // FILE * using fdopen(). The FILE * is used by the calling program to
00325     // access the read end of the Pipe.
00326 
00327     if (pid > 0) {   // Parent, pid is that of the child
00328         close(data[0]);
00329         ret_file = fdopen(data[1], "w");
00330         setbuf(ret_file, 0);
00331         childpid = pid;
00332     }
00333     else {   // Child
00334         close(data[1]);
00335         dup2(data[0], 0); // Read from the pipe...
00336         dup2(fileno(output), 1); // Write to the FILE *output.
00337 
00338         DBG(cerr << "Opening compression stream." << endl);
00339 
00340         // First try to run deflate using DODS_ROOT (the value read from the
00341         // DODS_ROOT environment variable takes precedence over the value set
00342         // at build time. If that fails, try the CWD.
00343         string deflate = (string)libdap_root() + "/sbin/deflate";
00344         (void) execl(deflate.c_str(), "deflate", "-c",  "5", "-s", NULL);
00345         (void) execl("./deflate", "deflate", "-c",  "5", "-s", NULL);
00346         cerr << "Warning: Could not start compressor!" << endl;
00347         cerr << "defalte should be in DODS_ROOT/etc or in the CWD!"
00348         << endl;
00349         _exit(127);  // Only here if an error occurred.
00350     }
00351 
00352     return ret_file ;
00353 #endif
00354 }
00355 
00356 #endif // COMPRESSION_FOR_SERVER3
00357 
// This function returns the system time as a string formatted for an httpd
// log file: the text produced by ctime() without its trailing newline.
// Returns "time() error" or "ctime() error" when the corresponding library
// call fails.

std::string
systime()
{
    time_t TimBin;

    if (time(&TimBin) == (time_t) - 1)
        return std::string("time() error");

    const char *text = ctime(&TimBin);
    if (!text)
        return std::string("ctime() error");

    std::string TimStr(text);
    // remove the \n -- and only the \n; dropping two characters used to cut
    // off the last digit of the year as well.
    if (!TimStr.empty() && TimStr[TimStr.size() - 1] == '\n')
        TimStr.erase(TimStr.size() - 1);

    return TimStr;
}
00373 
// Convert every character of S to lower case, in place, using tolower().
void
downcase(std::string &s)
{
    for (std::string::size_type i = 0; i < s.length(); ++i) {
        // tolower() is only defined for EOF and values representable as
        // unsigned char, so a (possibly negative) char is converted first.
        s[i] = static_cast<char>(::tolower(static_cast<unsigned char>(s[i])));
    }
}
00380 
// Return S without its enclosing double quotes. The quotes are removed only
// when S both starts and ends with one; any other string is returned
// unchanged.
std::string
remove_quotes(const std::string &s)
{
    // Two characters are needed for a pair of quotes. A lone `"' is not a
    // quoted string (and s.length() - 2 would wrap around for it).
    const std::string::size_type len = s.length();
    if (len >= 2 && s[0] == '\"' && s[len - 1] == '\"')
        return s.substr(1, len - 2);

    return s;
}
00389 
#ifdef WIN32
//  Sometimes need to buffer within an iostream under win32 when
//  we want the output to go to a FILE *.  This is because
//  it's not possible to associate an ofstream with a FILE *
//  under the Standard ANSI C++ Library spec.  Unix systems
//  don't follow the spec in this regard.
//
//  Copies what can be read from IOS to OUT, at most 511 characters at a
//  time, until a read yields no characters.
void flush_stream(std::iostream ios, FILE *out)
{
    char chunk[512];

    for (;;) {
        ios.get(chunk, 512, NULL);
        const int count = ios.gcount();
        if (count <= 0)
            break;
        fwrite(chunk, 1, count, out);
    }
}
#endif
00410 
// Jose Garcia
//
// Append the representation of VAL in base BASE to STR_VAL. Negative values
// are written as `-' followed by the digits of the magnitude; digits above
// 9 are the upper-case letters A-Z.
//
// val: the number to convert.
// base: the radix, in the range [2,36].
// str_val: value-result parameter; the text is appended to it.
// Throws std::invalid_argument, leaving STR_VAL untouched, when BASE is out
// of range.
void
append_long_to_string(long val, int base, std::string &str_val)
{
    // The array digits contains 36 elements which are the
    // possible valid digits for our bases in the range
    // [2,36]
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (base > 36 || base < 2) {
        // no conversion if wrong base
        throw std::invalid_argument("The parameter base has an invalid value.");
    }

    // Work on the magnitude as an unsigned value: negating LONG_MIN as a
    // long overflows, while unsigned negation is well defined.
    unsigned long magnitude = static_cast<unsigned long>(val);
    if (val < 0) {
        str_val += '-';
        magnitude = 0UL - magnitude;
    }

    // Digits come out least significant first; collect them and append them
    // in reverse order.
    const unsigned long radix = static_cast<unsigned long>(base);
    std::string reversed;
    do {
        reversed += digits[magnitude % radix];
        magnitude /= radix;
    }
    while (magnitude != 0);

    str_val.append(reversed.rbegin(), reversed.rend());
}
00439 
00440 // base defaults to 10
00441 string
00442 long_to_string(long val, int base)
00443 {
00444     string s;
00445     append_long_to_string(val, base, s);
00446     return s;
00447 }
00448 
// Jose Garcia
//
// Append NUM to STR, formatted by an ostringstream with a precision of 9.
// (A stream is used in place of sprintf: I want to banish all instances of
// sprintf. 10/5/2001 jhrg)
void append_double_to_string(const double &num, std::string &str)
{
    std::ostringstream formatted;
    formatted.precision(9);
    formatted << num;

    str.append(formatted.str());
}
00459 
00460 string
00461 double_to_string(const double &num)
00462 {
00463     string s;
00464     append_double_to_string(num, s);
00465     return s;
00466 }
00467 
00468 // Get the version number of the core software. Defining this means that
00469 // clients of the DAP don't have to rely on config.h for the version
00470 // number.
00471 string
00472 dap_version()
00473 {
00474     return (string)"OPeNDAP DAP/" + libdap_version() + ": compiled on " + __DATE__ + ":" + __TIME__ ;
00475 }
00476 
00477 // Given a pathname, return the file at the end of the path. This is used
00478 // when reporting errors (maybe other times, too) to keep the server from
00479 // revealing too much about its organization when sending error responses
00480 // back to clients. 10/11/2000 jhrg
00481 // MT-safe. 08/05/02 jhrg
00482 
// The platform's path separator, as a string.
#ifdef WIN32
static const char path_sep[] = "\\";
#else
static const char path_sep[] = "/";
#endif

// Return the part of PATH that follows its last path separator, or PATH
// itself when it contains no separator.
std::string
path_to_filename(std::string path)
{
    const std::string::size_type last_sep = path.rfind(path_sep);
    if (last_sep == std::string::npos)
        return path;

    return path.substr(last_sep + 1);
}
00500 
#if 0
// Look around for a reasonable place to put a temporary file. Check first
// the value of the TMPDIR env var. If that does not yield a path that's
// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
// defined in stdio.h. If both come up empty, then use `./'.
// (On WIN32 the TEMP and TMP variables are consulted instead.)
//
// This function allocates storage using new. The caller must delete the char
// array.
//
// Throws Error when size_ok() rejects the length of the result.

// Change this to a version that either returns a string or an open file
// descriptor. Use information from https://buildsecurityin.us-cert.gov/
// (see open()) to make it more secure. Ideal solution: get deserialize()
// methods to read from a stream returned by libcurl, not from a temporary
// file. 9/21/07 jhrg
char *
get_tempfile_template(char *file_template)
{
    // The chosen directory; 0 until a candidate has passed its checks.
    const char *dir = 0;

#ifdef WIN32
    // whitelist for a WIN32 directory
    // NOTE(review): the result of match() is used as a boolean here, while
    // pathname_ok() compares it with the length of the text -- confirm what
    // match() returns when there is no match.
    Regex directory("[-a-zA-Z0-9_\\]*");

    const char *c = getenv("TEMP");
    if (c && directory.match(c, strlen(c)) && (access(c, 6) == 0))
        dir = c;

    if (!dir) {
        // Check the value that was read from TMP; the access() test used to
        // look at TEMP again.
        c = getenv("TMP");
        if (c && directory.match(c, strlen(c)) && (access(c, 6) == 0))
            dir = c;
    }
#else
    const char *c = getenv("TMPDIR");
    // Changed this so that it uses the pathname_ok() method instead
    // of using its own regex. jhrg 2/4/08
    if (c) {
        // Copy the whole value (`*c' copied its first character only).
        string tmpdir = c;
        if (pathname_ok(tmpdir) && (access(c, W_OK | R_OK) == 0))
            dir = c;
    }

#ifdef P_tmpdir
    if (!dir && access(P_tmpdir, W_OK | R_OK) == 0)
        dir = P_tmpdir;
#endif

#endif  // WIN32

    if (!dir)
        dir = ".";

    // Sanitize allocation: directory + "/" + template + terminating null.
    int size = strlen(dir) + strlen(file_template) + 2;
    if (!size_ok(1, size))
        throw Error("Bad temporary file name.");

    char *temp = new char[size];
    // The buffer was sized for all three parts, so plain copies are safe and
    // always leave the text null terminated (strncpy() did not when the
    // template was empty).
    strcpy(temp, dir);
    strcat(temp, "/");
    strcat(temp, file_template);

    return temp;
}
#endif
00568 
#ifndef WIN32
// Create and open a temporary file with mkstemp().
//
// temp: a writable mkstemp() template; mkstemp() overwrites it with the name
//     of the file that was created.
// Returns a FILE * opened in "a+" mode on the new file, or 0 when the file
// cannot be created or the stream cannot be opened.
FILE *
get_temp_file(char *temp)
{
    const int fd = mkstemp(temp);
    if (fd < 0)
        return 0;

    FILE *tmp = fdopen(fd, "a+");
    if (!tmp)
        close(fd);  // do not leak the descriptor when no stream was made

    return tmp;
}
#endif
00585 
// Return the whole content of FP as a string. The stream is rewound first,
// so reading always starts at the beginning of the file, and it is left
// positioned where reading stopped.
std::string
file_to_string(FILE *fp)
{
    rewind(fp);

    std::ostringstream content;
    int ch;
    while ((ch = fgetc(fp)) != EOF)
        content.put(static_cast<char>(ch));

    return content.str();
}
00600 
00603 
// Sanity check for an allocation of NELEM elements of SZ bytes each.
//
// sz: size of one element; must be greater than zero.
// nelem: number of elements.
// Returns true when SZ is positive and NELEM is less than UINT_MAX / SZ, so
// the product of the two fits in an unsigned int.
//
// The parameters are spelled `unsigned int': `uint' is not a standard type
// and is not available on every platform this file is built for.
bool
size_ok(unsigned int sz, unsigned int nelem)
{
    return (sz > 0 && nelem < UINT_MAX / sz);
}
00614 
00631 bool
00632 pathname_ok(const string &path, bool strict)
00633 {
00634     if (path.length() > 255)
00635         return false;
00636     
00637     Regex name("[-0-9A-z_./]+");
00638     if (!strict)
00639         name = "[:print:]+";
00640         
00641     string::size_type len = path.length();
00642     int result = name.match(path.c_str(), len);
00643     // Protect against casting too big an uint to int
00644     // if LEN is bigger than the max int32, the second test can't work
00645     if (len > INT_MAX || result != static_cast<int>(len))
00646         return false;
00647  
00648     return true;
00649 }
00650 
00652 
00653 } // namespace libdap
00654 

Generated on Tue Mar 4 18:01:55 2008 for libdap++ by  doxygen 1.5.1