util.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 // (c) COPYRIGHT URI/MIT 1994-1999
00027 // Please read the full copyright statement in the file COPYRIGHT_URI.
00028 //
00029 // Authors:
00030 //      jhrg,jimg       James Gallagher <jgallagher@gso.uri.edu>
00031 
00032 // Utility functions used by the api.
00033 //
00034 // jhrg 9/21/94
00035 
00036 #include "config.h"
00037 
00038 static char rcsid[] not_used =
00039     {"$Id: util.cc 17360 2007-11-08 16:05:18Z pwest $"
00040     };
00041 
00042 #include <stdio.h>
00043 #include <stdlib.h>
00044 #include <assert.h>
00045 #include <ctype.h>
00046 #ifndef TM_IN_SYS_TIME
00047 #include <time.h>
00048 #else
00049 #include <sys/time.h>
00050 #endif
00051 
00052 #ifndef WIN32
00053 #include <unistd.h>
00054 #else
00055 #include <io.h>
00056 #include <fcntl.h>
00057 #include <process.h>
00058 #endif
00059 
00060 #include <sys/types.h>
00061 #include <sys/stat.h>
00062 
00063 #include <string>
00064 #include <sstream>
00065 #include <vector>
00066 #include <algorithm>
00067 #include <stdexcept>
00068 
00069 #include "BaseType.h"
00070 #include "Str.h"
00071 #include "Url.h"
00072 #include "Sequence.h"
00073 #include "Error.h"
00074 #include "parser.h"
00075 #include "util.h"
00076 #include "GNURegex.h"
00077 #include "debug.h"
00078 
00079 
00080 using namespace std;
00081 
// Remove spaces from the start of a URL and from the start of any constraint
// expression it contains. 4/7/98 jhrg
//
// name is the URL text, optionally followed by `?' and a constraint
// expression. The return value is a copy of name with its leading spaces
// removed and with any run of spaces that directly follows the first `?'
// removed. Text without any space is returned unchanged; text made up of
// nothing but spaces yields an empty string.

string
prune_spaces(const string &name)
{
    // If the URL does not even have white space return.
    if (name.find(' ') == string::npos)
        return name;

    // Strip leading spaces from http://...
    // Positions are kept in string::size_type so that npos is never
    // truncated (it was when stored in an unsigned int on LP64 systems).
    const string::size_type first = name.find_first_not_of(' ');
    if (first == string::npos)
        return string();        // nothing but spaces

    string tmp_name = name.substr(first);

    // Strip leading spaces from constraint part (following `?').
    const string::size_type query = tmp_name.find('?');
    if (query == string::npos)
        return tmp_name;        // no constraint expression present

    const string::size_type expr_start = query + 1;
    const string::size_type expr_first =
        tmp_name.find_first_not_of(' ', expr_start);
    if (expr_first == string::npos)
        tmp_name.erase(expr_start);     // only spaces follow the `?'
    else
        tmp_name.erase(expr_start, expr_first - expr_start);

    return tmp_name;
}
00112 
00113 // Compare elements in a list of (BaseType *)s and return true if there are
00114 // no duplicate elements, otherwise return false.
00115 
00116 bool
00117 unique_names(vector<BaseType *> l, const string &var_name,
00118              const string &type_name, string &msg)
00119 {
00120     // copy the identifier names to a vector
00121     vector<string> names(l.size());
00122 
00123     int nelem = 0;
00124     typedef std::vector<BaseType *>::const_iterator citer ;
00125     for (citer i = l.begin(); i != l.end(); i++) {
00126         assert(*i);
00127         names[nelem++] = (*i)->name();
00128         DBG(cerr << "NAMES[" << nelem - 1 << "]=" << names[nelem-1] << endl);
00129     }
00130 
00131     // sort the array of names
00132     sort(names.begin(), names.end());
00133 
00134 #ifdef DODS_DEBUG2
00135     cout << "unique:" << endl;
00136     for (int ii = 0; ii < nelem; ++ii)
00137         cout << "NAMES[" << ii << "]=" << names[ii] << endl;
00138 #endif
00139 
00140     // sort the array of names
00141     sort(names.begin(), names.end());
00142 
00143 #ifdef DODS_DEBUG2
00144     cout << "unique:" << endl;
00145     for (int ii = 0; ii < nelem; ++ii)
00146         cout << "NAMES[" << ii << "]=" << names[ii] << endl;
00147 #endif
00148 
00149     // look for any instance of consecutive names that are ==
00150     for (int j = 1; j < nelem; ++j) {
00151         if (names[j-1] == names[j]) {
00152             ostringstream oss;
00153             oss << "The variable `" << names[j]
00154             << "' is used more than once in " << type_name << " `"
00155             << var_name << "'";
00156             msg = oss.str();
00157 
00158             return false;
00159         }
00160     }
00161 
00162     return true;
00163 }
00164 
00165 const char *
00166 libdap_root()
00167 {
00168     return LIBDAP_ROOT;
00169 #if 0
00170     // I've changed this because this could be used to get the library to
00171     // use a different compression function when it builds compressed 
00172     // responses. The use of 'deflate' to compress responses should be
00173     // removed since Hyrax now uses Tomcat to perform this function.
00174     char *libdap_root = 0;
00175     return ((libdap_root = getenv("LIBDAP_ROOT")) ? libdap_root : LIBDAP_ROOT);
00176 #endif
00177 }
00178 
00179 extern "C"
00180     const char *
00181     libdap_version()
00182 {
00183     return PACKAGE_VERSION;
00184 }
00185 
00186 extern "C"
00187     const char *
00188     libdap_name()
00189 {
00190     return PACKAGE_NAME;
00191 }
00192 
00193 // Since Server4 can get compressed responses using Tomcat, bail on this
00194 // software (which complicates building under Win32). It can be turned on
00195 // for use with Server3 in configure.ac.
00196 
00197 #if COMPRESSION_FOR_SERVER3
00198 
00199 // Return true if the program deflate exists and is executable by user, group
00200 // and world. If this returns false the caller should assume that server
00201 // filter programs won't be able to find the deflate program and thus won't
00202 // be able to compress the return document.
00203 // NB: this works because this function uses the same rules as compressor()
00204 // (which follows) to look for deflate. 2/11/98 jhrg
00205 
00206 bool
00207 deflate_exists()
00208 {
00209     DBG(cerr << "Entering deflate_exists...");
00210 
00211     int status = false;
00212     struct stat buf;
00213 
00214 #ifdef WIN32
00215     string deflate = (string)libdap_root() + "\\bin\\deflate";
00216 #else
00217     string deflate = (string)libdap_root() + "/sbin/deflate";
00218 #endif
00219 
00220     // Check that the file exists...
00221     // First look for deflate using DODS_ROOT (compile-time constant subsumed
00222     // by an environment variable) and if that fails in the CWD which finds
00223     // the program when it is in the same directory as the dispatch script
00224     // and other server components. 2/11/98 jhrg
00225     status = (stat(deflate.c_str(), &buf) == 0)
00226 #ifdef WIN32
00227              || (stat(".\\deflate", &buf) == 0);
00228 #else
00229              || (stat("./deflate", &buf) == 0);
00230 #endif
00231 
00232     // and that it can be executed.
00233 #ifdef WIN32
00234     status &= (buf.st_mode & _S_IEXEC);
00235 #else
00236     status &= buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH);
00237 #endif
00238     DBG(cerr << " returning " << (status ? "true." : "false.") << endl);
00239     return (status != 0);
00240 }
00241 
00242 FILE *
00243 compressor(FILE *output, int &childpid)
00244 {
00245 #ifdef WIN32
00246     //  There is no such thing as a "fork" under win32. This makes it so that
00247     //  we have to juggle handles more aggressively. This code hasn't been
00248     //  tested and shown to work as of 07/2000.
00249     int pid, data[2];
00250     int hStdIn, hStdOut;
00251 
00252     if (_pipe(data, 512, O_BINARY | O_NOINHERIT) < 0) {
00253         cerr << "Could not create IPC channel for compressor process"
00254         << endl;
00255         return NULL;
00256     }
00257 
00258 
00259     // This sets up for the child process, but it has to be reversed for the
00260     // parent after the spawn takes place.
00261 
00262     // Store stdin, stdout so we have something to restore to
00263     hStdIn  = _dup(_fileno(stdin));
00264     hStdOut = _dup(_fileno(stdout));
00265 
00266     // Child is to read from read end of pipe
00267     if (_dup2(data[0], _fileno(stdin)) != 0) {
00268         cerr << "dup of child stdin failed" << endl;
00269         return NULL;
00270     }
00271     // Child is to write its's stdout to file
00272     if (_dup2(_fileno(output), _fileno(stdout)) != 0) {
00273         cerr << "dup of child stdout failed" << endl;
00274         return NULL;
00275     }
00276 
00277     // Spawn child process
00278     string deflate = "deflate.exe";
00279     if ((pid = _spawnlp(_P_NOWAIT, deflate.c_str(), deflate.c_str(),
00280                         "-c", "5", "-s", NULL)) < 0) {
00281         cerr << "Could not spawn to create compressor process" << endl;
00282         return NULL;
00283     }
00284 
00285     // Restore stdin, stdout for parent and close duplicate copies
00286     if (_dup2(hStdIn, _fileno(stdin)) != 0) {
00287         cerr << "dup of stdin failed" << endl;
00288         return NULL;
00289     }
00290     if (_dup2(hStdOut, _fileno(stdout)) != 0) {
00291         cerr << "dup of stdout failed" << endl;
00292         return NULL;
00293     }
00294     close(hStdIn);
00295     close(hStdOut);
00296 
00297     // Tell the parent that it reads from the opposite end of the
00298     // place where the child writes.
00299     close(data[0]);
00300     FILE *input = fdopen(data[1], "w");
00301     setbuf(input, 0);
00302     childpid = pid;
00303     return input;
00304 
00305 #else
00306     FILE *ret_file = NULL ;
00307 
00308     int pid, data[2];
00309 
00310     if (pipe(data) < 0) {
00311         cerr << "Could not create IPC channel for compressor process"
00312         << endl;
00313         return NULL;
00314     }
00315 
00316     if ((pid = fork()) < 0) {
00317         cerr << "Could not fork to create compressor process" << endl;
00318         return NULL;
00319     }
00320 
00321     // The parent process closes the write end of the Pipe, and creates a
00322     // FILE * using fdopen(). The FILE * is used by the calling program to
00323     // access the read end of the Pipe.
00324 
00325     if (pid > 0) {   // Parent, pid is that of the child
00326         close(data[0]);
00327         ret_file = fdopen(data[1], "w");
00328         setbuf(ret_file, 0);
00329         childpid = pid;
00330     }
00331     else {   // Child
00332         close(data[1]);
00333         dup2(data[0], 0); // Read from the pipe...
00334         dup2(fileno(output), 1); // Write to the FILE *output.
00335 
00336         DBG(cerr << "Opening compression stream." << endl);
00337 
00338         // First try to run deflate using DODS_ROOT (the value read from the
00339         // DODS_ROOT environment variable takes precedence over the value set
00340         // at build time. If that fails, try the CWD.
00341         string deflate = (string)libdap_root() + "/sbin/deflate";
00342         (void) execl(deflate.c_str(), "deflate", "-c",  "5", "-s", NULL);
00343         (void) execl("./deflate", "deflate", "-c",  "5", "-s", NULL);
00344         cerr << "Warning: Could not start compressor!" << endl;
00345         cerr << "defalte should be in DODS_ROOT/etc or in the CWD!"
00346         << endl;
00347         _exit(127);  // Only here if an error occurred.
00348     }
00349 
00350     return ret_file ;
00351 #endif
00352 }
00353 
00354 #endif // COMPRESSION_FOR_SERVER3
00355 
// Return the current system time as text, in the format produced by
// ctime() but without the trailing newline (e.g. for an httpd log file).
// If time() or ctime() fails, the string "time() error" is returned.

string
systime()
{
    time_t TimBin;

    if (time(&TimBin) == (time_t) - 1)
        return string("time() error");

    const char *text = ctime(&TimBin);
    if (!text)
        return string("time() error");

    string TimStr = text;
    // Remove the \n, and only the \n: dropping two characters cut off the
    // last digit of the year as well.
    if (!TimStr.empty() && TimStr[TimStr.size() - 1] == '\n')
        TimStr.erase(TimStr.size() - 1);

    return TimStr;
}
00371 
// Convert every character of s to lower case, in place, using tolower().
void
downcase(string &s)
{
    // tolower() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined, hence the casts.
    for (string::size_type i = 0; i < s.length(); ++i)
        s[i] = static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
}
00378 
00379 #ifdef WIN32
//  Under win32 output sometimes has to be buffered in an iostream when it
//  is ultimately meant for a FILE *, because the Standard ANSI C++ Library
//  spec offers no way to associate an ofstream with a FILE *. (Unix
//  systems don't follow the spec in this regard.)
//
//  Copies what can be read from ios to out, up to 511 characters per
//  read, stopping as soon as a read yields no characters.
//  NOTE(review): ios is taken by value; confirm this compiles with the
//  standard library in use, since standard iostreams are not copyable.
void flush_stream(iostream ios, FILE *out)
{
    char chunk[512];

    for (;;) {
        ios.get(chunk, 512, NULL);
        const int got = ios.gcount();
        if (got <= 0)
            break;
        fwrite(chunk, 1, got, out);
    }
}
00398 #endif
00399 
// Jose Garcia
//
// Append the textual representation of val, written in the given base, to
// str_val. Digits above 9 are written as upper-case letters; a negative
// value is preceded by '-'.
//
// val      the number to convert (any long, including LONG_MIN).
// base     the radix, which must be in the range [2,36].
// str_val  value-result parameter; the text is appended to it.
//
// Throws std::invalid_argument, leaving str_val untouched, if base is
// outside [2,36].
void
append_long_to_string(long val, int base, string &str_val)
{
    // The array digits contains 36 elements which are the
    // possible valid digits for our bases in the range
    // [2,36]
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (base > 36 || base < 2) {
        // no conversion if wrong base
        throw std::invalid_argument("The parameter base has an invalid value.");
    }

    // Work on the magnitude as an unsigned value: negating LONG_MIN as a
    // long (which labs() does) overflows, unsigned negation does not.
    unsigned long magnitude = static_cast<unsigned long>(val);
    if (val < 0) {
        str_val += '-';
        magnitude = 0UL - magnitude;
    }

    // Digits come out least significant first, so collect them and append
    // them in reverse order.
    const unsigned long radix = static_cast<unsigned long>(base);
    string reversed;
    do {
        reversed += digits[magnitude % radix];
        magnitude /= radix;
    } while (magnitude != 0);

    str_val.append(reversed.rbegin(), reversed.rend());
}
00428 
00429 // base defaults to 10
00430 string
00431 long_to_string(long val, int base)
00432 {
00433     string s;
00434     append_long_to_string(val, base, s);
00435     return s;
00436 }
00437 
// Jose Garcia
//
// Append the text form of num to str. The number is formatted by an
// ostringstream with its precision set to 9 (no sprintf; see the 10/5/2001
// jhrg change).
void append_double_to_string(const double &num, string &str)
{
    ostringstream formatter;
    formatter.precision(9);
    formatter << num;
    str.append(formatter.str());
}
00448 
00449 string
00450 double_to_string(const double &num)
00451 {
00452     string s;
00453     append_double_to_string(num, s);
00454     return s;
00455 }
00456 
00457 // Get the version number of the core software. Defining this means that
00458 // clients of the DAP don't have to rely on config.h for the version
00459 // number.
00460 string
00461 dap_version()
00462 {
00463     return (string)"OPeNDAP DAP/" + libdap_version() + ": compiled on " + __DATE__ + ":" + __TIME__ ;
00464 }
00465 
// Given a pathname, return the file at the end of the path. This is used
// when reporting errors (maybe other times, too) to keep the server from
// revealing too much about its organization when sending error responses
// back to clients. 10/11/2000 jhrg
// MT-safe. 08/05/02 jhrg

// The path separator for the platform this file is built for.
#ifdef WIN32
static const char path_sep[] = "\\";
#else
static const char path_sep[] = "/";
#endif

// Returns whatever follows the last path separator in path, or path itself
// when it holds no separator.
string
path_to_filename(string path)
{
    const string::size_type last_sep = path.rfind(path_sep);
    if (last_sep == string::npos)
        return path;

    return path.substr(last_sep + 1);
}
00489 
00490 // Look around for a reasonable place to put a temporary file. Check first
00491 // the value of the TMPDIR env var. If that does not yeild a path that's
00492 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
00493 // defined in stdio.h. If both come up empty, then use `./'.
00494 //
00495 // This function allocates storage using new. The caller must delete the char
00496 // array.
00497 
00498 // Change this to a version that either returns a string or an open file
00499 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
00500 // (see open()) to make it more secure. Ideal solution: get deserialize()
00501 // methods to read from a stream returned by libcurl, not from a temporary
00502 // file. 9/21/07 jhrg
00503 char *
00504 get_tempfile_template(char *file_template)
00505 {
00506     char *c;
00507     
00508 #ifdef WIN32
00509     // whitelist for a WIN32 directory
00510     Regex directory("[-a-zA-Z0-9_\\]*");
00511         
00512     c = getenv("TEMP");
00513     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)
00514         goto valid_temp_directory;
00515 
00516     c= getenv("TMP");
00517     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)
00518         goto valid_temp_directory;
00519 #else
00520         // whitelist for a directory
00521         Regex directory("[-a-zA-Z0-9_/]*");
00522         
00523         c = getenv("TMPDIR");
00524         if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0))
00525         goto valid_temp_directory;
00526 
00527 #ifdef P_tmpdir
00528         if (access(P_tmpdir, W_OK | R_OK) == 0) {
00529         c = P_tmpdir;
00530         goto valid_temp_directory;
00531         }
00532 #endif
00533 
00534 #endif  // WIN32
00535 
00536     c = ".";
00537     
00538 valid_temp_directory:
00539         // Sanitize allocation
00540         int size = strlen(c) + strlen(file_template) + 2;
00541         if (!size_ok(1, size))
00542                 throw Error("Bad temporary file name.");
00543                 
00544     char *temp = new char[size];
00545     strncpy(temp, c, size-2);
00546     strcat(temp, "/");
00547 
00548     strcat(temp, file_template);
00549 
00550     return temp;
00551 }
00552 
00558 #ifndef WIN32
// Create and open a temporary file.
//
// temp  a writable mkstemp() template; mkstemp() overwrites it with the
//       name of the file it creates.
//
// Returns a FILE * opened in "a+" mode on the new file, or 0 if the file
// could not be created or opened. On failure no descriptor is left open
// and no file is left behind.
FILE *
get_temp_file(char *temp)
{
    int fd = mkstemp(temp);
    if (fd < 0)
        return 0;

    FILE *tmp = fdopen(fd, "a+");
    if (!tmp) {
        // The caller only sees a null result, so release what mkstemp()
        // created instead of leaking the descriptor and the file.
        close(fd);
        unlink(temp);
    }

    return tmp;
}
00568 #endif
00569 
// Read the whole of a file into a string.
//
// fp  an open, readable stream; it is rewound first, so the result always
//     starts at the beginning of the file, and it is left positioned where
//     reading stopped.
//
// Returns every byte read, embedded null bytes included.
string
file_to_string(FILE *fp)
{
    rewind(fp);

    // Read in chunks rather than one fread() call per byte.
    string contents;
    char buffer[4096];
    size_t got;
    while ((got = fread(buffer, 1, sizeof(buffer), fp)) > 0)
        contents.append(buffer, got);

    return contents;
}
00584 
00587 
// Sanity check for an allocation of nelem elements of sz bytes each.
// Returns true only when sz is not zero and nelem is strictly less than
// UINT_MAX / sz.
bool
size_ok(uint sz, uint nelem)
{
    if (sz == 0)
        return false;

    const uint limit = UINT_MAX / sz;
    return nelem < limit;
}
00598 
00615 bool
00616 pathname_ok(const string &path, bool strict)
00617 {
00618     if (path.length() > 255)
00619         return false;
00620     
00621     Regex name("[0-9A-z_./-]+");
00622     if (!strict)
00623         name = "[:print:]+";
00624         
00625     string::size_type len = path.length();
00626     int result = name.match(path.c_str(), len);
00627     if (result != len)
00628         return false;
00629  
00630     return true;
00631 }
00632 

Generated on Wed Nov 14 03:15:44 2007 for libdap++ by  doxygen 1.5.1