OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESUtil.cc
Go to the documentation of this file.
1 // BESUtil.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 
38 #if HAVE_UNISTD_H
39 #include <unistd.h>
40 #endif
41 
42 #include <cstdio>
43 #include <cerrno>
44 #include <cstring>
45 #include <cstdlib>
46 #include <sstream>
47 #include <iostream>
48 
49 using std::istringstream;
50 using std::cout;
51 using std::endl;
52 
53 #include "BESUtil.h"
54 #include "BESForbiddenError.h"
55 #include "BESNotFoundError.h"
56 #include "BESInternalError.h"
57 
58 #define CRLF "\r\n"
59 
64 void BESUtil::set_mime_text(ostream &strm) {
65  strm << "HTTP/1.0 200 OK" << CRLF;
66  strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
67 
68  const time_t t = time(0);
69  strm << "Date: " << rfc822_date(t).c_str() << CRLF;
70  strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
71 
72  strm << "Content-Type: text/plain" << CRLF;
73  // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
74  strm << "Content-Description: unknown" << CRLF;
75  strm << CRLF;
76 }
77 
82 void BESUtil::set_mime_html(ostream &strm) {
83  strm << "HTTP/1.0 200 OK" << CRLF;
84  strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
85 
86  const time_t t = time(0);
87  strm << "Date: " << rfc822_date(t).c_str() << CRLF;
88  strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
89 
90  strm << "Content-type: text/html" << CRLF;
91  // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
92  strm << "Content-Description: unknown" << CRLF;
93  strm << CRLF;
94 }
95 
96 // Return a MIME rfc-822 date. The grammar for this is:
97 // date-time = [ day "," ] date time ; dd mm yy
98 // ; hh:mm:ss zzz
99 //
100 // day = "Mon" / "Tue" / "Wed" / "Thu"
101 // / "Fri" / "Sat" / "Sun"
102 //
103 // date = 1*2DIGIT month 2DIGIT ; day month year
104 // ; e.g. 20 Jun 82
105 // NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
106 //
107 // month = "Jan" / "Feb" / "Mar" / "Apr"
108 // / "May" / "Jun" / "Jul" / "Aug"
109 // / "Sep" / "Oct" / "Nov" / "Dec"
110 //
111 // time = hour zone ; ANSI and Military
112 //
113 // hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT]
114 // ; 00:00:00 - 23:59:59
115 //
116 // zone = "UT" / "GMT" ; Universal Time
117 // ; North American : UT
118 // / "EST" / "EDT" ; Eastern: - 5/ - 4
119 // / "CST" / "CDT" ; Central: - 6/ - 5
120 // / "MST" / "MDT" ; Mountain: - 7/ - 6
121 // / "PST" / "PDT" ; Pacific: - 8/ - 7
122 // / 1ALPHA ; Military: Z = UT;
123 // ; A:-1; (J not used)
124 // ; M:-12; N:+1; Y:+12
125 // / ( ("+" / "-") 4DIGIT ) ; Local differential
126 // ; hours+min. (HHMM)
127 
128 static const char *days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
129 static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
130  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
131 
141 string BESUtil::rfc822_date(const time_t t) {
142  struct tm *stm = gmtime(&t);
143  char d[256];
144 
145  snprintf(d, 255, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm->tm_wday],
146  stm->tm_mday, months[stm->tm_mon], 1900 + stm->tm_year,
147  stm->tm_hour, stm->tm_min, stm->tm_sec);
148  d[255] = '\0';
149  return string(d);
150 }
151 
152 string BESUtil::unhexstring(string s) {
153  int val;
154  istringstream ss(s);
155  ss >> std::hex >> val;
156  char tmp_str[2];
157  tmp_str[0] = static_cast<char> (val);
158  tmp_str[1] = '\0';
159  return string(tmp_str);
160 }
161 
162 // I modified this to mirror the version in libdap. The change allows several
163 // escape sequences to by listed in 'except'. jhrg 2/18/09
164 string BESUtil::www2id(const string &in, const string &escape,
165  const string &except) {
166  string::size_type i = 0;
167  string res = in;
168  while ((i = res.find_first_of(escape, i)) != string::npos) {
169  if (except.find(res.substr(i, 3)) != string::npos) {
170  i += 3;
171  continue;
172  }
173  res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
174  }
175 
176  return res;
177 }
178 
179 string BESUtil::lowercase(const string &s) {
180  string return_string = s;
181  for (int j = 0; j < static_cast<int> (return_string.length()); j++) {
182  return_string[j] = (char) tolower(return_string[j]);
183  }
184 
185  return return_string;
186 }
187 
188 string BESUtil::unescape(const string &s) {
189  bool done = false;
190  string::size_type index = 0;
191  /* string::size_type new_index = 0 ; */
192  string new_str;
193  while (!done) {
194  string::size_type bs = s.find('\\', index);
195  if (bs == string::npos) {
196  new_str += s.substr(index, s.length() - index);
197  done = true;
198  } else {
199  new_str += s.substr(index, bs - index);
200  new_str += s[bs + 1];
201  index = bs + 2;
202  }
203  }
204 
205  return new_str;
206 }
207 
229 void BESUtil::check_path(const string &path, const string &root,
230  bool follow_sym_links) {
231  // if nothing is passed in path, then the path checks out since root is
232  // assumed to be valid.
233  if (path == "")
234  return;
235 
236  // make sure there are no ../ in the directory, backing up in any way is
237  // not allowed.
238  string::size_type dotdot = path.find("..");
239  if (dotdot != string::npos) {
240  string s = (string) "You are not allowed to access the node " + path;
241  throw BESForbiddenError(s, __FILE__, __LINE__);
242  }
243 
244  // What I want to do is to take each part of path and check to see if it
245  // is a symbolic link and it is accessible. If everything is ok, add the
246  // next part of the path.
247  bool done = false;
248 
249  // what is remaining to check
250  string rem = path;
251  if (rem[0] == '/')
252  rem = rem.substr(1, rem.length() - 1);
253  if (rem[rem.length() - 1] == '/')
254  rem = rem.substr(0, rem.length() - 1);
255 
256  // full path of the thing to check
257  string fullpath = root;
258  if (fullpath[fullpath.length() - 1] == '/') {
259  fullpath = fullpath.substr(0, fullpath.length() - 1);
260  }
261 
262  // path checked so far
263  string checked;
264 
265  while (!done) {
266  size_t slash = rem.find('/');
267  if (slash == string::npos) {
268  fullpath = fullpath + "/" + rem;
269  checked = checked + "/" + rem;
270  done = true;
271  } else {
272  fullpath = fullpath + "/" + rem.substr(0, slash);
273  checked = checked + "/" + rem.substr(0, slash);
274  rem = rem.substr(slash + 1, rem.length() - slash);
275  }
276 
277  if (!follow_sym_links) {
278  struct stat buf;
279  int statret = lstat(fullpath.c_str(), &buf);
280  if (statret == -1) {
281  int errsv = errno;
282  // stat failed, so not accessible. Get the error string,
283  // store in error, and throw exception
284  char *s_err = strerror(errsv);
285  string error = "Unable to access node " + checked + ": ";
286  if (s_err) {
287  error = error + s_err;
288  } else {
289  error = error + "unknown access error";
290  }
291  // ENOENT means that the node wasn't found. Otherwise, access
292  // is denied for some reason
293  if (errsv == ENOENT) {
294  throw BESNotFoundError(error, __FILE__, __LINE__);
295  } else {
296  throw BESForbiddenError(error, __FILE__, __LINE__);
297  }
298  } else {
299  // lstat was successful, now check if sym link
300  if (S_ISLNK( buf.st_mode )) {
301  string error = "You do not have permission to access "
302  + checked;
303  throw BESForbiddenError(error, __FILE__, __LINE__);
304  }
305  }
306  } else {
307  // just do a stat and see if we can access the thing. If we
308  // can't, get the error information and throw an exception
309  struct stat buf;
310  int statret = stat(fullpath.c_str(), &buf);
311  if (statret == -1) {
312  int errsv = errno;
313  // stat failed, so not accessible. Get the error string,
314  // store in error, and throw exception
315  char *s_err = strerror(errsv);
316  string error = "Unable to access node " + checked + ": ";
317  if (s_err) {
318  error = error + s_err;
319  } else {
320  error = error + "unknown access error";
321  }
322  // ENOENT means that the node wasn't found. Otherwise, access
323  // is denied for some reason
324  if (errsv == ENOENT) {
325  throw BESNotFoundError(error, __FILE__, __LINE__);
326  } else {
327  throw BESForbiddenError(error, __FILE__, __LINE__);
328  }
329  }
330  }
331  }
332 }
333 
334 char *
335 BESUtil::fastpidconverter(char *buf, int base) {
336  return fastpidconverter(getpid(), buf, base);
337 }
338 
339 char *
340 BESUtil::fastpidconverter(long val, /* value to be converted */
341 char *buf, /* output string */
342 int base) /* conversion base */
343 {
344  ldiv_t r; /* result of val / base */
345 
346  if (base > 36 || base < 2) /* no conversion if wrong base */
347  {
348  *buf = '\0';
349  return buf;
350  }
351  if (val < 0)
352  *buf++ = '-';
353  r = ldiv(labs(val), base);
354 
355  /* output digits of val/base first */
356 
357  if (r.quot > 0)
358  buf = fastpidconverter(r.quot, buf, base);
359  /* output last digit */
360 
361  *buf++ = "0123456789abcdefghijklmnopqrstuvwxyz"[(int) r.rem];
362  *buf = '\0';
363  return buf;
364 }
365 
367  if (!key.empty()) {
368  string::size_type first = key.find_first_not_of(" \t\n\r");
369  string::size_type last = key.find_last_not_of(" \t\n\r");
370  if (first == string::npos)
371  key = "";
372  else {
373  string::size_type num = last - first + 1;
374  string new_key = key.substr(first, num);
375  key = new_key;
376  }
377  }
378 }
379 
380 string BESUtil::entity(char c) {
381  switch (c) {
382  case '>':
383  return "&gt;";
384  case '<':
385  return "&lt;";
386  case '&':
387  return "&amp;";
388  case '\'':
389  return "&apos;";
390  case '\"':
391  return "&quot;";
392  default:
393  return string(1, c); // is this proper default, just the char?
394  }
395 }
396 
403 string BESUtil::id2xml(string in, const string &not_allowed) {
404  string::size_type i = 0;
405 
406  while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
407  in.replace(i, 1, entity(in[i]));
408  i++;
409  }
410 
411  return in;
412 }
413 
419 string BESUtil::xml2id(string in) {
420  string::size_type i = 0;
421 
422  while ((i = in.find("&gt;", i)) != string::npos)
423  in.replace(i, 4, ">");
424 
425  i = 0;
426  while ((i = in.find("&lt;", i)) != string::npos)
427  in.replace(i, 4, "<");
428 
429  i = 0;
430  while ((i = in.find("&amp;", i)) != string::npos)
431  in.replace(i, 5, "&");
432 
433  i = 0;
434  while ((i = in.find("&apos;", i)) != string::npos)
435  in.replace(i, 6, "'");
436 
437  i = 0;
438  while ((i = in.find("&quot;", i)) != string::npos)
439  in.replace(i, 6, "\"");
440 
441  return in;
442 }
443 
457 void BESUtil::explode(char delim, const string &str, list<string> &values) {
458  std::string::size_type start = 0;
459  std::string::size_type qstart = 0;
460  std::string::size_type adelim = 0;
461  std::string::size_type aquote = 0;
462  bool done = false;
463  while (!done) {
464  string aval;
465  if (str[start] == '"') {
466  bool endquote = false;
467  qstart = start + 1;
468  while (!endquote) {
469  aquote = str.find('"', qstart);
470  if (aquote == string::npos) {
471  string currval = str.substr(start, str.length() - start);
472  string err = "BESUtil::explode - No end quote after value "
473  + currval;
474  throw BESInternalError(err, __FILE__, __LINE__);
475  }
476  // could be an escaped escape character and an escaped
477  // quote, or an escaped escape character and a quote
478  if (str[aquote - 1] == '\\') {
479  if (str[aquote - 2] == '\\') {
480  endquote = true;
481  qstart = aquote + 1;
482  } else {
483  qstart = aquote + 1;
484  }
485  } else {
486  endquote = true;
487  qstart = aquote + 1;
488  }
489  }
490  if (str[qstart] != delim && qstart != str.length()) {
491  string currval = str.substr(start, qstart - start);
492  string err = "BESUtil::explode - No delim after end quote "
493  + currval;
494  throw BESInternalError(err, __FILE__, __LINE__);
495  }
496  if (qstart == str.length()) {
497  adelim = string::npos;
498  } else {
499  adelim = qstart;
500  }
501  } else {
502  adelim = str.find(delim, start);
503  }
504  if (adelim == string::npos) {
505  aval = str.substr(start, str.length() - start);
506  done = true;
507  } else {
508  aval = str.substr(start, adelim - start);
509  }
510 
511  values.push_back(aval);
512  start = adelim + 1;
513  if (start == str.length()) {
514  values.push_back("");
515  done = true;
516  }
517  }
518 }
519 
530 string BESUtil::implode(const list<string> &values, char delim) {
531  string result;
532  list<string>::const_iterator i = values.begin();
533  list<string>::const_iterator e = values.end();
534  bool first = true;
535  string::size_type d; // = string::npos ;
536  for (; i != e; i++) {
537  if (!first)
538  result += delim;
539  d = (*i).find(delim);
540  if (d != string::npos && (*i)[0] != '"') {
541  string err =
542  (string) "BESUtil::implode - delimiter exists in value "
543  + (*i);
544  throw BESInternalError(err, __FILE__, __LINE__);
545  }
546  //d = string::npos ;
547  result += (*i);
548  first = false;
549  }
550  return result;
551 }
552 
572 void BESUtil::url_explode(const string &url_str, BESUtil::url &url_parts) {
573  string rest;
574 
575  string::size_type colon = url_str.find(":");
576  if (colon == string::npos) {
577  string err = "BESUtil::url_explode: missing colon for protocol";
578  throw BESInternalError(err, __FILE__, __LINE__);
579  }
580 
581  url_parts.protocol = url_str.substr(0, colon);
582 
583  if (url_str.substr(colon, 3) != "://") {
584  string err = "BESUtil::url_explode: no :// in the URL";
585  throw BESInternalError(err, __FILE__, __LINE__);
586  }
587 
588  colon += 3;
589  rest = url_str.substr(colon);
590 
591  string::size_type slash = rest.find("/");
592  if (slash == string::npos)
593  slash = rest.length();
594 
595  string::size_type at = rest.find("@");
596  if ((at != string::npos) && (at < slash)) {
597  // everything before the @ is username:password
598  string up = rest.substr(0, at);
599  colon = up.find(":");
600  if (colon != string::npos) {
601  url_parts.uname = up.substr(0, colon);
602  url_parts.psswd = up.substr(colon + 1);
603  } else {
604  url_parts.uname = up;
605  }
606  // everything after the @ is domain/path
607  rest = rest.substr(at + 1);
608  }
609  slash = rest.find("/");
610  if (slash == string::npos)
611  slash = rest.length();
612  colon = rest.find(":");
613  if ((colon != string::npos) && (colon < slash)) {
614  // everything before the colon is the domain
615  url_parts.domain = rest.substr(0, colon);
616  // everything after the folon is port/path
617  rest = rest.substr(colon + 1);
618  slash = rest.find("/");
619  if (slash != string::npos) {
620  url_parts.port = rest.substr(0, slash);
621  url_parts.path = rest.substr(slash + 1);
622  } else {
623  url_parts.port = rest;
624  url_parts.path = "";
625  }
626  } else {
627  slash = rest.find("/");
628  if (slash != string::npos) {
629  url_parts.domain = rest.substr(0, slash);
630  url_parts.path = rest.substr(slash + 1);
631  } else {
632  url_parts.domain = rest;
633  }
634  }
635 }
636 
637 string BESUtil::url_create(BESUtil::url &url_parts) {
638  string url = url_parts.protocol + "://";
639  if (!url_parts.uname.empty()) {
640  url += url_parts.uname;
641  if (!url_parts.psswd.empty())
642  url += ":" + url_parts.psswd;
643  url += "@";
644  }
645  url += url_parts.domain;
646  if (!url_parts.port.empty())
647  url += ":" + url_parts.port;
648  if (!url_parts.path.empty())
649  url += "/" + url_parts.path;
650 
651  return url;
652 }
653 
error thrown if the resource requested cannot be found
static string id2xml(string in, const string &not_allowed="><&'\"")
convert characters not allowed in xml to escaped characters
Definition: BESUtil.cc:403
exception thrown if internal error encountered
static string lowercase(const string &s)
Convert a string to all lower case.
Definition: BESUtil.cc:179
static string www2id(const string &in, const string &escape="%", const string &except="")
This function is used to unescape hex characters from strings.
Definition: BESUtil.cc:164
static void removeLeadingAndTrailingBlanks(string &key)
remove leading and trailing blanks from a string
Definition: BESUtil.cc:366
static string implode(const list< string > &values, char delim)
implode a list of values into a single string delimited by delim
Definition: BESUtil.cc:530
string port
Definition: BESUtil.h:112
static string xml2id(string in)
unescape xml escaped characters
Definition: BESUtil.cc:419
static void set_mime_html(ostream &strm)
Generate an HTTP 1.0 response header for a html document.
Definition: BESUtil.cc:82
static void explode(char delim, const string &str, list< string > &values)
explode a string into an array given a delimiter
Definition: BESUtil.cc:457
#define CRLF
Definition: BESUtil.cc:58
static void set_mime_text(ostream &strm)
Generate an HTTP 1.0 response header for a text document.
Definition: BESUtil.cc:64
static string unhexstring(string s)
Definition: BESUtil.cc:152
static void url_explode(const string &url_str, BESUtil::url &url_parts)
Given a url, break the url into its different parts.
Definition: BESUtil.cc:572
string path
Definition: BESUtil.h:113
error thrown if the BES is not allowed to access the resource requested
static string url_create(BESUtil::url &url_parts)
Definition: BESUtil.cc:637
string protocol
Definition: BESUtil.h:108
static char * fastpidconverter(char *buf, int base)
convert pid and place in provided buffer
Definition: BESUtil.cc:335
string uname
Definition: BESUtil.h:110
string psswd
Definition: BESUtil.h:111
static string unescape(const string &s)
Unescape characters with backslash before them.
Definition: BESUtil.cc:188
#define PACKAGE_STRING
Definition: config.h:138
if(!yyvaluep) return
static void check_path(const string &path, const string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:229
string domain
Definition: BESUtil.h:109