vul_url.cxx
Go to the documentation of this file.
1 // This is core/vul/vul_url.cxx
2 //:
3 // \file
4 // \author Ian Scott
5 // Based on vil_stream_url by fsm
6 // \verbatim
7 // Modifications
8 // 8 Nov 2002 - Peter Vanroose - corrected HTTP client request syntax
9 // \endverbatim
10 
11 #include <cstdio>
12 #include <cstring>
13 #include <cstdlib>
14 #include <sstream>
15 #include <fstream>
16 #include <iostream>
17 #include "vul_url.h"
18 #ifdef _MSC_VER
19 # include <vcl_msvc_warnings.h>
20 #endif
21 #include <cassert>
22 #include <vul/vul_file.h>
23 
24 #if defined (_WIN32) && !defined(__CYGWIN__)
25 # include <winsock2.h>
26 #else
27 # include <unistd.h> // read(), write(), close()
28 # include <netdb.h> // gethostbyname(), sockaddr_in()
29 # include <sys/socket.h>
30 # include <netinet/in.h> // htons()
31 # define SOCKET int
32 #endif // unix
33 
34 #if defined(_WIN32) && !defined(__CYGWIN__)
35 // So that we don't call WSAStartup more than we need to
36 static int called_WSAStartup = 0;
37 #endif
38 
39 //: only call this method with a correctly formatted http URL
40 std::istream * vul_http_open(char const *url)
41 {
42  // split URL into auth, host, path and port number.
43  std::string host;
44  std::string path;
45  std::string auth;
46  int port = 80; // default
47 
48  // check it is an http URL.
49  assert (std::strncmp(url, "http://", 7) == 0);
50 
51  char const *p = url + 7;
52  while (*p && *p!='/')
53  ++ p;
54  host = std::string(url+7, p);
55 
56 
57  if (*p)
58  path = p+1;
59  else
60  path = "";
61 
62  //authentication
63  for (unsigned int i=0; i<host.size(); ++i)
64  if (host[i] == '@') {
65  auth = std::string(host.c_str(), host.c_str()+i);
66  host = std::string(host.c_str()+i+1, host.c_str() + host.size());
67  break;
68  }
69 
70  // port?
71  if (host.size() > 0L)
72  for (unsigned int i=(unsigned int)(host.size()-1); i>0; --i)
73  if (host[i] == ':') {
74  port = std::atoi(host.c_str() + i + 1);
75  host = std::string(host.c_str(), host.c_str() + i);
76  break;
77  }
78 
79  // do character translation
80  unsigned k =0;
81  while (k < path.size())
82  {
83  if (path[k] == ' ')
84  path.replace(k, 1, "%20");
85  else if (path[k] == '%')
86  path.replace(k, 1, "%25");
87  ++k;
88  }
89 
90  // so far so good.
91 #ifdef DEBUG
92  std::cerr << "auth = \'" << auth << "\'\n"
93  << "host = \'" << host << "\'\n"
94  << "path = \'" << path << "\'\n"
95  << "port = " << port << std::endl;
96 #endif
97 
98 #if defined(_WIN32) && !defined(__CYGWIN__)
99  if (called_WSAStartup==0)
100  {
101  WORD wVersionRequested;
102  WSADATA wsaData;
103 
104  wVersionRequested = MAKEWORD( 2, 2 );
105 
106  /* int err = */ WSAStartup( wVersionRequested, &wsaData );
107  }
108 #endif
109 
110  // create socket endpoint.
111  SOCKET tcp_socket = socket(PF_INET, // IPv4 protocols.
112  SOCK_STREAM, // two-way, reliable,
113  // connection-based stream socket.
114  PF_UNSPEC); // protocol number.
115 #if defined(_WIN32) && !defined(__CYGWIN__)
116  if (tcp_socket == INVALID_SOCKET) {
117 # ifndef NDEBUG
118  std::cerr << __FILE__ "error code : " << WSAGetLastError() << '\n';
119 # endif
120 #else
121  if (tcp_socket < 0) {
122 #endif
123  std::cerr << __FILE__ ": failed to create socket.\n";
124  return nullptr;
125  }
126 
127 #ifdef DEBUG
128  std::cerr << __FILE__ ": tcp_socket = " << tcp_socket << '\n';
129 #endif
130 
131  // get network address of server.
132  hostent *hp = gethostbyname(host.c_str());
133  if (! hp) {
134  std::cerr << __FILE__ ": failed to lookup host\n";
135 
136 #if defined(_WIN32) && !defined(__CYGWIN__)
137  closesocket(tcp_socket);
138 #else
139  close(tcp_socket);
140 #endif
141 
142  return nullptr;
143  }
144 
145  // make socket address.
146  sockaddr_in my_addr;
147  my_addr.sin_family = AF_INET;
148  // convert port number to network byte order..
149  my_addr.sin_port = htons(port);
150  std::memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
151 
152  // connect to server.
153  if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0) {
154  std::cerr << __FILE__ ": failed to connect to host\n";
155  //perror(__FILE__);
156 
157 #if defined(_WIN32) && !defined(__CYGWIN__)
158  closesocket(tcp_socket);
159 #else
160  close(tcp_socket);
161 #endif
162 
163  return nullptr;
164  }
165 
166  // buffer for data transfers over socket.
167  char buffer[4096];
168 
169  // send HTTP 1.1 request.
170  std::snprintf(buffer, 4090-std::strlen(buffer),
171  "GET %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
172  url, host.c_str());
173 
174  if (auth != "")
175  std::snprintf(buffer+std::strlen(buffer), 4090-std::strlen(buffer),
176  "Authorization: Basic %s\r\n",
177  vul_url::encode_base64(auth).c_str());
178 
179  if (std::snprintf(buffer+std::strlen(buffer), 4090-std::strlen(buffer), "\r\n") < 0)
180  {
181  std::cerr << "ERROR: vul_http_open buffer overflow.";
182  std::abort();
183  }
184 
185 #if defined(_WIN32) && !defined(__CYGWIN__)
186  if (send(tcp_socket, buffer, (int)std::strlen(buffer), 0) < 0) {
187 #else
188  if (::write(tcp_socket, buffer, std::strlen(buffer)) < 0) {
189 #endif
190  std::cerr << __FILE__ ": error sending HTTP request\n";
191 
192 #if defined(_WIN32) && !defined(__CYGWIN__)
193  closesocket(tcp_socket);
194 #else
195  close(tcp_socket);
196 #endif
197  return nullptr;
198  }
199 
200 
201  // read from socket into memory.
202  std::string contents;
203  {
204  int n;
205 #if defined(_WIN32) && !defined(__CYGWIN__)
206  while ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
207 #else
208  while ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
209 #endif
210  contents.append(buffer, n);
211 #ifdef DEBUG
212  std::cerr << n << " bytes\n";
213 #endif
214  }
215  }
216 
217  // close connection to server.
218 #if defined(_WIN32) && !defined(__CYGWIN__)
219  closesocket(tcp_socket);
220 #else
221  close(tcp_socket);
222 #endif
223 
224 #ifdef DEBUG
225  std::cerr << "HTTP server returned:\n" << contents << '\n';
226 #endif
227 
228  if (contents.find("HTTP/1.1 200") == contents.npos)
229  {
230  return nullptr;
231  }
232  std::string::size_type n = contents.find("\r\n\r\n");
233  if (n == contents.npos)
234  {
235  return nullptr;
236  }
237 
238  contents.erase(0,n+4);
239 #ifdef DEBUG
240  std::cerr << "vul_url::vul_http_open() returns:\n" << contents << '\n';
241 #endif
242  return new std::istringstream(contents);
243 }
244 
245 
246 //: only call this method with a correctly formatted http URL
247 bool vul_http_exists(char const *url)
248 {
249  // split URL into auth, host, path and port number.
250  std::string host;
251  std::string path;
252  std::string auth;
253  int port = 80; // default
254  assert (std::strncmp(url, "http://", 7) == 0);
255 
256  char const *p = url + 7;
257  while (*p && *p!='/')
258  ++ p;
259  host = std::string(url+7, p);
260 
261 
262  if (*p)
263  path = p+1; // may be the empty string, if URL ends in a slash
264  else
265  path = "";
266 
267  //authentication
268  for (unsigned int i=0; i<host.size(); ++i)
269  if (host[i] == '@') {
270  auth = std::string(host.c_str(), host.c_str()+i);
271  host = std::string(host.c_str()+i+1, host.c_str() + host.size());
272  break;
273  }
274 
275  // port?
276  for (unsigned int i=0; i<host.size(); ++i)
277  if (host[i] == ':') {
278  port = std::atoi(host.c_str() + i + 1);
279  host = std::string(host.c_str(), host.c_str() + i);
280  break;
281  }
282 
283  // do character translation
284  unsigned k =0;
285  while (k < path.size())
286  {
287  if (path[k] == ' ')
288  path.replace(k, 1, "%20");
289  else if (path[k] == '%')
290  path.replace(k, 1, "%25");
291  k++;
292  }
293 
294  // so far so good.
295 #ifdef DEBUG
296  std::cerr << "auth = \'" << auth << "\'\n"
297  << "host = \'" << host << "\'\n"
298  << "path = \'" << path << "\'\n"
299  << "port = " << port << std::endl;
300 #endif
301 
302 #if defined(_WIN32) && !defined(__CYGWIN__)
303  if (called_WSAStartup==0)
304  {
305  WORD wVersionRequested;
306  WSADATA wsaData;
307 
308  wVersionRequested = MAKEWORD( 2, 2 );
309 
310  /* int err = */ WSAStartup( wVersionRequested, &wsaData );
311  }
312 #endif
313 
314  // create socket endpoint.
315  SOCKET tcp_socket = socket(PF_INET, // IPv4 protocols.
316  SOCK_STREAM, // two-way, reliable,
317  // connection-based stream socket.
318  PF_UNSPEC); // protocol number.
319 
320 #if defined(_WIN32) && !defined(__CYGWIN__)
321  if (tcp_socket == INVALID_SOCKET) {
322 # ifndef NDEBUG
323  std::cerr << "error code : " << WSAGetLastError() << std::endl;
324 # endif
325 #else
326  if (tcp_socket < 0) {
327 #endif
328  std::cerr << __FILE__ ": failed to create socket.\n";
329  return false;
330  }
331 
332 #ifdef DEBUG
333  std::cerr << __FILE__ ": tcp_socket = " << tcp_socket << std::endl;
334 #endif
335 
336  // get network address of server.
337  hostent *hp = gethostbyname(host.c_str());
338  if (! hp) {
339  std::cerr << __FILE__ ": failed to lookup host\n";
340  return false;
341  }
342 
343  // make socket address.
344  sockaddr_in my_addr;
345  my_addr.sin_family = AF_INET;
346  // convert port number to network byte order..
347  my_addr.sin_port = htons(port);
348  std::memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
349 
350  // connect to server.
351  if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0)
352  {
353  std::cerr << __FILE__ ": failed to connect to host\n";
354  //perror(__FILE__);
355 #if defined(_WIN32) && !defined(__CYGWIN__)
356  closesocket(tcp_socket);
357 #else
358  close(tcp_socket);
359 #endif
360 
361  return false;
362  }
363 
364  // buffer for data transfers over socket.
365  char buffer[4096];
366 
367  // send HTTP 1.1 request.
368  std::snprintf(buffer, 4090,
369  "HEAD %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
370  url, host.c_str());
371  if (auth != "")
372  std::snprintf(buffer+std::strlen(buffer), 4090-std::strlen(buffer),
373  "Authorization: Basic %s\r\n",
374  vul_url::encode_base64(auth).c_str() );
375 
376  if (std::snprintf(buffer+std::strlen(buffer), 4090-std::strlen(buffer), "\r\n") < 0)
377  {
378  std::cerr << "ERROR: vul_http_exists buffer overflow.";
379  std::abort();
380  }
381 
382 #if defined(_WIN32) && !defined(__CYGWIN__)
383  if (send(tcp_socket, buffer, (int)std::strlen(buffer), 0) < 0) {
384 #else
385  if (::write(tcp_socket, buffer, std::strlen(buffer)) < 0) {
386 #endif
387  std::cerr << __FILE__ ": error sending HTTP request\n";
388 
389 #if defined(_WIN32) && !defined(__CYGWIN__)
390  closesocket(tcp_socket);
391 #else
392  close(tcp_socket);
393 #endif
394  return false;
395  }
396 
397 
398  // read from socket into memory.
399  std::string contents;
400  {
401  int n;
402 #if defined(_WIN32) && !defined(__CYGWIN__)
403  if ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
404 #else
405  if ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
406 #endif
407  contents.append(buffer, n);
408  //std::cerr << n << " bytes\n";
409  }
410  else
411  {
412 #if defined(_WIN32) && !defined(__CYGWIN__)
413  closesocket(tcp_socket);
414 #else
415  close(tcp_socket);
416 #endif
417  return false;
418  }
419  }
420 
421  // close connection to server.
422 #if defined(_WIN32) && !defined(__CYGWIN__)
423  closesocket(tcp_socket);
424 #else
425  close(tcp_socket);
426 #endif
427 
428 #ifdef DEBUG
429  std::cerr << "HTTP server returned:\n" << contents << '\n';
430 #endif
431 
432  return contents.find("HTTP/1.1 200") != contents.npos;
433 }
434 
435 
436 std::istream * vul_url::open(const char * url, std::ios::openmode mode)
437 {
438  // check for null pointer or empty strings.
439  if (!url || !*url)
440  return nullptr;
441  unsigned int l = (unsigned int)std::strlen(url);
442 
443  // check for filenames beginning "file:".
444  if (l > 7 && std::strncmp(url, "file://", 7) == 0)
445  return new std::ifstream(url+7,mode);
446 
447  // maybe it's an http URL?
448  if (l > 7 && std::strncmp(url, "http://", 7) == 0)
449  return vul_http_open(url);
450 
451  // maybe it's an ftp URL?
452  if (l > 6 && std::strncmp(url, "ftp://", 6) == 0)
453  {
454  std::cerr << __LINE__ << "ERROR:\n vul_read_url(const char * url)\n"
455  "Doesn't support FTP yet, url=" << url << std::endl;
456  return nullptr;
457  }
458 
459  // try an ordinary filename
460  return new std::ifstream(url, mode);
461 }
462 
463 
464 //: Does that URL exist
465 bool vul_url::exists(const char * url)
466 {
467  // check for null pointer or empty strings.
468  if (!url || !*url)
469  return false;
470  unsigned int l = (unsigned int)std::strlen(url);
471 
472  // check for filenames beginning "file:".
473  if (l > 7 && std::strncmp(url, "file://", 7) == 0)
474  return vul_file::exists(url+7);
475 
476  // maybe it's an http URL?
477  if (l > 7 && std::strncmp(url, "http://", 7) == 0)
478  return vul_http_exists(url);
479 
480  // maybe it's an ftp URL?
481  if (l > 6 && std::strncmp(url, "ftp://", 6) == 0)
482  {
483  std::cerr << "ERROR: vul_read_url(const char * url)\n"
484  "Doesn't support FTP yet, url=" << url << std::endl;
485  return false;
486  }
487 
488  // try an ordinary filename
489  return vul_file::exists(url);
490 }
491 
492 //: Is that a URL
493 bool vul_url::is_url(const char * url)
494 {
495  // check for null pointer or empty strings.
496  if (!url || !*url)
497  return false;
498  unsigned int l = (unsigned int)std::strlen(url);
499 
500  // check for filenames beginning "file:".
501  if (l > 7 && std::strncmp(url, "file://", 7) == 0)
502  return true;
503 
504  // maybe it's an http URL?
505  if (l > 7 && std::strncmp(url, "http://", 7) == 0)
506  return true;
507 
508  // maybe it's an https URL?
509  if (l > 8 && std::strncmp(url, "https://", 7) == 0)
510  return true;
511 
512  // maybe it's an ftp URL?
513  if (l > 6 && std::strncmp(url, "ftp://", 6) == 0)
514  return true;
515 
516  return false;
517 }
518 
519 //=======================================================================
520 
521 bool vul_url::is_file(const char * fn)
522 {
523  if (vul_url::is_url(fn))
524  return vul_url::exists(fn);
525  else
526  return vul_file::exists(fn) && ! vul_file::is_directory(fn);
527 }
528 
529 //=======================================================================
530 
// The 64 symbols of the base64 alphabet, indexed by 6-bit value.
static const
char base64_encoding[]=
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+/";

// Shared result buffer of encode_triplet(); it holds exactly four symbols
// and is not NUL-terminated.
static char out_buf[4];

//: Encode up to three bytes as one group of four base64 symbols.
// \param data  three bytes; only the first n carry input, the callers in
//              this file set the remaining ones to zero.
// \param n     number of meaningful bytes in data (1, 2 or 3; asserted).
// \return pointer to the static four-character buffer out_buf, padded with
//         '=' when n is 1 or 2. The buffer is overwritten by the next call.
static const char * encode_triplet(char data[3], unsigned int n)
{
  assert (n>0 && n <4);

  // Work on the raw byte values so that no sign extension gets in the way.
  const unsigned int first  = static_cast<unsigned char>(data[0]);
  const unsigned int second = static_cast<unsigned char>(data[1]);

  out_buf[0] = base64_encoding[first >> 2];
  out_buf[1] = base64_encoding[((first & 0x03) << 4) | (second >> 4)];

  // Start from full padding and overwrite as far as the input reaches.
  out_buf[2] = '=';
  out_buf[3] = '=';

  if (n != 1)
  {
    const unsigned int third = static_cast<unsigned char>(data[2]);
    out_buf[2] = base64_encoding[((second & 0x0f) << 2) | (third >> 6)];

    if (n != 2)
      out_buf[3] = base64_encoding[third & 0x3f];
  }

  return out_buf;
}
567 
568 //=======================================================================
569 
570 std::string vul_url::encode_base64(const std::string& in)
571 {
572  std::string out;
573  unsigned int i = 0, line_octets = 0;
574  const unsigned int l = (unsigned int)(in.size());
575  char data[3];
576  while (i <= l)
577  {
578  if (i == l)
579  {
580  out.append("=");
581  return out;
582  }
583 
584  data[0] = in[i++];
585  data[1] = data[2] = 0;
586 
587  if (i == l)
588  {
589  out.append(encode_triplet(data,1),4);
590  return out;
591  }
592 
593  data[1] = in[i++];
594 
595  if (i == l)
596  {
597  out.append(encode_triplet(data,2),4);
598  return out;
599  }
600 
601  data[2] = in[i++];
602 
603  out.append(encode_triplet(data,3),4);
604 
605  if (line_octets >= 68/4) // print carriage return
606  {
607  out.append("\r\n",2);
608  line_octets = 0;
609  }
610  else
611  ++line_octets;
612  }
613 
614  return out;
615 }
616 
617 //=======================================================================
618 
//: Fetch the value of the next base64 symbol in a string.
// Characters that are neither part of the base64 alphabet nor '=' are
// skipped.
// \param in  text to scan.
// \param i   in/out: index at which to start; advanced past every
//            character that was examined.
// \return 0..63 for an alphabet symbol, 64 for '=', or -1 when the end of
//         the string is reached without finding either.
static int get_next_char(const std::string &in, unsigned int *i)
{
  const std::string::size_type length = in.size();

  while (*i < length)
  {
    const char symbol = in[*i];
    ++*i;

    if (symbol >= 'A' && symbol <= 'Z')
      return (int)symbol - (int)'A';

    if (symbol >= 'a' && symbol <= 'z')
      return 26 + ((int)symbol - (int)'a');

    if (symbol >= '0' && symbol <= '9')
      return 52 + ((int)symbol - (int)'0');

    switch (symbol)
    {
      case '+': return 62;
      case '/': return 63;
      case '=': return 64; // padding marker
      default:  break;     // not a base64 character: ignore it
    }
  }

  return -1;
}
646 
647 //=======================================================================
648 
649 std::string vul_url::decode_base64(const std::string& in)
650 {
651  int c;
652  char data[3];
653 
654  unsigned int i=0;
655  const unsigned int l = (unsigned int)(in.size());
656  std::string out;
657  while (i < l)
658  {
659  data[0] = data[1] = data[2] = 0;
660 
661  // -=- 0 -=-
662  // Search next valid char...
663  c = get_next_char(in , &i);
664 
665  // treat '=' as end of message
666  if (c == 64)
667  return out;
668  if (c==-1)
669  return "";
670 
671  data[0] = char(((c & 0x3f) << 2) | (0x3 & data[0]));
672 
673  // -=- 1 -=-
674  // Search next valid char...
675  c = get_next_char(in , &i);
676 
677  // Error! Second character in octet can't be '='
678  if (c == 64 || c==-1)
679  return "";
680 
681  data[0] = char(((c & 0x30) >> 4) | (0xfc & data[0]));
682  data[1] = char(((c & 0x0f) << 4) | (0x0f & data[1]));
683 
684  // -=- 2 -=-
685  // Search next valid char...
686 
687  c = get_next_char(in , &i);
688 
689  if (c==-1)
690  return "";
691  if (c == 64)
692  {
693  // should really read next char and check it is '='
694  out.append(data,1); // write 1 byte to output
695  return out;
696  }
697 
698  data[1] = char(((c & 0x3c) >> 2) | (0xf0 & data[1]));
699  data[2] = char(((c & 0x03) << 6) | (0x3f & data[2]));
700 
701  // -=- 3 -=-
702  // Search next valid char...
703  c = get_next_char(in , &i);
704 
705  if (c==-1)
706  return "";
707 
708  if (c == 64)
709  {
710  out.append(data,2); // write 2 bytes to output
711  return out;
712  }
713 
714  data[2] = char((c & 0x3f) | (0xc0 & data[2]));
715 
716  out.append(data,3); // write 3 bytes to output
717  }
718 
719  return out;
720 }
#define SOCKET
Definition: vul_url.cxx:31
static std::string encode_base64(const std::string &in)
Encode a string of chars into base64 format.
Definition: vul_url.cxx:570
static bool is_file(const char *url)
Is that a file.
Definition: vul_url.cxx:521
A collection of miscellaneous filesystem-type utilities.
static bool exists(const char *url)
Does that URL exist.
Definition: vul_url.cxx:465
static bool is_directory(char const *filename)
Return true iff filename is a directory.
Definition: vul_file.cxx:55
std::istream * vul_http_open(char const *url)
only call this method with a correctly formatted http URL.
Definition: vul_url.cxx:40
static std::istream * open(const char *url, std::ios::openmode mode=std::ios::in)
open a URL.
Definition: vul_url.cxx:436
static bool is_url(const char *url)
Is that a URL.
Definition: vul_url.cxx:493
bool vul_http_exists(char const *url)
only call this method with a correctly formatted http URL.
Definition: vul_url.cxx:247
static std::string decode_base64(const std::string &in)
Decode a string of chars from base64 format.
Definition: vul_url.cxx:649
Static class methods to test and open streams via a URL.
static bool exists(char const *filename)
Return true iff filename exists. It may be any sort of file.
Definition: vul_file.cxx:108