diff options
-rw-r--r-- | libbutl/curl.cxx | 37 | ||||
-rw-r--r-- | libbutl/curl.hxx | 28 | ||||
-rw-r--r-- | tests/curl/driver.cxx | 43 | ||||
-rw-r--r-- | tests/curl/testscript | 65 |
4 files changed, 165 insertions, 8 deletions
diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx index 5649965..6e3c27a 100644 --- a/libbutl/curl.cxx +++ b/libbutl/curl.cxx @@ -217,6 +217,20 @@ namespace butl return r; } + bool curl:: + http_content_aspect (const string& h) + { + // There are a lot more other less common headers (Content-Language, etc), + // but let's keep this list short for now and extend it when required. + // + return icasecmp (h, "Content-Type:", 13) == 0 || + icasecmp (h, "Content-Length:", 15) == 0 || + icasecmp (h, "Content-Encoding:", 17) == 0 || + icasecmp (h, "Transfer-Encoding:", 18) == 0 || + icasecmp (h, "Last-Modified:", 14) == 0 || + icasecmp (h, "ETag:", 5) == 0; + } + curl::http_status curl:: read_http_status (ifdstream& is, bool skip_headers) { @@ -299,9 +313,30 @@ namespace butl rs = read_status (); // Reads the final status code. } + // Skips headers. + // if (skip_headers) { - while (!read_http_response_line (is).empty ()) ; // Skips headers. + // Keep track of content related headers. + // + bool content_expected (false); + for (string l; !(l = read_http_response_line (is)).empty (); ) + { + if (http_content_aspect (l)) + content_expected = true; + } + + // If no content related headers are encountered for the status code + // 200, then assume this is the CONNECT response and we are + // positioned at the beginning of the HTTP server response and thus + // re-read the status line and skip the headers. + // + if (rs.code == 200 && !content_expected) + { + rs = read_status (); // HTTP server's status code. + + while (!read_http_response_line (is).empty ()) ; + } } is.exceptions (es); diff --git a/libbutl/curl.hxx b/libbutl/curl.hxx index ea91807..2d7602d 100644 --- a/libbutl/curl.hxx +++ b/libbutl/curl.hxx @@ -171,8 +171,25 @@ namespace butl // std::invalid_argument if the status line could not be parsed. Pass // through the ios::failure exception on the stream error. // - // Note that if ios::failure is thrown the stream's exception mask may not - // be preserved. 
+ // Note that the curl's output may include the CONNECT request response + // from the HTTP proxy prior to the response from the HTTP server. The + // CONNECT response has the regular HTTP status line, followed by the HTTP + // headers, may not contain the response body, and is separated from the + // HTTP server response with an empty line. There is no fully reliable way + // to determine if the curl's output starts with the CONNECT response or + // not. Thus, we apply heuristics, assuming that if the HTTP status is 200 + // and there are no headers which describe some aspect of the response + // content (Content-Type, Content-Length, etc), then this is the CONNECT + // response and what follows is the HTTP server response status line + // rather than the response body. The thinking here is that the HTTP + // server response normally contains some headers which describe some + // aspects of the upcoming response content. Also, we assume that the + // failed CONNECT request (status code is not 2XX) may not be followed + // with the HTTP server response and thus we just treat such a proxy + // response as the server response. + // + // Also note that if ios::failure is thrown the stream's exception mask + // may not be preserved. // struct http_status { @@ -196,6 +213,13 @@ namespace butl static std::string read_http_response_line (ifdstream&); + // Return true if the passed header reflects some aspect of the upcoming + // HTTP response content (Content-Type, Content-Length, etc) and which is + // meaningless if no content is present in the response. 
+ // + static bool + http_content_aspect (const std::string& header); + private: enum method_proto {ftp_get, ftp_put, http_get, http_post}; using method_proto_options = small_vector<const char*, 2>; diff --git a/tests/curl/driver.cxx b/tests/curl/driver.cxx index 856fde3..304f972 100644 --- a/tests/curl/driver.cxx +++ b/tests/curl/driver.cxx @@ -2,6 +2,7 @@ // license : MIT; see accompanying LICENSE file #include <iostream> +#include <stdexcept> // invalid_argument #include <system_error> #include <libbutl/curl.hxx> @@ -16,7 +17,7 @@ using namespace std; using namespace butl; -// Usage: argv[0] tftp|http +// Usage: argv[0] tftp|http|http-parse <http-response-file> // static void @@ -126,17 +127,49 @@ http () } } +static void +http_parse (const path& p) +{ + try + { + ifdstream ifs (p); + curl::read_http_status (ifs); + cout << ifs.rdbuf(); + } + catch (const invalid_argument& e) + { + cerr << "error: " << e << endl; + } +} + int main (int argc, const char* argv[]) try { - assert (argc == 2); + assert (argc >= 2); string a (argv[1]); - if (a == "tftp") tftp (); - else if (a == "http") http (); - else assert (false); + if (a == "tftp") + { + assert (argc == 2); + + tftp (); + } + else if (a == "http") + { + assert (argc == 2); + + http (); + } + else if (a == "http-parse") + { + assert (argc == 3); + + http_parse (path (argv[2])); + } + else + assert (false); } catch (const system_error& e) { diff --git a/tests/curl/testscript b/tests/curl/testscript index d2056cd..59a94e9 100644 --- a/tests/curl/testscript +++ b/tests/curl/testscript @@ -62,3 +62,68 @@ sudo /usr/sbin/in.tftpd \ %curl: \(22\) The requested URL returned error: 404( Not Found)?% EOE } + +: http-parse +: +{ + test.arguments += http-parse + + : basics + : + { + cat <<EOI >=f; + HTTP/1.1 200 OK + Content-Length: 83 + Content-Type: text/plain;charset=utf-8 + + body + EOI + + $* f >'body' + } + + : continue + : + { + cat <<EOI >=f; + HTTP/1.1 100 Continue + + HTTP/1.1 200 OK + Content-Length: 83 + 
Content-Type: text/plain;charset=utf-8 + + body + EOI + + $* f >'body' + } + + : connect-proxy-success + : + { + cat <<EOI >=f; + HTTP/1.1 200 Connection Established + Proxy-Agent: Zscaler/6.2 + + HTTP/1.1 200 OK + Content-Length: 83 + Content-Type: text/plain;charset=utf-8 + + body + EOI + + $* f >'body' + } + + : connect-proxy-failure + : + { + cat <<EOI >=f; + HTTP/1.1 502 Bad Gateway + Proxy-Agent: Zscaler/6.2 + + EOI + + $* f >:'' + } +} |