diff --git a/src/yb/server/webserver-test.cc b/src/yb/server/webserver-test.cc index 4b6681eada8..b5582b4c4b3 100644 --- a/src/yb/server/webserver-test.cc +++ b/src/yb/server/webserver-test.cc @@ -29,7 +29,9 @@ // or implied. See the License for the specific language governing permissions and limitations // under the License. // +#include "yb/server/webserver.h" +#include #include #include @@ -39,14 +41,16 @@ #include "yb/gutil/strings/util.h" #include "yb/gutil/stringprintf.h" #include "yb/server/default-path-handlers.h" -#include "yb/server/webserver.h" #include "yb/util/curl_util.h" #include "yb/util/net/sockaddr.h" #include "yb/util/test_util.h" +#include "yb/util/zlib.h" using std::string; +using strings::Substitute; DECLARE_int32(webserver_max_post_length_bytes); +DECLARE_int64(webserver_compression_threshold_kb); namespace yb { @@ -72,6 +76,7 @@ class WebserverTest : public YBTest { ASSERT_OK(server_->GetBoundAddresses(&addrs)); ASSERT_EQ(addrs.size(), 1); addr_ = addrs[0]; + url_ = Substitute("http://$0", ToString(addr_)); } protected: @@ -79,6 +84,7 @@ class WebserverTest : public YBTest { faststring buf_; std::unique_ptr server_; Endpoint addr_; + string url_; string static_dir_; }; @@ -93,6 +99,56 @@ TEST_F(WebserverTest, TestIndexPage) { ASSERT_STR_CONTAINS(buf_.ToString(), "Home"); } +TEST_F(WebserverTest, TestHttpCompression) { + std::ostringstream oss; + string decoded_str; + FLAGS_webserver_compression_threshold_kb = 0; + + // Curl with gzip compression enabled. + ASSERT_OK(curl_.FetchURL(url_, &buf_, EasyCurl::kDefaultTimeoutSec, + {"Accept-Encoding: deflate, br, gzip"})); + + // If compressed successfully, we should be able to uncompress. + ASSERT_OK(zlib::Uncompress(Slice(buf_.ToString()), &oss)); + decoded_str = oss.str(); + + // Should have expected title. + ASSERT_STR_CONTAINS(decoded_str, "YugabyteDB"); + + // Should have expected header when compressed with headers returned. + curl_.set_return_headers(true); + ASSERT_OK(curl_.FetchURL(url_, &buf_, EasyCurl::kDefaultTimeoutSec, + {"Accept-Encoding: deflate, megaturbogzip, gzip , br"})); + ASSERT_STR_CONTAINS(buf_.ToString(), "Content-Encoding: gzip"); + + + // Curl with compression disabled. + curl_.set_return_headers(true); + ASSERT_OK(curl_.FetchURL(url_, &buf_)); + // Check expected header. + ASSERT_STR_CONTAINS(buf_.ToString(), "Content-Type:"); + + // Check unexpected header. + ASSERT_STR_NOT_CONTAINS(buf_.ToString(), "Content-Encoding: gzip"); + + // Should have expected title. + ASSERT_STR_CONTAINS(buf_.ToString(), "YugabyteDB"); + + // Curl with compression enabled but not accepted by YugabyteDB. + curl_.set_return_headers(true); + ASSERT_OK(curl_.FetchURL(url_, &buf_, EasyCurl::kDefaultTimeoutSec, + {"Accept-Encoding: megaturbogzip, deflate, xz"})); + // Check expected header. + ASSERT_STR_CONTAINS(buf_.ToString(), "HTTP/1.1 200 OK"); + + // Check unexpected header. + ASSERT_STR_NOT_CONTAINS(buf_.ToString(), "Content-Encoding: gzip"); + + // Should have expected title. + ASSERT_STR_CONTAINS(buf_.ToString(), "YugabyteDB"); + +} + TEST_F(WebserverTest, TestDefaultPaths) { // Test memz ASSERT_OK(curl_.FetchURL(strings::Substitute("http://$0/memz?raw=1", ToString(addr_)), diff --git a/src/yb/server/webserver.cc b/src/yb/server/webserver.cc index 35adf9b4bb2..48b7eb846ba 100644 --- a/src/yb/server/webserver.cc +++ b/src/yb/server/webserver.cc @@ -70,6 +70,7 @@ #include "yb/gutil/strings/numbers.h" #include "yb/gutil/strings/split.h" #include "yb/gutil/strings/stringpiece.h" +#include "yb/gutil/strings/strip.h" #include "yb/util/env.h" #include "yb/util/flag_tags.h" #include "yb/util/net/net_util.h" @@ -79,6 +80,7 @@ #include "yb/util/url-coding.h" #include "yb/util/version_info.h" #include "yb/util/shared_lock.h" +#include "yb/util/zlib.h" #if defined(__APPLE__) typedef sig_t sighandler_t; @@ -90,6 +92,18 @@ DEFINE_int32(webserver_max_post_length_bytes, 1024 * 1024, TAG_FLAG(webserver_max_post_length_bytes, advanced); TAG_FLAG(webserver_max_post_length_bytes, runtime); +DEFINE_int32(webserver_zlib_compression_level, 1, + "The zlib compression level." + "Lower compression levels result in faster execution, but less compression"); +TAG_FLAG(webserver_zlib_compression_level, advanced); +TAG_FLAG(webserver_zlib_compression_level, runtime); + +DEFINE_int64(webserver_compression_threshold_kb, 4, + "The threshold of response size above which compression is performed." + "Default value is 4KB"); +TAG_FLAG(webserver_compression_threshold_kb, advanced); +TAG_FLAG(webserver_compression_threshold_kb, runtime); + namespace yb { using std::string; @@ -435,20 +449,51 @@ int Webserver::RunPathHandler(const PathHandler& handler, if (use_style) { BootstrapPageFooter(output); } + // Check if gzip compression is accepted by the caller. If so, compress the + // content and replace the prerendered output. + const char* accept_encoding_str = sq_get_header(connection, "Accept-Encoding"); + bool is_compressed = false; + vector encodings = strings::Split(accept_encoding_str, ","); + for (string& encoding : encodings) { + StripWhiteSpace(&encoding); + if (encoding == "gzip") { + // Don't bother compressing empty content. + const string& uncompressed = resp_ptr->output.str(); + if (uncompressed.size() < FLAGS_webserver_compression_threshold_kb * 1024) { + break; + } + + std::ostringstream oss; + int level = FLAGS_webserver_zlib_compression_level > 0 && + FLAGS_webserver_zlib_compression_level <= 9 ? + FLAGS_webserver_zlib_compression_level : 1; + Status s = zlib::CompressLevel(uncompressed, level, &oss); + if (s.ok()) { + resp_ptr->output.str(oss.str()); + is_compressed = true; + } else { + LOG(WARNING) << "Could not compress output: " << s.ToString(); + } + break; + } + } + string str = output->str(); // Without styling, render the page as plain text if (!use_style) { sq_printf(connection, "HTTP/1.1 200 OK\r\n" "Content-Type: text/plain\r\n" "Content-Length: %zd\r\n" + "%s" "Access-Control-Allow-Origin: *\r\n" - "\r\n", str.length()); + "\r\n", str.length(), is_compressed ? "Content-Encoding: gzip\r\n" : ""); } else { sq_printf(connection, "HTTP/1.1 200 OK\r\n" "Content-Type: text/html\r\n" "Content-Length: %zd\r\n" + "%s" "Access-Control-Allow-Origin: *\r\n" - "\r\n", str.length()); + "\r\n", str.length(), is_compressed ? "Content-Encoding: gzip\r\n" : ""); } // Make sure to use sq_write for printing the body; sq_printf truncates at 8kb diff --git a/src/yb/util/CMakeLists.txt b/src/yb/util/CMakeLists.txt index 351ef8994b3..4bfa2b6377b 100644 --- a/src/yb/util/CMakeLists.txt +++ b/src/yb/util/CMakeLists.txt @@ -259,6 +259,7 @@ set(UTIL_SRCS uuid.cc varint.cc version_info.cc + zlib.cc async_util.cc ) diff --git a/src/yb/util/curl_util.cc b/src/yb/util/curl_util.cc index 07e8212c47e..a8394e6a8c9 100644 --- a/src/yb/util/curl_util.cc +++ b/src/yb/util/curl_util.cc @@ -31,6 +31,7 @@ // #include "yb/util/curl_util.h" +#include "yb/util/scope_exit.h" #include @@ -67,8 +68,11 @@ EasyCurl::~EasyCurl() { curl_easy_cleanup(curl_); } -Status EasyCurl::FetchURL(const string& url, faststring* buf, int64_t timeout_sec) { - return DoRequest(url, boost::none, boost::none, timeout_sec, buf); +Status EasyCurl::FetchURL(const string& url, + faststring* buf, + int64_t timeout_sec, + const vector& headers) { + return DoRequest(url, boost::none, boost::none, timeout_sec, buf, headers); } Status EasyCurl::PostToURL( @@ -100,10 +104,25 @@ Status EasyCurl::DoRequest( const boost::optional& post_data, const boost::optional& content_type, int64_t timeout_sec, - faststring* dst) { + faststring* dst, + const std::vector& headers) { CHECK_NOTNULL(dst)->clear(); + // Add headers if specified. + struct curl_slist* curl_headers = nullptr; + auto clean_up_curl_slist = ScopeExit([&]() { + curl_slist_free_all(curl_headers); + }); + + for (const auto& header : headers) { + curl_headers = CHECK_NOTNULL(curl_slist_append(curl_headers, header.c_str())); + } + RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers))); + RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_URL, url.c_str()))); + if (return_headers_) { + RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_HEADER, 1))); + } RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, WriteCallback))); RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_WRITEDATA, static_cast(dst)))); diff --git a/src/yb/util/curl_util.h b/src/yb/util/curl_util.h index f09689d8101..bfa4614924e 100644 --- a/src/yb/util/curl_util.h +++ b/src/yb/util/curl_util.h @@ -34,6 +34,7 @@ #include #include +#include #include @@ -57,8 +58,13 @@ class EasyCurl { // Fetch the given URL into the provided buffer. // Any existing data in the buffer is replaced. + // The optional param 'headers' holds additional headers. + // e.g. {"Accept-Encoding: gzip"} CHECKED_STATUS FetchURL( - const std::string& url, faststring* dst, int64_t timeout_sec = kDefaultTimeoutSec); + const std::string& url, + faststring* dst, + int64_t timeout_sec = kDefaultTimeoutSec, + const std::vector& headers = {}); // Issue an HTTP POST to the given URL with the given data. // Returns results in 'dst' as above. @@ -79,6 +85,10 @@ class EasyCurl { static const int64_t kDefaultTimeoutSec = 600; + void set_return_headers(bool v) { + return_headers_ = v; + } + private: // Do a request. If 'post_data' is non-NULL, does a POST. // Otherwise, does a GET. @@ -87,9 +97,12 @@ class EasyCurl { const boost::optional& post_data, const boost::optional& content_type, int64_t timeout_sec, - faststring* dst); + faststring* dst, + const std::vector& headers = {}); CURL* curl_; + // Whether to return the HTTP headers with the response. + bool return_headers_ = false; DISALLOW_COPY_AND_ASSIGN(EasyCurl); }; diff --git a/src/yb/util/test_macros.h b/src/yb/util/test_macros.h index a2fe2f2866c..6fab7a551ba 100644 --- a/src/yb/util/test_macros.h +++ b/src/yb/util/test_macros.h @@ -175,6 +175,14 @@ std::string TEST_SetDifferenceStr(const std::set& expected, const std::set } \ } while (0) +#define ASSERT_STR_NOT_CONTAINS(str, substr) do { \ + std::string _s = (str); \ + if (_s.find((substr)) != std::string::npos) { \ + FAIL() << "Expected not to find substring '" << (substr) \ + << "'. Got: '" << _s << "'"; \ + } \ + } while (0) + #define ASSERT_FILE_EXISTS(env, path) do { \ std::string _s = (path); \ ASSERT_TRUE(env->FileExists(_s)) \ diff --git a/src/yb/util/zlib.cc b/src/yb/util/zlib.cc new file mode 100644 index 00000000000..5a7f1517638 --- /dev/null +++ b/src/yb/util/zlib.cc @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "yb/util/zlib.h" + +#include +#include + +#include +#include +#include +#include + +#include "yb/gutil/macros.h" +#include "yb/gutil/strings/substitute.h" +#include "yb/util/slice.h" +#include "yb/util/status.h" + +using std::ostream; +using std::string; +using std::unique_ptr; + +#define ZRETURN_NOT_OK(call) \ + RETURN_NOT_OK(ZlibResultToStatus(call)) + +namespace yb { +namespace zlib { + +namespace { +Status ZlibResultToStatus(int rc) { + switch (rc) { + case Z_OK: + return Status::OK(); + case Z_STREAM_END: + return STATUS(EndOfFile, "zlib EOF"); + case Z_NEED_DICT: + return STATUS(Corruption, "zlib error: NEED_DICT"); + case Z_ERRNO: + return STATUS(IOError, "zlib error: Z_ERRNO"); + case Z_STREAM_ERROR: + return STATUS(Corruption, "zlib error: STREAM_ERROR"); + case Z_DATA_ERROR: + return STATUS(Corruption, "zlib error: DATA_ERROR"); + case Z_MEM_ERROR: + return STATUS(RuntimeError, "zlib error: MEM_ERROR"); + case Z_BUF_ERROR: + return STATUS(RuntimeError, "zlib error: BUF_ERROR"); + case Z_VERSION_ERROR: + return STATUS(RuntimeError, "zlib error: VERSION_ERROR"); + default: + return STATUS_FORMAT(RuntimeError, "zlib error: unknown error $0", rc); + } +} +} // anonymous namespace + +Status Compress(Slice input, ostream* out) { + return CompressLevel(input, Z_DEFAULT_COMPRESSION, out); +} + +// See https://zlib.net/zlib_how.html for context on using zlib. +Status CompressLevel(Slice input, int level, ostream* out) { + z_stream zs; + memset(&zs, 0, sizeof(zs)); + ZRETURN_NOT_OK(deflateInit2(&zs, level, Z_DEFLATED, + MAX_WBITS + 16 /* enable gzip */, + 8 /* memory level, max is 9 */, + Z_DEFAULT_STRATEGY)); + + zs.avail_in = input.size(); + zs.next_in = const_cast(input.data()); + const int kChunkSize = 64 * 1024; + unique_ptr chunk(new unsigned char[kChunkSize]); + int flush; + do { + zs.avail_out = kChunkSize; + zs.next_out = chunk.get(); + flush = (zs.avail_in == 0) ? Z_FINISH : Z_NO_FLUSH; + Status s = ZlibResultToStatus(deflate(&zs, flush)); + if (!s.ok() && !s.IsEndOfFile()) { + deflateEnd(&zs); + return s; + } + int out_size = zs.next_out - chunk.get(); + if (out_size > 0) { + out->write(reinterpret_cast(chunk.get()), out_size); + } + } while (flush != Z_FINISH); + ZRETURN_NOT_OK(deflateEnd(&zs)); + return Status::OK(); +} + +// See https://zlib.net/zlib_how.html for context on using zlib. +Status Uncompress(const Slice& compressed, std::ostream* out) { + // Initialize the z_stream at the start of the data with the + // data size as the available input. + z_stream zs; + memset(&zs, 0, sizeof(zs)); + zs.next_in = const_cast(compressed.data()); + zs.avail_in = compressed.size(); + // Initialize inflation with the windowBits set to be GZIP compatible. + // The documentation (https://www.zlib.net/manual.html#Advanced) describes that + // Adding 16 configures inflate to decode the gzip format. + ZRETURN_NOT_OK(inflateInit2(&zs, MAX_WBITS + 16 /* enable gzip */)); + // Continue calling inflate, decompressing data into the buffer in `zs.next_out` and writing + // the buffer content to `out`, until an error is received or there is no more data + // to decompress. + Status s; + do { + unsigned char buf[4096]; + zs.next_out = buf; + zs.avail_out = arraysize(buf); + s = ZlibResultToStatus(inflate(&zs, Z_NO_FLUSH)); + if (!s.ok() && !s.IsEndOfFile()) { + inflateEnd(&zs); + return s; + } + out->write(reinterpret_cast(buf), zs.next_out - buf); + } while (zs.avail_out == 0); + // If we haven't returned early with a bad status, finalize inflation. + ZRETURN_NOT_OK(inflateEnd(&zs)); + return Status::OK(); +} + +} // namespace zlib +} // namespace yb diff --git a/src/yb/util/zlib.h b/src/yb/util/zlib.h new file mode 100644 index 00000000000..4832c3bd13b --- /dev/null +++ b/src/yb/util/zlib.h @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef YB_UTIL_ZLIB_H +#define YB_UTIL_ZLIB_H + +#include + +#include "yb/util/slice.h" +#include "yb/util/status.h" + +namespace yb { +namespace zlib { + +// Zlib-compress the data in 'input', appending the result to 'out'. +// +// In case of an error, some data may still be appended to 'out'. +Status Compress(Slice input, std::ostream* out); + +// The same as the above, but with a custom level (1-9, where 1 is fastest +// and 9 is best compression). +Status CompressLevel(Slice input, int level, std::ostream* out); + +// Uncompress the zlib-compressed data in 'compressed', appending the result +// to 'out'. +// +// In case of an error, some data may still be appended to 'out'. +Status Uncompress(const Slice& compressed, std::ostream* out); + +} // namespace zlib +} // namespace yb + +#endif /* YB_UTIL_ZLIB_H */