Skip to content

Commit

Permalink
[#9090] Support gzipped responses in webserver
Browse files Browse the repository at this point in the history
Summary:
Metrics can be pretty large in certain scenarios and we sometimes scrape them every 10s.
Returning gzipped responses when the http client specifies Accept-Encoding: gzip would help make the prometheus scrape consume less network bandwidth.

Ported from apache/kudu@8f52582

From zlib manual:

  Lower compression levels result in faster execution, but less compression

Here level 1 is used. We can start with this level.
If Prometheus metrics are very large, we can choose a higher level.

A flag, webserver_zlib_compression_level, has been introduced with default level of 1.
Another flag, webserver_compression_threshold_kb (default 4KB), allows specifying the threshold above which compression is performed.

Test Plan: WebserverTest.TestHttpCompression

Reviewers: mbautin, amitanand, sanketh

Reviewed By: sanketh

Subscribers: sanketh, mbautin, ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D12072
  • Loading branch information
tedyu committed Jul 7, 2021
1 parent 60944d3 commit f53c481
Show file tree
Hide file tree
Showing 8 changed files with 335 additions and 8 deletions.
58 changes: 57 additions & 1 deletion src/yb/server/webserver-test.cc
Expand Up @@ -29,7 +29,9 @@
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//
#include "yb/server/webserver.h"

#include <iosfwd>
#include <string>

#include <gflags/gflags.h>
Expand All @@ -39,14 +41,16 @@
#include "yb/gutil/strings/util.h"
#include "yb/gutil/stringprintf.h"
#include "yb/server/default-path-handlers.h"
#include "yb/server/webserver.h"
#include "yb/util/curl_util.h"
#include "yb/util/net/sockaddr.h"
#include "yb/util/test_util.h"
#include "yb/util/zlib.h"

using std::string;
using strings::Substitute;

DECLARE_int32(webserver_max_post_length_bytes);
DECLARE_int64(webserver_compression_threshold_kb);

namespace yb {

Expand All @@ -72,13 +76,15 @@ class WebserverTest : public YBTest {
ASSERT_OK(server_->GetBoundAddresses(&addrs));
ASSERT_EQ(addrs.size(), 1);
addr_ = addrs[0];
url_ = Substitute("http://$0", ToString(addr_));
}

protected:
EasyCurl curl_;
faststring buf_;
std::unique_ptr<Webserver> server_;
Endpoint addr_;
string url_;

string static_dir_;
};
Expand All @@ -93,6 +99,56 @@ TEST_F(WebserverTest, TestIndexPage) {
ASSERT_STR_CONTAINS(buf_.ToString(), "Home");
}

// Exercises gzip negotiation on the webserver root page: a response is gzipped only when the
// request's Accept-Encoding list contains "gzip"; otherwise it is served uncompressed.
TEST_F(WebserverTest, TestHttpCompression) {
  // With a threshold of zero no response is considered too small to compress.
  FLAGS_webserver_compression_threshold_kb = 0;

  // --- Request 1: gzip is offered (body only, headers are not yet returned by curl). ---
  ASSERT_OK(curl_.FetchURL(url_, &buf_, EasyCurl::kDefaultTimeoutSec,
                           {"Accept-Encoding: deflate, br, gzip"}));
  {
    // The body must be valid compressed data ...
    std::ostringstream inflated;
    ASSERT_OK(zlib::Uncompress(Slice(buf_.ToString()), &inflated));

    // ... which inflates to a page carrying the expected title.
    const string page = inflated.str();
    ASSERT_STR_CONTAINS(page, "YugabyteDB");
  }

  // --- Request 2: gzip is offered again, this time with response headers captured. ---
  curl_.set_return_headers(true);
  ASSERT_OK(curl_.FetchURL(url_, &buf_, EasyCurl::kDefaultTimeoutSec,
                           {"Accept-Encoding: deflate, megaturbogzip, gzip , br"}));
  {
    const string compressed_response = buf_.ToString();
    ASSERT_STR_CONTAINS(compressed_response, "Content-Encoding: gzip");
  }

  // --- Request 3: no Accept-Encoding header at all. ---
  curl_.set_return_headers(true);
  ASSERT_OK(curl_.FetchURL(url_, &buf_));
  {
    const string plain_response = buf_.ToString();
    // Regular headers are present, the gzip marker is not, and the body is readable as-is.
    ASSERT_STR_CONTAINS(plain_response, "Content-Type:");
    ASSERT_STR_NOT_CONTAINS(plain_response, "Content-Encoding: gzip");
    ASSERT_STR_CONTAINS(plain_response, "YugabyteDB");
  }

  // --- Request 4: encodings are offered, but none of them is exactly "gzip". ---
  curl_.set_return_headers(true);
  ASSERT_OK(curl_.FetchURL(url_, &buf_, EasyCurl::kDefaultTimeoutSec,
                           {"Accept-Encoding: megaturbogzip, deflate, xz"}));
  {
    const string unsupported_response = buf_.ToString();
    // The request still succeeds, without the gzip marker and with a readable body.
    ASSERT_STR_CONTAINS(unsupported_response, "HTTP/1.1 200 OK");
    ASSERT_STR_NOT_CONTAINS(unsupported_response, "Content-Encoding: gzip");
    ASSERT_STR_CONTAINS(unsupported_response, "YugabyteDB");
  }
}

TEST_F(WebserverTest, TestDefaultPaths) {
// Test memz
ASSERT_OK(curl_.FetchURL(strings::Substitute("http://$0/memz?raw=1", ToString(addr_)),
Expand Down
49 changes: 47 additions & 2 deletions src/yb/server/webserver.cc
Expand Up @@ -70,6 +70,7 @@
#include "yb/gutil/strings/numbers.h"
#include "yb/gutil/strings/split.h"
#include "yb/gutil/strings/stringpiece.h"
#include "yb/gutil/strings/strip.h"
#include "yb/util/env.h"
#include "yb/util/flag_tags.h"
#include "yb/util/net/net_util.h"
Expand All @@ -79,6 +80,7 @@
#include "yb/util/url-coding.h"
#include "yb/util/version_info.h"
#include "yb/util/shared_lock.h"
#include "yb/util/zlib.h"

#if defined(__APPLE__)
typedef sig_t sighandler_t;
Expand All @@ -90,6 +92,18 @@ DEFINE_int32(webserver_max_post_length_bytes, 1024 * 1024,
TAG_FLAG(webserver_max_post_length_bytes, advanced);
TAG_FLAG(webserver_max_post_length_bytes, runtime);

// zlib compression level used when gzipping webserver responses.
// RunPathHandler only honors values in the range [1, 9]; any other value falls back to level 1.
// Note: adjacent string literals are concatenated verbatim, so each fragment of the help text
// must end with its own separating space.
DEFINE_int32(webserver_zlib_compression_level, 1,
             "The zlib compression level. "
             "Lower compression levels result in faster execution, but less compression");
TAG_FLAG(webserver_zlib_compression_level, advanced);
TAG_FLAG(webserver_zlib_compression_level, runtime);

// Response size threshold for gzip compression, expressed in KB. RunPathHandler skips
// compression when the rendered response is smaller than this value multiplied by 1024 bytes.
DEFINE_int64(webserver_compression_threshold_kb, 4,
             "The threshold of response size above which compression is performed. "
             "Default value is 4KB");
TAG_FLAG(webserver_compression_threshold_kb, advanced);
TAG_FLAG(webserver_compression_threshold_kb, runtime);

namespace yb {

using std::string;
Expand Down Expand Up @@ -435,20 +449,51 @@ int Webserver::RunPathHandler(const PathHandler& handler,
if (use_style) {
BootstrapPageFooter(output);
}
// Check if gzip compression is accepted by the caller. If so, compress the
// content and replace the prerendered output.
const char* accept_encoding_str = sq_get_header(connection, "Accept-Encoding");
bool is_compressed = false;
vector<string> encodings = strings::Split(accept_encoding_str, ",");
for (string& encoding : encodings) {
StripWhiteSpace(&encoding);
if (encoding == "gzip") {
// Don't bother compressing empty content.
const string& uncompressed = resp_ptr->output.str();
if (uncompressed.size() < FLAGS_webserver_compression_threshold_kb * 1024) {
break;
}

std::ostringstream oss;
int level = FLAGS_webserver_zlib_compression_level > 0 &&
FLAGS_webserver_zlib_compression_level <= 9 ?
FLAGS_webserver_zlib_compression_level : 1;
Status s = zlib::CompressLevel(uncompressed, level, &oss);
if (s.ok()) {
resp_ptr->output.str(oss.str());
is_compressed = true;
} else {
LOG(WARNING) << "Could not compress output: " << s.ToString();
}
break;
}
}

string str = output->str();
// Without styling, render the page as plain text
if (!use_style) {
sq_printf(connection, "HTTP/1.1 200 OK\r\n"
"Content-Type: text/plain\r\n"
"Content-Length: %zd\r\n"
"%s"
"Access-Control-Allow-Origin: *\r\n"
"\r\n", str.length());
"\r\n", str.length(), is_compressed ? "Content-Encoding: gzip\r\n" : "");
} else {
sq_printf(connection, "HTTP/1.1 200 OK\r\n"
"Content-Type: text/html\r\n"
"Content-Length: %zd\r\n"
"%s"
"Access-Control-Allow-Origin: *\r\n"
"\r\n", str.length());
"\r\n", str.length(), is_compressed ? "Content-Encoding: gzip\r\n" : "");
}

// Make sure to use sq_write for printing the body; sq_printf truncates at 8kb
Expand Down
1 change: 1 addition & 0 deletions src/yb/util/CMakeLists.txt
Expand Up @@ -259,6 +259,7 @@ set(UTIL_SRCS
uuid.cc
varint.cc
version_info.cc
zlib.cc
async_util.cc
)

Expand Down
25 changes: 22 additions & 3 deletions src/yb/util/curl_util.cc
Expand Up @@ -31,6 +31,7 @@
//

#include "yb/util/curl_util.h"
#include "yb/util/scope_exit.h"

#include <glog/logging.h>

Expand Down Expand Up @@ -67,8 +68,11 @@ EasyCurl::~EasyCurl() {
curl_easy_cleanup(curl_);
}

Status EasyCurl::FetchURL(const string& url, faststring* buf, int64_t timeout_sec) {
return DoRequest(url, boost::none, boost::none, timeout_sec, buf);
// Fetches 'url' into 'buf' by delegating to DoRequest with no POST data and no content type
// (both passed as boost::none). 'timeout_sec' and the extra request 'headers'
// (e.g. {"Accept-Encoding: gzip"}) are forwarded unchanged.
// Returns the Status produced by DoRequest.
Status EasyCurl::FetchURL(const string& url,
faststring* buf,
int64_t timeout_sec,
const vector<string>& headers) {
return DoRequest(url, boost::none, boost::none, timeout_sec, buf, headers);
}

Status EasyCurl::PostToURL(
Expand Down Expand Up @@ -100,10 +104,25 @@ Status EasyCurl::DoRequest(
const boost::optional<const string>& post_data,
const boost::optional<const string>& content_type,
int64_t timeout_sec,
faststring* dst) {
faststring* dst,
const std::vector<std::string>& headers) {
CHECK_NOTNULL(dst)->clear();

// Add headers if specified.
struct curl_slist* curl_headers = nullptr;
auto clean_up_curl_slist = ScopeExit([&]() {
curl_slist_free_all(curl_headers);
});

for (const auto& header : headers) {
curl_headers = CHECK_NOTNULL(curl_slist_append(curl_headers, header.c_str()));
}
RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers)));

RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_URL, url.c_str())));
if (return_headers_) {
RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_HEADER, 1)));
}
RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, WriteCallback)));
RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_WRITEDATA,
static_cast<void *>(dst))));
Expand Down
17 changes: 15 additions & 2 deletions src/yb/util/curl_util.h
Expand Up @@ -34,6 +34,7 @@

#include <curl/curl.h>
#include <string>
#include <vector>

#include <boost/optional.hpp>

Expand All @@ -57,8 +58,13 @@ class EasyCurl {

// Fetch the given URL into the provided buffer.
// Any existing data in the buffer is replaced.
// The optional param 'headers' holds additional headers.
// e.g. {"Accept-Encoding: gzip"}
CHECKED_STATUS FetchURL(
const std::string& url, faststring* dst, int64_t timeout_sec = kDefaultTimeoutSec);
const std::string& url,
faststring* dst,
int64_t timeout_sec = kDefaultTimeoutSec,
const std::vector<std::string>& headers = {});

// Issue an HTTP POST to the given URL with the given data.
// Returns results in 'dst' as above.
Expand All @@ -79,6 +85,10 @@ class EasyCurl {

static const int64_t kDefaultTimeoutSec = 600;

// Sets whether requests should return the HTTP response headers along with the response.
// The value is stored in return_headers_ and consulted by DoRequest on each request.
// NOTE(review): the visible part of DoRequest only ever sets CURLOPT_HEADER to 1 when this is
// true; confirm that passing false after true actually turns header output back off.
void set_return_headers(bool v) {
return_headers_ = v;
}

private:
// Do a request. If 'post_data' is non-NULL, does a POST.
// Otherwise, does a GET.
Expand All @@ -87,9 +97,12 @@ class EasyCurl {
const boost::optional<const std::string>& post_data,
const boost::optional<const std::string>& content_type,
int64_t timeout_sec,
faststring* dst);
faststring* dst,
const std::vector<std::string>& headers = {});

CURL* curl_;
// Whether to return the HTTP headers with the response.
bool return_headers_ = false;
DISALLOW_COPY_AND_ASSIGN(EasyCurl);
};

Expand Down
8 changes: 8 additions & 0 deletions src/yb/util/test_macros.h
Expand Up @@ -175,6 +175,14 @@ std::string TEST_SetDifferenceStr(const std::set<T>& expected, const std::set<T>
} \
} while (0)

// Fails the current test via FAIL() if 'substr' occurs anywhere in 'str'; the failure message
// prints both the unexpected substring and the full string that was searched.
// 'str' is evaluated once (copied into a local std::string). 'substr' is evaluated once for
// the search and a second time when building the failure message, so avoid passing an
// expression with side effects.
#define ASSERT_STR_NOT_CONTAINS(str, substr) do { \
std::string _s = (str); \
if (_s.find((substr)) != std::string::npos) { \
FAIL() << "Expected not to find substring '" << (substr) \
<< "'. Got: '" << _s << "'"; \
} \
} while (0)

#define ASSERT_FILE_EXISTS(env, path) do { \
std::string _s = (path); \
ASSERT_TRUE(env->FileExists(_s)) \
Expand Down

0 comments on commit f53c481

Please sign in to comment.