From d2724285cfc6a4eaf32f505b392b53fffec2405e Mon Sep 17 00:00:00 2001 From: David Smith Date: Thu, 17 Aug 2023 16:57:16 +0200 Subject: [PATCH 1/3] [XrdHttp] Refactoring of the request statemachine for GET This change was mostly extracted from a larger pull request that included other features, #1957. The current change aims to be a minimal refactor in preparation for other work in the same area of the code. Co-authored-by: Brian Bockelman --- src/XrdHttp/XrdHttpReq.cc | 156 ++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 90 deletions(-) diff --git a/src/XrdHttp/XrdHttpReq.cc b/src/XrdHttp/XrdHttpReq.cc index c1d13495155..426d9ff1f4e 100644 --- a/src/XrdHttp/XrdHttpReq.cc +++ b/src/XrdHttp/XrdHttpReq.cc @@ -1115,6 +1115,11 @@ int XrdHttpReq::ProcessHTTPReq() { } + // The reqstate parameter basically moves us through a simple state machine. + // - 0: Perform a stat on the resource + // - 1: Perform a checksum request on the resource (only if requested in header; otherwise skipped) + // - 2: Perform an open request (dirlist as appropriate). + // - 3+: Reads from file; if at end, perform a close. switch (reqstate) { case 0: // Stat() @@ -1127,7 +1132,36 @@ int XrdHttpReq::ProcessHTTPReq() { } return 0; - case 1: // Open() or dirlist + case 1: // Checksum request + if (!(fileflags & kXR_isDir) && !m_req_digest.empty()) { + // In this case, the Want-Digest header was set. + bool has_opaque = strchr(resourceplusopaque.c_str(), '?'); + // Note that doChksum requires that the memory stays alive until the callback is invoked. + m_req_cksum = prot->cksumHandler.getChecksumToRun(m_req_digest); + if(!m_req_cksum) { + // No HTTP IANA checksums have been configured by the server admin, return a "METHOD_NOT_ALLOWED" error + prot->SendSimpleResp(403, NULL, NULL, (char *) "No HTTP-IANA compatible checksums have been configured.", 0, false); + return -1; + } + m_resource_with_digest = resourceplusopaque; + if (has_opaque) { + m_resource_with_digest += "&cks.type="; + m_resource_with_digest += m_req_cksum->getXRootDConfigDigestName().c_str(); + } else { + m_resource_with_digest += "?cks.type="; + m_resource_with_digest += m_req_cksum->getXRootDConfigDigestName().c_str(); + } + if (prot->doChksum(m_resource_with_digest) < 0) { + prot->SendSimpleResp(500, NULL, NULL, (char *) "Failed to start internal checksum request to satisfy Want-Digest header.", 0, false); + return -1; + } + return 0; + } else { + TRACEI(DEBUG, "No checksum requested; skipping to request state 2"); + reqstate += 1; + } + // fallthrough + case 2: // Open() or dirlist { if (!prot->Bridge) { @@ -1176,29 +1210,6 @@ int XrdHttpReq::ProcessHTTPReq() { // We don't want to be invoked again after this request is finished return 1; - } else if (!m_req_digest.empty()) { - // In this case, the Want-Digest header was set. - bool has_opaque = strchr(resourceplusopaque.c_str(), '?'); - // Note that doChksum requires that the memory stays alive until the callback is invoked. - m_req_cksum = prot->cksumHandler.getChecksumToRun(m_req_digest); - if(!m_req_cksum) { - // No HTTP IANA checksums have been configured by the server admin, return a "METHOD_NOT_ALLOWED" error - prot->SendSimpleResp(403, NULL, NULL, (char *) "No HTTP-IANA compatible checksums have been configured.", 0, false); - return -1; - } - m_resource_with_digest = resourceplusopaque; - if (has_opaque) { - m_resource_with_digest += "&cks.type="; - m_resource_with_digest += m_req_cksum->getXRootDConfigDigestName().c_str(); - } else { - m_resource_with_digest += "?cks.type="; - m_resource_with_digest += m_req_cksum->getXRootDConfigDigestName().c_str(); - } - if (prot->doChksum(m_resource_with_digest) < 0) { - prot->SendSimpleResp(500, NULL, NULL, (char *) "Failed to start internal checksum request to satisfy Want-Digest header.", 0, false); - return -1; - } - return 0; } else { @@ -1224,41 +1235,15 @@ int XrdHttpReq::ProcessHTTPReq() { } - } - case 2: // Open() in the case the user also requested a checksum. - { - if (!m_req_digest.empty()) { - // --------- OPEN - memset(&xrdreq, 0, sizeof (ClientRequest)); - xrdreq.open.requestid = htons(kXR_open); - l = resourceplusopaque.length() + 1; - xrdreq.open.dlen = htonl(l); - xrdreq.open.mode = 0; - xrdreq.open.options = htons(kXR_retstat | kXR_open_read); - - if (!prot->Bridge->Run((char *) &xrdreq, (char *) resourceplusopaque.c_str(), l)) { - prot->SendSimpleResp(404, NULL, NULL, (char *) "Could not run request.", 0, false); - return -1; - } - - // Prepare to chunk up the request - writtenbytes = 0; - - // We want to be invoked again after this request is finished - return 0; - } } // fallthrough - default: // Read() or Close() + default: // Read() or Close(); reqstate is 3+ { - if ( ((reqstate == 3 || (!m_req_digest.empty() && (reqstate == 4))) && (rwOps.size() > 1)) || - (writtenbytes >= length) ) { - - // Close() if this was a readv or we have finished, otherwise read the next chunk - - // --------- CLOSE - + // --------- CLOSE + if ( ((reqstate == 4) && (rwOps.size() > 1)) || // In this case, we performed a ReadV and it's done. + (writtenbytes >= length) ) // No ReadV but we have completed the request. + { memset(&xrdreq, 0, sizeof (ClientRequest)); xrdreq.close.requestid = htons(kXR_close); memcpy(xrdreq.close.fhandle, fhandle, 4); @@ -1272,6 +1257,7 @@ int XrdHttpReq::ProcessHTTPReq() { return 1; } + // --------- READ or READV if (rwOps.size() <= 1) { // No chunks or one chunk... Request the whole file or single read @@ -1332,7 +1318,7 @@ int XrdHttpReq::ProcessHTTPReq() { return -1; } } else { - // More than one chunk to read... use readv + // --------- READV length = ReqReadV(); @@ -1345,12 +1331,12 @@ int XrdHttpReq::ProcessHTTPReq() { // We want to be invoked again after this request is finished return 0; - } + } // case 3+ - } + } // switch (reqstate) - } + } // case XrdHttpReq::rtGET case XrdHttpReq::rtPUT: { @@ -2088,10 +2074,15 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { } - } else { - - + } // end handling of dirlist + else + { // begin handling of open-read-close + // To duplicate the state diagram from the rtGET request state + // - 0: Perform a stat on the resource + // - 1: Perform a checksum request on the resource (only if requested in header; otherwise skipped) + // - 2: Perform an open request (dirlist as appropriate). + // - 3+: Reads from file; if at end, perform a close. switch (reqstate) { case 0: //stat { @@ -2137,21 +2128,14 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { // We are here in the case of a negative response in a non-manager return 0; + } // end stat + case 1: // checksum was requested and now we have its response. + { + return PostProcessChecksum(m_digest_header); } - case 1: // open - case 2: // open when digest was requested + case 2: // open { - - if (reqstate == 1 && !m_req_digest.empty()) { // We requested a checksum and now have its response. - int response = PostProcessChecksum(m_digest_header); - if (-1 == response) { - return -1; - } - return 0; - } else if (((reqstate == 2 && !m_req_digest.empty()) || - (reqstate == 1 && m_req_digest.empty())) - && (xrdresp == kXR_ok)) { - + if (xrdresp == kXR_ok) { getfhandle(); @@ -2254,28 +2238,21 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { - } else if (xrdresp != kXR_ok) { + } else { // xrdresp indicates an error occurred - // If it's a dir then we are in the wrong place and we did the wrong thing. - //if (xrderrcode == 3016) { - // fileflags &= kXR_isDir; - // reqstate--; - // return 0; - //} prot->SendSimpleResp(httpStatusCode, NULL, NULL, httpStatusText.c_str(), httpStatusText.length(), false); return -1; } - // Remaining case: reqstate == 2 and we didn't ask for a digest (should be a read). + // Case should not be reachable + return -1; } - // fallthrough default: //read or readv { - // If we are postprocessing a close, potentially send out informational trailers if ((ntohs(xrdreq.header.requestid) == kXR_close) || - ((reqstate == 3) && (ntohs(xrdreq.header.requestid) == kXR_readv))) + ((reqstate == 4) && (ntohs(xrdreq.header.requestid) == kXR_readv))) { if (m_transfer_encoding_chunked && m_trailer_headers) { @@ -2330,7 +2307,7 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { // Prevent scenario where data is expected but none is actually read // E.g. Accessing files which return the results of a script if ((ntohs(xrdreq.header.requestid) == kXR_read) && - (reqstate > 2) && (iovN == 0)) { + (reqstate > 3) && (iovN == 0)) { TRACEI(REQ, "Stopping request because more data is expected " "but no data has been read."); return -1; @@ -2406,12 +2383,11 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { this->iovN = 0; return 0; - } + } // end read or readv } // switch reqstate - - } + } // End handling of the open-read-close case break; From 0a62bd4d04654705276997912db0c917b2430fa2 Mon Sep 17 00:00:00 2001 From: David Smith Date: Thu, 14 Sep 2023 11:13:16 +0200 Subject: [PATCH 2/3] [XrdHttp] Refactor read issuing during GET and fix read vector too long (#1976) Co-authored-by: Cedric Caffy --- src/XrdHttp.cmake | 1 + src/XrdHttp/XrdHttpProtocol.cc | 10 +- src/XrdHttp/XrdHttpProtocol.hh | 4 + src/XrdHttp/XrdHttpReadRangeHandler.cc | 658 +++++++++++++++++++++++++ src/XrdHttp/XrdHttpReadRangeHandler.hh | 282 +++++++++++ src/XrdHttp/XrdHttpReq.cc | 510 ++++++++++--------- src/XrdHttp/XrdHttpReq.hh | 42 +- src/XrdHttp/XrdHttpUtils.hh | 5 + tests/XrdHttpTests/XrdHttpTests.cc | 413 +++++++++++++++- 9 files changed, 1656 insertions(+), 269 deletions(-) create mode 100644 src/XrdHttp/XrdHttpReadRangeHandler.cc create mode 100644 src/XrdHttp/XrdHttpReadRangeHandler.hh diff --git a/src/XrdHttp.cmake b/src/XrdHttp.cmake index 391e290b4ce..ac0ed07033e 100644 --- a/src/XrdHttp.cmake +++ b/src/XrdHttp.cmake @@ -25,6 +25,7 @@ if( BUILD_HTTP ) XrdHttp/XrdHttpStatic.hh XrdHttp/XrdHttpTrace.hh XrdHttp/XrdHttpUtils.cc XrdHttp/XrdHttpUtils.hh + XrdHttp/XrdHttpReadRangeHandler.cc XrdHttp/XrdHttpReadRangeHandler.hh XrdHttp/XrdHttpChecksumHandler.cc XrdHttp/XrdHttpChecksumHandler.hh XrdHttp/XrdHttpChecksum.cc XrdHttp/XrdHttpChecksum.hh) diff --git a/src/XrdHttp/XrdHttpProtocol.cc b/src/XrdHttp/XrdHttpProtocol.cc index 7492193bb20..b2efb2d20ee 100644 --- a/src/XrdHttp/XrdHttpProtocol.cc +++ b/src/XrdHttp/XrdHttpProtocol.cc @@ -110,6 +110,7 @@ int XrdHttpProtocol::m_bio_type = 0; // BIO type identifier for our custom BIO. BIO_METHOD *XrdHttpProtocol::m_bio_method = NULL; // BIO method constructor. char *XrdHttpProtocol::xrd_cslist = nullptr; XrdHttpChecksumHandler XrdHttpProtocol::cksumHandler = XrdHttpChecksumHandler(); +XrdHttpReadRangeHandler::Configuration XrdHttpProtocol::ReadRangeConfig; XrdSysTrace XrdHttpTrace("http"); @@ -185,7 +186,7 @@ int BIO_get_shutdown(BIO *bio) { XrdHttpProtocol::XrdHttpProtocol(bool imhttps) : XrdProtocol("HTTP protocol handler"), ProtLink(this), -SecEntity(""), CurrentReq(this) { +SecEntity(""), CurrentReq(this, ReadRangeConfig) { myBuff = 0; Addr_str = 0; Reset(); @@ -951,6 +952,10 @@ int XrdHttpProtocol::Config(const char *ConfigFN, XrdOucEnv *myEnv) { char *var; int cfgFD, GoNo, NoGo = 0, ismine; + var = nullptr; + XrdOucEnv::Import("XRD_READV_LIMITS", var); + XrdHttpReadRangeHandler::Configure(eDest, var, ReadRangeConfig); + cksumHandler.configure(xrd_cslist); auto nonIanaChecksums = cksumHandler.getNonIANAConfiguredCksums(); if(nonIanaChecksums.size()) { @@ -1527,11 +1532,12 @@ int XrdHttpProtocol::StartSimpleResp(int code, const char *desc, const char *hea else if (code == 206) ss << "Partial Content"; else if (code == 302) ss << "Redirect"; else if (code == 307) ss << "Temporary Redirect"; + else if (code == 400) ss << "Bad Request"; else if (code == 403) ss << "Forbidden"; else if (code == 404) ss << "Not Found"; else if (code == 405) ss << "Method Not Allowed"; + else if (code == 416) ss << "Range Not Satisfiable"; else if (code == 500) ss << "Internal Server Error"; - else if (code == 400) ss << "Bad Request"; else ss << "Unknown"; } ss << crlf; diff --git a/src/XrdHttp/XrdHttpProtocol.hh b/src/XrdHttp/XrdHttpProtocol.hh index 0663dfd70c7..8db563595e7 100644 --- a/src/XrdHttp/XrdHttpProtocol.hh +++ b/src/XrdHttp/XrdHttpProtocol.hh @@ -47,6 +47,7 @@ #include "Xrd/XrdProtocol.hh" #include "XrdOuc/XrdOucHash.hh" #include "XrdHttpChecksumHandler.hh" +#include "XrdHttpReadRangeHandler.hh" #include @@ -129,6 +130,9 @@ public: // XrdHttp checksum handling class static XrdHttpChecksumHandler cksumHandler; + /// configuration for the read range handler + static XrdHttpReadRangeHandler::Configuration ReadRangeConfig; + /// called via https bool isHTTPS() { return ishttps; } diff --git a/src/XrdHttp/XrdHttpReadRangeHandler.cc b/src/XrdHttp/XrdHttpReadRangeHandler.cc new file mode 100644 index 00000000000..f6157bb6644 --- /dev/null +++ b/src/XrdHttp/XrdHttpReadRangeHandler.cc @@ -0,0 +1,658 @@ +//------------------------------------------------------------------------------ +// This file is part of XrdHTTP: A pragmatic implementation of the +// HTTP/WebDAV protocol for the Xrootd framework +// +// Copyright (c) 2013 by European Organization for Nuclear Research (CERN) +// Authors: Cedric Caffy , David Smith +// File Date: Aug 2023 +//------------------------------------------------------------------------------ +// XRootD is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// XRootD is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with XRootD. If not, see . +//------------------------------------------------------------------------------ + +#include "XProtocol/XPtypes.hh" +#include "XrdHttpReadRangeHandler.hh" +#include "XrdOuc/XrdOuca2x.hh" +#include "XrdOuc/XrdOucTUtils.hh" +#include "XrdOuc/XrdOucUtils.hh" + +#include +#include +#include +#include +#include +#include +#include + +//------------------------------------------------------------------------------ +//! static, class method: initialise a configuraiton object. parms is currently +//! only content of environment variable XRD_READV_LIMITS, to get the specific +//! kXR_readv limits. +//------------------------------------------------------------------------------ +int XrdHttpReadRangeHandler::Configure +( + XrdSysError &Eroute, + const char *const parms, + Configuration &cfg) +{ + if( !parms ) return 0; + + std::vector splitArgs; + XrdOucTUtils::splitString( splitArgs, parms, "," ); + if( splitArgs.size() < 2 ) return 0; + + //---------------------------------------------------------------------------- + // params is expected to be "," + //---------------------------------------------------------------------------- + std::string iorstr = splitArgs[0]; + std::string iovstr = splitArgs[1]; + XrdOucUtils::trim( iorstr ); + XrdOucUtils::trim( iovstr ); + + int val; + if( XrdOuca2x::a2i( Eroute, "Error reading specific value of readv_ior_max", + iorstr.c_str(), &val, 1, -1 ) ) + { + return -1; + } + + cfg.readv_ior_max = val; + if( XrdOuca2x::a2i( Eroute, "Error reading specific value of readv_iov_max", + iovstr.c_str(), &val, 1, -1 ) ) + { + return -1; + } + + cfg.readv_iov_max = val; + cfg.reqs_max = RREQ_MAXSIZE; + cfg.haveSizes = true; + + return 0; +} + +//------------------------------------------------------------------------------ +//! return the Error object +//------------------------------------------------------------------------------ +const XrdHttpReadRangeHandler::Error & XrdHttpReadRangeHandler::getError() const +{ + return error_; +} + +//------------------------------------------------------------------------------ +//! indicates when there were no valid Range head ranges supplied +//------------------------------------------------------------------------------ +bool XrdHttpReadRangeHandler::isFullFile() +{ + return rawUserRanges_.empty(); +} + +//------------------------------------------------------------------------------ +//! indicates a single range (implied whole file, or single range) or empty file +//------------------------------------------------------------------------------ +bool XrdHttpReadRangeHandler::isSingleRange() +{ + if( !rangesResolved_ ) + resolveRanges(); + + return( resolvedUserRanges_.size() <= 1 ); +} + +//------------------------------------------------------------------------------ +//! return resolved (i.e. obsolute start and end) byte ranges desired +//------------------------------------------------------------------------------ +const XrdHttpReadRangeHandler::UserRangeList &XrdHttpReadRangeHandler::ListResolvedRanges() +{ + static const UserRangeList emptyList; + + if( !rangesResolved_ ) + resolveRanges(); + + if( error_ ) + return emptyList; + + return resolvedUserRanges_; +} + +//------------------------------------------------------------------------------ +//! return XrdHttpIOList for sending to read or readv +//------------------------------------------------------------------------------ +const XrdHttpIOList &XrdHttpReadRangeHandler::NextReadList() +{ + static const XrdHttpIOList emptyList; + + if( !rangesResolved_ ) + resolveRanges(); + + if( error_ ) + return emptyList; + + if( !splitRange_.empty() ) + { + if( currSplitRangeIdx_ == 0 && currSplitRangeOff_ == 0 ) + { + //------------------------------------------------------------------------ + // Nothing read: Prevent scenario where data is expected but none is + // actually read E.g. Accessing files which return the results of a script + //------------------------------------------------------------------------ + error_.set( 500, "Stopping request because more data is expected " + "but no data has been read." ); + return emptyList; + } + + //-------------------------------------------------------------------------- + // we may have some unacknowledged portion of the last range; maybe due to a + // short read. so remove what was received and potentially reissue. + //-------------------------------------------------------------------------- + + trimSplit(); + if( !splitRange_.empty() ) + return splitRange_; + } + + if( splitRangeIdx_ >= resolvedUserRanges_.size() ) + return emptyList; + + splitRanges(); + + return splitRange_; +} + +//------------------------------------------------------------------------------ +//! Force handler to enter error state +//------------------------------------------------------------------------------ +void XrdHttpReadRangeHandler::NotifyError() +{ + if( error_ ) + return; + + error_.set( 500, "An error occured." ); +} + +//------------------------------------------------------------------------------ +//! Advance internal counters concerning received bytes +//------------------------------------------------------------------------------ +int XrdHttpReadRangeHandler::NotifyReadResult +( + const ssize_t ret, + const UserRange** const urp, + bool &start, + bool &allend +) +{ + if( error_ ) + return -1; + + if( ret == 0 ) + return 0; + + if( ret < 0 ) + { + error_.set( 500, "Range handler read failure." ); + return -1; + } + + if( !rangesResolved_ ) + { + error_.set( 500, "Range handler ranges not yet resolved." ); + return -1; + } + + if( splitRange_.empty() ) + { + error_.set( 500, "No ranges being read." ); + return -1; + } + + start = false; + allend = false; + + if( currSplitRangeIdx_ >= splitRange_.size() || + resolvedRangeIdx_ >= resolvedUserRanges_.size() ) + { + error_.set( 500, "Range handler index invalid." ); + return -1; + } + + if( urp ) + *urp = &resolvedUserRanges_[resolvedRangeIdx_]; + + if( resolvedRangeOff_ == 0 ) + start = true; + + const int clen = splitRange_[currSplitRangeIdx_].size; + + const off_t ulen = resolvedUserRanges_[resolvedRangeIdx_].end + - resolvedUserRanges_[resolvedRangeIdx_].start + 1; + + currSplitRangeOff_ += ret; + resolvedRangeOff_ += ret; + + if( currSplitRangeOff_ > clen || resolvedRangeOff_ > ulen ) + { + error_.set( 500, "Range handler read crossing chunk boundary." ); + return -1; + } + + if( currSplitRangeOff_ == clen ) + { + currSplitRangeOff_ = 0; + currSplitRangeIdx_++; + + if( currSplitRangeIdx_ >= splitRange_.size() ) + { + currSplitRangeIdx_ = 0; + splitRange_.clear(); + } + } + + if( resolvedRangeOff_ == ulen ) + { + resolvedRangeIdx_++; + resolvedRangeOff_ = 0; + if( resolvedRangeIdx_ >= resolvedUserRanges_.size() ) + allend = true; + } + + return 0; +} + +//------------------------------------------------------------------------------ +//! parse the line after a "Range: " http request header +//------------------------------------------------------------------------------ +void XrdHttpReadRangeHandler::ParseContentRange(const char* const line) +{ + char *str1, *saveptr1, *token; + + std::unique_ptr< char, decltype(std::free)* > + line_copy { strdup( line ), std::free }; + + //---------------------------------------------------------------------------- + // line_copy is argument of the Range header. + // + // e.g. "bytes=15-17,20-25" + // We skip the unit prefix (upto first '='). We don't + // enforce this prefix nor check what it is (e.g. 'bytes') + //---------------------------------------------------------------------------- + + str1 = line_copy.get(); + token = strchr(str1,'='); + if (token) str1 = token + 1; + + //---------------------------------------------------------------------------- + // break up the ranges and process each + //---------------------------------------------------------------------------- + + for( ; ; str1 = NULL ) + { + token = strtok_r( str1, " ,\n\r", &saveptr1 ); + if( token == NULL ) + break; + + if( !strlen(token) ) continue; + + const int rc = parseOneRange( token ); + if( rc ) + { + //------------------------------------------------------------------------ + // on error we ignore the whole range header + //------------------------------------------------------------------------ + rawUserRanges_.clear(); + return; + } + } +} + +//------------------------------------------------------------------------------ +//! resets this handler +//------------------------------------------------------------------------------ +void XrdHttpReadRangeHandler::reset() +{ + error_.reset(); + rawUserRanges_.clear(); + rawUserRanges_.shrink_to_fit(); + resolvedUserRanges_.clear(); + resolvedUserRanges_.shrink_to_fit(); + splitRange_.clear(); + splitRange_.shrink_to_fit(); + rangesResolved_ = false; + splitRangeIdx_ = 0; + splitRangeOff_ = 0; + currSplitRangeIdx_ = 0; + currSplitRangeOff_ = 0; + resolvedRangeIdx_ = 0; + resolvedRangeOff_ = 0; + filesize_ = 0; +} + +//------------------------------------------------------------------------------ +//! sets the filesize, used during resolving and issuing range requests +//------------------------------------------------------------------------------ +int XrdHttpReadRangeHandler::SetFilesize(const off_t fs) +{ + if( error_ ) + return -1; + + if( rangesResolved_ ) + { + error_.set( 500, "Filesize notified after ranges resolved." ); + return -1; + } + + filesize_ = fs; + return 0; +} + +//------------------------------------------------------------------------------ +//! private method: paring a single range from the header +//------------------------------------------------------------------------------ +int XrdHttpReadRangeHandler::parseOneRange(char* const str) +{ + UserRange ur; + char *sep; + + //---------------------------------------------------------------------------- + // expected input is an individual range, e.g. + // 5-6 + // 5- + // -2 + //---------------------------------------------------------------------------- + + sep = strchr( str, '-' ); + if( !sep ) + { + //-------------------------------------------------------------------------- + // Unexpected range format + //-------------------------------------------------------------------------- + return -1; + } + + *sep = '\0'; + if( rangeFig( str, ur.start_set, ur.start )<0 ) + { + //-------------------------------------------------------------------------- + // Error in range start + //-------------------------------------------------------------------------- + *sep = '-'; + return -1; + } + *sep = '-'; + if( rangeFig( sep+1, ur.end_set, ur.end )<0 ) + { + //-------------------------------------------------------------------------- + // Error in range end + //-------------------------------------------------------------------------- + return -1; + } + + if( !ur.start_set && !ur.end_set ) + { + //-------------------------------------------------------------------------- + // Unexpected range format + //-------------------------------------------------------------------------- + return -1; + } + + if( ur.start_set && ur.end_set && ur.start > ur.end ) + { + //-------------------------------------------------------------------------- + // Range start is after range end + //-------------------------------------------------------------------------- + return -1; + } + + if( !ur.start_set && ur.end_set && ur.end == 0 ) + { + //-------------------------------------------------------------------------- + // Request to return last 0 bytes of file + //-------------------------------------------------------------------------- + return -1; + } + + rawUserRanges_.push_back(ur); + return 0; +} + +//------------------------------------------------------------------------------ +//! private method: decode a decimal value from range header +//------------------------------------------------------------------------------ +int XrdHttpReadRangeHandler::rangeFig(const char* const s, bool &set, off_t &val) +{ + char *endptr = (char*)s; + errno = 0; + long long int v = strtoll( s, &endptr, 10 ); + if( (errno == ERANGE && (v == LONG_MAX || v == LONG_MIN)) + || (errno != 0 && errno != EINVAL && v == 0) ) + { + return -1; + } + if( *endptr != '\0' ) + { + return -1; + } + if( endptr == s ) + { + set = false; + } + else + { + set = true; + val = v; + } + return 0; +} + +//------------------------------------------------------------------------------ +//! private method: turn user supplied range into absolute range using filesize +//------------------------------------------------------------------------------ +void XrdHttpReadRangeHandler::resolveRanges() +{ + if( error_ ) + return; + + resolvedUserRanges_.clear(); + + for( const auto &rr: rawUserRanges_ ) + { + off_t start = 0; + off_t end = 0; + + if( rr.end_set ) + { + if( rr.start_set ) + { + //---------------------------------------------------------------------- + // end and start set + // e.g. 5-6 + //---------------------------------------------------------------------- + start = rr.start; + end = rr.end; + + //---------------------------------------------------------------------- + // skip ranges outside the file + //---------------------------------------------------------------------- + if( start >= filesize_ ) + continue; + + if( end >= filesize_ ) + { + end = filesize_ - 1; + } + } + else // !start + { + //---------------------------------------------------------------------- + // end is set but not start + // e.g. -5 + //---------------------------------------------------------------------- + if( rr.end == 0 ) + continue; + end = filesize_ -1; + if( rr.end > filesize_ ) + { + start = 0; + } + else + { + start = filesize_ - rr.end; + } + } + } + else // !end + { + //------------------------------------------------------------------------ + // end is not set + // e.g. 5- + //------------------------------------------------------------------------ + if( !rr.start_set ) continue; + if( rr.start >= filesize_ ) + continue; + start = rr.start; + end = filesize_ - 1; + } + resolvedUserRanges_.emplace_back( start, end ); + } + + if( rawUserRanges_.empty() && filesize_>0 ) + { + //-------------------------------------------------------------------------- + // special case: no ranges: speficied, return whole file + //-------------------------------------------------------------------------- + resolvedUserRanges_.emplace_back( 0, filesize_ - 1 ); + } + + if( !rawUserRanges_.empty() && resolvedUserRanges_.empty() ) + { + error_.set( 416, "None of the range-specifier values in the Range " + "request-header field overlap the current extent of the selected resource." ); + } + + rangesResolved_ = true; +} + +//------------------------------------------------------------------------------ +//! private method: proceed through the resolved ranges, splitting into ranges +//! suitable for read or readv. This method is called repeatedly until we've +//! gone though all the resolved ranges. +//------------------------------------------------------------------------------ +void XrdHttpReadRangeHandler::splitRanges() +{ + splitRange_.clear(); + currSplitRangeIdx_ = 0; + currSplitRangeOff_ = 0; + resolvedRangeIdx_ = splitRangeIdx_; + resolvedRangeOff_ = splitRangeOff_; + + //---------------------------------------------------------------------------- + // If we make a list of just one range XrdHttpReq will issue kXR_read, + // otherwise kXR_readv. + // + // If this is a full file read, or single user range, we'll fetch only one + // range at a time, so it is sent as a series of kXR_read requests. + // + // For multi range requests we pack a number of suitably sized ranges, thereby + // using kXR_readv. However, if there's a long user range we can we try to + // proceed by issuing single range requests and thereby using kXR_read. + // + // We don't merge user ranges in a single chunk as we always expect to be + // able to notify at boundaries with the output bools of NotifyReadResult. + //---------------------------------------------------------------------------- + + size_t maxch = vectorReadMaxChunks_; + size_t maxchs = vectorReadMaxChunkSize_; + if( isSingleRange() ) + { + maxchs = rRequestMaxBytes_; + maxch = 1; + } + + splitRange_.reserve( maxch ); + + //---------------------------------------------------------------------------- + // Start/continue splitting the resolvedUserRanges_ into a XrdHttpIOList. + //---------------------------------------------------------------------------- + + const size_t cs = resolvedUserRanges_.size(); + size_t nc = 0; + size_t rsr = rRequestMaxBytes_; + UserRange tmpur; + + while( ( splitRangeIdx_ < cs ) && ( rsr > 0 ) ) + { + //-------------------------------------------------------------------------- + // Check if we've readed the maximum number of allowed chunks. + //-------------------------------------------------------------------------- + if( nc >= maxch ) + break; + + if( !tmpur.start_set ) + { + tmpur = resolvedUserRanges_[splitRangeIdx_]; + tmpur.start += splitRangeOff_; + } + + const off_t l = tmpur.end - tmpur.start + 1; + size_t maxsize = std::min( rsr, maxchs ); + + //-------------------------------------------------------------------------- + // If we're starting a new set of chunks and we have enough data available + // in the current user range we allow a kXR_read of the max request size. + //-------------------------------------------------------------------------- + if( nc == 0 && l >= (off_t)rRequestMaxBytes_ ) + maxsize = rRequestMaxBytes_; + + if( l > (off_t)maxsize ) + { + splitRange_.emplace_back( nullptr, tmpur.start, maxsize ); + tmpur.start += maxsize; + splitRangeOff_ += maxsize; + rsr -= maxsize; + } + else + { + splitRange_.emplace_back( nullptr, tmpur.start, l ); + rsr -= l; + tmpur = UserRange(); + splitRangeOff_ = 0; + splitRangeIdx_++; + } + nc++; + } +} + +//------------------------------------------------------------------------------ +//! private method: remove partially received request +//------------------------------------------------------------------------------ +void XrdHttpReadRangeHandler::trimSplit() +{ + if( currSplitRangeIdx_ < splitRange_.size() ) + { + splitRange_.erase( splitRange_.begin(), + splitRange_.begin() + currSplitRangeIdx_ ); + } + else + splitRange_.clear(); + + if( splitRange_.size() > 0 ) + { + if( currSplitRangeOff_ < splitRange_[0].size ) + { + splitRange_[0].offset += currSplitRangeOff_; + splitRange_[0].size -= currSplitRangeOff_; + } + else + splitRange_.clear(); + } + + currSplitRangeIdx_ = 0; + currSplitRangeOff_ = 0; +} diff --git a/src/XrdHttp/XrdHttpReadRangeHandler.hh b/src/XrdHttp/XrdHttpReadRangeHandler.hh new file mode 100644 index 00000000000..b0b21b9bf32 --- /dev/null +++ b/src/XrdHttp/XrdHttpReadRangeHandler.hh @@ -0,0 +1,282 @@ +//------------------------------------------------------------------------------ +// This file is part of XrdHTTP: A pragmatic implementation of the +// HTTP/WebDAV protocol for the Xrootd framework +// +// Copyright (c) 2013 by European Organization for Nuclear Research (CERN) +// Authors: Cedric Caffy , David Smith +// File Date: Aug 2023 +//------------------------------------------------------------------------------ +// XRootD is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// XRootD is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with XRootD. If not, see . +//------------------------------------------------------------------------------ + +#ifndef XROOTD_XRDHTTPREADRANGEHANDLER_HH +#define XROOTD_XRDHTTPREADRANGEHANDLER_HH + +#include "XrdHttpUtils.hh" +#include +#include + + +/** + * Class responsible for parsing the HTTP Content-Range header + * coming from the client, generating appropriate read ranges + * for read or readv and tracking the responses to the requests. + */ +class XrdHttpReadRangeHandler { +public: + + /** + * These are defaults for: + * READV_MAXCHUNKS Max length of the XrdHttpIOList vector. + * READV_MAXCHUNKSIZE Max length of a XrdOucIOVec2 element. + * RREQ_MAXSIZE Max bytes to issue in a whole readv/read. + */ + static constexpr size_t READV_MAXCHUNKS = 512; + static constexpr size_t READV_MAXCHUNKSIZE = 512*1024; + static constexpr size_t RREQ_MAXSIZE = 8*1024*1024; + + /** + * Configuration can give specific values for the max chunk + * size, number of chunks and maximum overall request size, + * to override the defaults. + */ + struct Configuration { + Configuration() : haveSizes(false) { } + + Configuration(const size_t vectorReadMaxChunkSize, + const size_t vectorReadMaxChunks, + const size_t rRequestMaxBytes) : + haveSizes(true), readv_ior_max(vectorReadMaxChunkSize), + readv_iov_max(vectorReadMaxChunks), reqs_max(rRequestMaxBytes) { } + + + bool haveSizes; + size_t readv_ior_max; // max chunk size + size_t readv_iov_max; // max number of chunks + size_t reqs_max; // max bytes in read or readv + }; + + /** + * Error structure for storing error codes and message. + * operator bool() can be used to query if a value is set. + */ + struct Error { + bool errSet{false}; + int httpRetCode{0}; + std::string errMsg; + + explicit operator bool() const { return errSet; } + + void set(int rc, const std::string &m) + { httpRetCode = rc; errMsg = m; errSet = true; } + + void reset() { httpRetCode = 0; errMsg.clear(); errSet = false; } + }; + + /** + * Structure for recording or reporting user ranges. The can specify an + * unbounded range where eiter the start or end offset is not specified. + */ + struct UserRange { + bool start_set; + bool end_set; + off_t start; + off_t end; + + UserRange() : start_set(false), end_set(false), start(0), end(0) { } + + UserRange(off_t st, off_t en) : start_set(true), end_set(true), start(st), + end(en) { } + }; + + typedef std::vector UserRangeList; + + /** + * Constructor. + * Supplied with an Configuration object. The supplied object remains owned + * by the caller, but should remain valid throughout the lifetime of the + * ReadRangeHandler. + * + * @param @conf Configuration object. + */ + XrdHttpReadRangeHandler(const Configuration &conf) + { + rRequestMaxBytes_ = RREQ_MAXSIZE; + vectorReadMaxChunkSize_ = READV_MAXCHUNKSIZE; + vectorReadMaxChunks_ = READV_MAXCHUNKS; + + if( conf.haveSizes ) + { + vectorReadMaxChunkSize_ = conf.readv_ior_max; + vectorReadMaxChunks_ = conf.readv_iov_max; + rRequestMaxBytes_ = conf.reqs_max; + } + reset(); + } + + /** + * Parses a configuration into a Configuration object. + * @param @Eroute Error reporting object + * @param @parms Configuration string. + * @param @cfg an output Configuration object + * @return 0 for success, otherwise failure. + */ + static int Configure(XrdSysError &Eroute, const char *const parms, + Configuration &cfg); + + /** + * getter for the Error object. The object can be inspected with its operator + * bool() method to indicate an error has happened. Error code and message are + * available in other members of Error. + */ + const Error& getError() const; + + /** + * Indicates no valid Range header was given and thus the implication is that + * whole file is required. A range or ranges may be given that cover the whole + * file but that situation is not detected. + */ + bool isFullFile(); + + /** + * Incidcates whether there is a single range, either given by a Range header + * with single range or implied by having no Range header. + * Also returns true for an empty file, although there is no range of bytes. + * @return true if there is a single range. + */ + bool isSingleRange(); + + /** + * Returns a reference of the list of ranges. These are resolved, meaning that + * if there was no Range header, or it was in the form -N or N-, the file size + * is used to compute the actual range of bytes that are needed. The list + * remains owned by the handler and may be invalidated on reset(). + * @return List of ranges in a UserRangeList object. + * The returned list may be empty, i.e. for an empty file or if there + * is an error. Use getError() to see if there is an error. + */ + const UserRangeList &ListResolvedRanges(); + + /** + * Requests a XrdHttpIOList (vector of XrdOucIOVec2) that describes the next + * bytes that need to be fetched from a file. If there is more than one chunk + * it is size appropriately for a readv request, if there is one request it + * should be sent as a read request. Therefore the chunks do not necessarily + * correspond to the ranges the user requested. The caller issue the requests + * in the order provided and call NotifyReadResult with the ordered results. + * @return a reference to a XrdHttpIOList. The object remains owned by the + * handler. It may be invalided by a new call to NextReadList() or + * reset(). The returned list may be empty, which implies no more + * reads are needed One can use getError() to see if there is an + * error. + */ + const XrdHttpIOList &NextReadList(); + + /** + * Force the handler to enter error state. Sets a generic error message + * if there was not already an error. + */ + void NotifyError(); + + /** + * Notifies the handler about the arrival of bytes from a read or readv + * request. The handler tracks the progress of the arriving bytes against + * the bytes ranges the user requested. + * @param ret the number of bytes received + * @param urp a pointer to a pointer of a UserRange object. If urp is not + * nullptr, the pointer to a UserRange is returned that describes + * the current range associated with the received bytes. The + * handler retains ownership of the returned object. reset() of + * the handler invalidates the UserRange object. + * @param start is an output bool parameter that indicates whether the + * received bytes mark the start of a UserRange. + * @param allend is an output bool parameter that indicates whether the + * received bytes mark the end of all the UserRanges + * @return 0 upon success, -1 if an error happened. + * One needs to call the getError() method to return the error. + */ + int NotifyReadResult(const ssize_t ret, + const UserRange** const urp, + bool &start, + bool &allend); + + /** + * Parses the Content-Range header value and sets the ranges within the + * object. + * @param line the line under the format "bytes=0-19, 25-30" + * In case the parsing fails any partial results are cleared. There is no + * error notification as the rest of the request processing should continue + * in any case. + */ + void ParseContentRange(const char* const line); + + /** + * Resets the object state, ready for handling a new request. + */ + void reset(); + + /** + * Notifies of the current file size. This information is required for + * processing range requests that imply reading to the end or a certain + * position before the end of a file. It is also used to determine when read + * or readv need no longer be issued when reading the whole file. + * Can be called once or more, after reset() but before isSingleRange(), + * ListResolvedRanges() or NextReadList() methods. + * @param sz the size of the file + * @return 0 upon success, -1 if an error happened. + * One needs to call the getError() method to return the error. + */ + int SetFilesize(const off_t sz); + +private: + int parseOneRange(char* const str); + int rangeFig(const char* const s, bool &set, off_t &start); + void resolveRanges(); + void splitRanges(); + void trimSplit(); + + Error error_; + + UserRangeList rawUserRanges_; + + bool rangesResolved_; + + UserRangeList resolvedUserRanges_; + + XrdHttpIOList splitRange_; + + // the position in resolvedUserRanges_ corresponding to all the + // bytes notified via the NotifyReadResult() method + size_t resolvedRangeIdx_; + off_t resolvedRangeOff_; + + // position of the method splitRanges() in within resolvedUserRanges_ + // from where it split ranges into chunks for sending to read/readv + size_t splitRangeIdx_; + off_t splitRangeOff_; + + // the position in splitRange_ corresponding to all the + // bytes notified via the NotifyReadResult() method + size_t currSplitRangeIdx_; + int currSplitRangeOff_; + + off_t filesize_; + + size_t vectorReadMaxChunkSize_; + size_t vectorReadMaxChunks_; + size_t rRequestMaxBytes_; +}; + + +#endif //XROOTD_XRDHTTPREADRANGEHANDLER_HH diff --git a/src/XrdHttp/XrdHttpReq.cc b/src/XrdHttp/XrdHttpReq.cc index 426d9ff1f4e..b39c5883a53 100644 --- a/src/XrdHttp/XrdHttpReq.cc +++ b/src/XrdHttp/XrdHttpReq.cc @@ -165,7 +165,10 @@ int XrdHttpReq::parseLine(char *line, int len) { } else if (!strcmp(key, "Host")) { parseHost(val); } else if (!strcmp(key, "Range")) { - parseContentRange(val); + // (rfc2616 14.35.1) says if Range header contains any range + // which is syntactically invalid the Range header should be ignored. + // Therefore no need for the range handler to report an error. + readRangeHandler.ParseContentRange(val); } else if (!strcmp(key, "Content-Length")) { length = atoll(val); @@ -221,89 +224,6 @@ int XrdHttpReq::parseHost(char *line) { return 0; } -int XrdHttpReq::parseContentRange(char *line) { - int j; - char *str1, *saveptr1, *token; - - - - for (j = 1, str1 = line;; j++, str1 = NULL) { - token = strtok_r(str1, " ,\n=", &saveptr1); - if (token == NULL) - break; - - //printf("%d: %s\n", j, token); - - if (!strlen(token)) continue; - - - parseRWOp(token); - - } - - return j; -} - -int XrdHttpReq::parseRWOp(char *str) { - ReadWriteOp o1; - int j; - char *saveptr2, *str2, *subtoken, *endptr; - bool ok = false; - - for (str2 = str, j = 0;; str2 = NULL, j++) { - subtoken = strtok_r(str2, "-", &saveptr2); - if (subtoken == NULL) - break; - - switch (j) { - case 0: - o1.bytestart = strtoll(subtoken, &endptr, 0); - if (!o1.bytestart && (endptr == subtoken)) o1.bytestart = -1; - break; - case 1: - o1.byteend = strtoll(subtoken, &endptr, 0); - if (!o1.byteend && (endptr == subtoken)) o1.byteend = -1; - ok = true; - break; - default: - // Malformed! - ok = false; - break; - } - - } - - - // This can be largely optimized - if (ok) { - - kXR_int32 len_ok = 0; - long long sz = o1.byteend - o1.bytestart + 1; - kXR_int32 newlen = sz; - - if (filesize > 0) - newlen = (kXR_int32) std::min(filesize - o1.bytestart, sz); - - rwOps.push_back(o1); - - while (len_ok < newlen) { - ReadWriteOp nfo; - int len = std::min(newlen - len_ok, READV_MAXCHUNKSIZE); - - nfo.bytestart = o1.bytestart + len_ok; - nfo.byteend = nfo.bytestart + len - 1; - len_ok += len; - rwOps_split.push_back(nfo); - } - length += len_ok; - - - } - - - return j; -} - int XrdHttpReq::parseFirstLine(char *line, int len) { char *key = line; @@ -436,29 +356,23 @@ void XrdHttpReq::clientUnMarshallReadAheadList(int nitems) { } } -int XrdHttpReq::ReqReadV() { +int XrdHttpReq::ReqReadV(const XrdHttpIOList &cl) { - kXR_int64 total_len = 0; - rwOpPartialDone = 0; // Now we build the protocol-ready read ahead list // and also put the correct placeholders inside the cache - int n = rwOps_split.size(); - if (!ralist) ralist = (readahead_list *) malloc(n * sizeof (readahead_list)); + int n = cl.size(); + ralist.clear(); + ralist.reserve(n); int j = 0; - for (int i = 0; i < n; i++) { - - // We can suppose that we know the length of the file - // Hence we can sort out requests that are out of boundary or trim them - if (rwOps_split[i].bytestart > filesize) continue; - if (rwOps_split[i].byteend > filesize - 1) rwOps_split[i].byteend = filesize - 1; - - memcpy(&(ralist[j].fhandle), this->fhandle, 4); + for (const auto &c: cl) { + ralist.emplace_back(); + auto &ra = ralist.back(); + memcpy(&ra.fhandle, this->fhandle, 4); - ralist[j].offset = rwOps_split[i].bytestart; - ralist[j].rlen = rwOps_split[i].byteend - rwOps_split[i].bytestart + 1; - total_len += ralist[j].rlen; + ra.offset = c.offset; + ra.rlen = c.size; j++; } @@ -523,11 +437,21 @@ int XrdHttpReq::File(XrdXrootd::Bridge::Context &info, //!< the result context int dlen //!< byte count ) { + // sendfile about to be sent by bridge for fetching data for GET: + // no https, no chunked+trailer, no multirange + //prot->SendSimpleResp(200, NULL, NULL, NULL, dlen); int rc = info.Send(0, 0, 0, 0); TRACE(REQ, " XrdHttpReq::File dlen:" << dlen << " send rc:" << rc); - if (rc) return false; - writtenbytes += dlen; + bool start, finish; + // short read will be classed as error + if (rc) { + readRangeHandler.NotifyError(); + return false; + } + + if (readRangeHandler.NotifyReadResult(dlen, nullptr, start, finish) < 0) + return false; return true; @@ -538,7 +462,8 @@ bool XrdHttpReq::Done(XrdXrootd::Bridge::Context & info) { TRACE(REQ, " XrdHttpReq::Done"); xrdresp = kXR_ok; - + + this->iovN = 0; int r = PostProcessHTTPReq(true); // Beware, we don't have to reset() if the result is 0 @@ -1240,10 +1165,14 @@ int XrdHttpReq::ProcessHTTPReq() { default: // Read() or Close(); reqstate is 3+ { + const XrdHttpIOList &readChunkList = readRangeHandler.NextReadList(); + + // Close() if we have finished, otherwise read the next chunk + // --------- CLOSE - if ( ((reqstate == 4) && (rwOps.size() > 1)) || // In this case, we performed a ReadV and it's done. - (writtenbytes >= length) ) // No ReadV but we have completed the request. + if ( readChunkList.empty() ) { + memset(&xrdreq, 0, sizeof (ClientRequest)); xrdreq.close.requestid = htons(kXR_close); memcpy(xrdreq.close.fhandle, fhandle, 4); @@ -1254,13 +1183,14 @@ int XrdHttpReq::ProcessHTTPReq() { } // We have finished + readClosing = true; return 1; } // --------- READ or READV - - if (rwOps.size() <= 1) { - // No chunks or one chunk... Request the whole file or single read + + if ( readChunkList.size() == 1 ) { + // Use a read request for single range long l; long long offs; @@ -1271,21 +1201,17 @@ int XrdHttpReq::ProcessHTTPReq() { memcpy(xrdreq.read.fhandle, fhandle, 4); xrdreq.read.dlen = 0; - if (rwOps.size() == 0) { - l = (long)std::min(filesize-writtenbytes, (long long)1024*1024); - offs = writtenbytes; - xrdreq.read.offset = htonll(writtenbytes); - xrdreq.read.rlen = htonl(l); - } else { - l = std::min(rwOps[0].byteend - rwOps[0].bytestart + 1 - writtenbytes, (long long)1024*1024); - offs = rwOps[0].bytestart + writtenbytes; - xrdreq.read.offset = htonll(offs); - xrdreq.read.rlen = htonl(l); - } + offs = readChunkList[0].offset; + l = readChunkList[0].size; + + xrdreq.read.offset = htonll(offs); + xrdreq.read.rlen = htonl(l); - // If we are using HTTPS or if the client requested trailers, disable sendfile - // (in the latter case, the chunked encoding prevents sendfile usage) - if (prot->ishttps || (m_transfer_encoding_chunked && m_trailer_headers)) { + // If we are using HTTPS or if the client requested trailers, or if the + // read concerns a multirange reponse, disable sendfile + // (in the latter two cases, the extra framing is only done in PostProcessHTTPReq) + if (prot->ishttps || (m_transfer_encoding_chunked && m_trailer_headers) || + !readRangeHandler.isSingleRange()) { if (!prot->Bridge->setSF((kXR_char *) fhandle, false)) { TRACE(REQ, " XrdBridge::SetSF(false) failed."); @@ -1320,9 +1246,9 @@ int XrdHttpReq::ProcessHTTPReq() { } else { // --------- READV - length = ReqReadV(); + length = ReqReadV(readChunkList); - if (!prot->Bridge->Run((char *) &xrdreq, (char *) ralist, length)) { + if (!prot->Bridge->Run((char *) &xrdreq, (char *) &ralist[0], length)) { prot->SendSimpleResp(404, NULL, NULL, (char *) "Could not run read request.", 0, false); return -1; } @@ -2105,6 +2031,8 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { &fileflags, &filemodtime); + readRangeHandler.SetFilesize(filesize); + // We will default the response size specified by the headers; if that // wasn't given, use the file size. if (!length) { @@ -2152,6 +2080,8 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { &fileflags, &filemodtime); + readRangeHandler.SetFilesize(filesize); + // As above: if the client specified a response size, we use that. // Otherwise, utilize the filesize if (!length) { @@ -2175,7 +2105,13 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { responseHeader += std::string("Age: ") + std::to_string(object_age < 0 ? 0 : object_age); } - if (rwOps.size() == 0) { + const XrdHttpReadRangeHandler::UserRangeList &uranges = readRangeHandler.ListResolvedRanges(); + if (uranges.empty() && readRangeHandler.getError()) { + prot->SendSimpleResp(readRangeHandler.getError().httpRetCode, NULL, NULL, readRangeHandler.getError().errMsg.c_str(),0,false); + return -1; + } + + if (readRangeHandler.isFullFile()) { // Full file. if (m_transfer_encoding_chunked && m_trailer_headers) { @@ -2184,59 +2120,53 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { prot->SendSimpleResp(200, NULL, responseHeader.empty() ? NULL : responseHeader.c_str(), NULL, filesize, keepalive); } return 0; - } else - if (rwOps.size() == 1) { - // Only one read to perform - if (rwOps[0].byteend < 0) // The requested range was along the lines of "Range: 1234-", meaning we need to fill in the end - rwOps[0].byteend = filesize - 1; - int cnt = (rwOps[0].byteend - rwOps[0].bytestart + 1); + } + + if (readRangeHandler.isSingleRange()) { + // Possibly with zero sized file but should have been included + // in the FullFile case above + if (uranges.size() != 1) + return -1; + + // Only one range to return to the user char buf[64]; - + const off_t cnt = uranges[0].end - uranges[0].start + 1; + XrdOucString s = "Content-Range: bytes "; - sprintf(buf, "%lld-%lld/%lld", rwOps[0].bytestart, rwOps[0].byteend, filesize); + sprintf(buf, "%lld-%lld/%lld", (long long int)uranges[0].start, (long long int)uranges[0].end, filesize); s += buf; if (!responseHeader.empty()) { s += "\r\n"; s += responseHeader.c_str(); } + prot->SendSimpleResp(206, NULL, (char *)s.c_str(), NULL, cnt, keepalive); return 0; - } else - if (rwOps.size() > 1) { - // First, check that the amount of range request can be handled by the vector read of the XRoot layer - if(rwOps.size() > XrdProto::maxRvecsz) { - std::string errMsg = "Too many range requests provided. Maximum range requests supported is " + std::to_string(XrdProto::maxRvecsz); - prot->SendSimpleResp(400, NULL, NULL,errMsg.c_str(), errMsg.size(), false); - return -1; - } - // Multiple reads to perform, compose and send the header - int cnt = 0; - for (size_t i = 0; i < rwOps.size(); i++) { + } - if (rwOps[i].bytestart > filesize) continue; - if (rwOps[i].byteend > filesize - 1) - rwOps[i].byteend = filesize - 1; + // Multiple reads to perform, compose and send the header + off_t cnt = 0; + for (auto &ur : uranges) { + cnt += ur.end - ur.start + 1; - cnt += (rwOps[i].byteend - rwOps[i].bytestart + 1); + cnt += buildPartialHdr(ur.start, + ur.end, + filesize, + (char *) "123456").size(); - cnt += buildPartialHdr(rwOps[i].bytestart, - rwOps[i].byteend, - filesize, - (char *) "123456").size(); - } - cnt += buildPartialHdrEnd((char *) "123456").size(); - std::string header = "Content-Type: multipart/byteranges; boundary=123456"; - if (!m_digest_header.empty()) { - header += "\n"; - header += m_digest_header; - } - - prot->SendSimpleResp(206, NULL, header.c_str(), NULL, cnt, keepalive); - return 0; + } + cnt += buildPartialHdrEnd((char *) "123456").size(); + std::string header = "Content-Type: multipart/byteranges; boundary=123456"; + if (!m_digest_header.empty()) { + header += "\n"; + header += m_digest_header; } + prot->SendSimpleResp(206, NULL, header.c_str(), NULL, cnt, keepalive); + return 0; + } else { // xrdresp indicates an error occurred @@ -2251,10 +2181,14 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { default: //read or readv { // If we are postprocessing a close, potentially send out informational trailers - if ((ntohs(xrdreq.header.requestid) == kXR_close) || - ((reqstate == 4) && (ntohs(xrdreq.header.requestid) == kXR_readv))) + if ((ntohs(xrdreq.header.requestid) == kXR_close) || readClosing) { - + const XrdHttpReadRangeHandler::Error &rrerror = readRangeHandler.getError(); + if (rrerror) { + httpStatusCode = rrerror.httpRetCode; + httpStatusText = rrerror.errMsg; + } + if (m_transfer_encoding_chunked && m_trailer_headers) { if (prot->ChunkRespHeader(0)) return -1; @@ -2271,7 +2205,8 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { return -1; } - return keepalive ? 1 : -1; + if (rrerror) return -1; + return keepalive ? 1 : -1; } // On error, we can only send out a message if trailers are enabled and the @@ -2304,84 +2239,24 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { } } - // Prevent scenario where data is expected but none is actually read - // E.g. Accessing files which return the results of a script - if ((ntohs(xrdreq.header.requestid) == kXR_read) && - (reqstate > 3) && (iovN == 0)) { - TRACEI(REQ, "Stopping request because more data is expected " - "but no data has been read."); - return -1; - } - TRACEI(REQ, "Got data vectors to send:" << iovN); - if (ntohs(xrdreq.header.requestid) == kXR_readv) { - // Readv case, we must take out each individual header and format it according to the http rules - readahead_list *l; - char *p; - int len; - - // Cycle on all the data that is coming from the server - for (int i = 0; i < iovN; i++) { - - for (p = (char *) iovP[i].iov_base; p < (char *) iovP[i].iov_base + iovP[i].iov_len;) { - l = (readahead_list *) p; - len = ntohl(l->rlen); - - // Now we have a chunk coming from the server. This may be a partial chunk - if (rwOpPartialDone == 0) { - std::string s = buildPartialHdr(rwOps[rwOpDone].bytestart, - rwOps[rwOpDone].byteend, - filesize, - (char *) "123456"); - - TRACEI(REQ, "Sending multipart: " << rwOps[rwOpDone].bytestart << "-" << rwOps[rwOpDone].byteend); - if (prot->SendData((char *) s.c_str(), s.size())) return -1; - } - - // Send all the data we have - if (prot->SendData(p + sizeof (readahead_list), len)) return -1; - - // If we sent all the data relative to the current original chunk request - // then pass to the next chunk, otherwise wait for more data - rwOpPartialDone += len; - if (rwOpPartialDone >= rwOps[rwOpDone].byteend - rwOps[rwOpDone].bytestart + 1) { - rwOpDone++; - rwOpPartialDone = 0; - } - - p += sizeof (readahead_list); - p += len; - - } - } - - if (rwOpDone == rwOps.size()) { - std::string s = buildPartialHdrEnd((char *) "123456"); - if (prot->SendData((char *) s.c_str(), s.size())) return -1; - } + XrdHttpIOList received; + getReadResponse(received); + int rc; + if (readRangeHandler.isSingleRange()) { + rc = sendReadResponseSingleRange(received); } else { - // Send chunked encoding header - if (m_transfer_encoding_chunked && m_trailer_headers) { - int sum = 0; - for (int i = 0; i < iovN; i++) sum += iovP[i].iov_len; - prot->ChunkRespHeader(sum); - } - for (int i = 0; i < iovN; i++) { - if (prot->SendData((char *) iovP[i].iov_base, iovP[i].iov_len)) return -1; - writtenbytes += iovP[i].iov_len; - } - if (m_transfer_encoding_chunked && m_trailer_headers) { - prot->ChunkRespFooter(); - } + rc = sendReadResponsesMultiRanges(received); } - - // Let's make sure that we avoid sending the same data twice, - // in the case where PostProcessHTTPReq is invoked again - this->iovN = 0; - + if (rc) { + // make sure readRangeHandler will trigger close + // of file after next NextReadList(). + readRangeHandler.NotifyError(); + } + return 0; } // end read or readv @@ -2800,10 +2675,8 @@ void XrdHttpReq::reset() { TRACE(REQ, " XrdHttpReq request ended."); //if (xmlbody) xmlFreeDoc(xmlbody); - rwOps.clear(); - rwOps_split.clear(); - rwOpDone = 0; - rwOpPartialDone = 0; + readRangeHandler.reset(); + readClosing = false; writtenbytes = 0; etext.clear(); redirdest = ""; @@ -2819,8 +2692,8 @@ void XrdHttpReq::reset() { depth = 0; xrdresp = kXR_noResponsesYet; xrderrcode = kXR_noErrorYet; - if (ralist) free(ralist); - ralist = 0; + ralist.clear(); + ralist.shrink_to_fit(); request = rtUnset; resource = ""; @@ -2884,3 +2757,152 @@ void XrdHttpReq::getfhandle() { (int) fhandle[0] << ":" << (int) fhandle[1] << ":" << (int) fhandle[2] << ":" << (int) fhandle[3]); } + +void XrdHttpReq::getReadResponse(XrdHttpIOList &received) { + received.clear(); + + if (ntohs(xrdreq.header.requestid) == kXR_readv) { + readahead_list *l; + char *p; + kXR_int32 len; + + // Cycle on all the data that is coming from the server + for (int i = 0; i < iovN; i++) { + + for (p = (char *) iovP[i].iov_base; p < (char *) iovP[i].iov_base + iovP[i].iov_len;) { + l = (readahead_list *) p; + len = ntohl(l->rlen); + + received.emplace_back(p+sizeof(readahead_list), -1, len); + + p += sizeof (readahead_list); + p += len; + + } + } + return; + } + + // kXR_read result + for (int i = 0; i < iovN; i++) { + received.emplace_back((char*)iovP[i].iov_base, -1, iovP[i].iov_len); + } + +} + +int XrdHttpReq::sendReadResponsesMultiRanges(const XrdHttpIOList &received) { + + if (received.size() == 0) { + bool start, finish; + if (readRangeHandler.NotifyReadResult(0, nullptr, start, finish) < 0) { + return -1; + } + return 0; + } + + // user is expecting multiple ranges, we must be prepared to send an + // individual header for each and format it according to the http rules + + struct rinfo { + bool start; + bool finish; + const XrdOucIOVec2 *ci; + const XrdHttpReadRangeHandler::UserRange *ur; + std::string st_header; + std::string fin_header; + }; + + // report each received byte chunk to the range handler and record the details + // of original user range it related to and if starts a range or finishes all. + std::vector rvec; + + rvec.reserve(received.size()); + + for(const auto &rcv: received) { + rinfo rentry; + bool start, finish; + const XrdHttpReadRangeHandler::UserRange *ur; + + if (readRangeHandler.NotifyReadResult(rcv.size, &ur, start, finish) < 0) { + return -1; + } + rentry.ur = ur; + rentry.start = start; + rentry.finish = finish; + rentry.ci = &rcv; + + if (start) { + std::string s = buildPartialHdr(ur->start, + ur->end, + filesize, + (char *) "123456"); + + rentry.st_header = s; + } + + if (finish) { + std::string s = buildPartialHdrEnd((char *) "123456"); + rentry.fin_header = s; + } + + rvec.push_back(rentry); + } + + // send the user the headers / data + for(const auto &rentry: rvec) { + + if (rentry.start) { + TRACEI(REQ, "Sending multipart: " << rentry.ur->start << "-" << rentry.ur->end); + if (prot->SendData((char *) rentry.st_header.c_str(), rentry.st_header.size())) { + return -1; + } + } + + // Send all the data we have + if (prot->SendData((char *) rentry.ci->data, rentry.ci->size)) { + return -1; + } + + if (rentry.finish) { + if (prot->SendData((char *) rentry.fin_header.c_str(), rentry.fin_header.size())) { + return -1; + } + } + } + + return 0; +} + +int XrdHttpReq::sendReadResponseSingleRange(const XrdHttpIOList &received) { + // single range http transfer + + if (received.size() == 0) { + bool start, finish; + if (readRangeHandler.NotifyReadResult(0, nullptr, start, finish) < 0) { + return -1; + } + return 0; + } + + off_t sum = 0; + // notify the range handler and return if error + for(const auto &rcv: received) { + bool start, finish; + if (readRangeHandler.NotifyReadResult(rcv.size, nullptr, start, finish) < 0) { + return -1; + } + sum += rcv.size; + } + + // Send chunked encoding header + if (m_transfer_encoding_chunked && m_trailer_headers) { + prot->ChunkRespHeader(sum); + } + for(const auto &rcv: received) { + if (prot->SendData((char *) rcv.data, rcv.size)) return -1; + } + if (m_transfer_encoding_chunked && m_trailer_headers) { + prot->ChunkRespFooter(); + } + return 0; +} diff --git a/src/XrdHttp/XrdHttpReq.hh b/src/XrdHttp/XrdHttpReq.hh index 81e50683eea..48dbbcd003a 100644 --- a/src/XrdHttp/XrdHttpReq.hh +++ b/src/XrdHttp/XrdHttpReq.hh @@ -44,6 +44,7 @@ #include "XProtocol/XProtocol.hh" #include "XrdXrootd/XrdXrootdBridge.hh" #include "XrdHttpChecksumHandler.hh" +#include "XrdHttpReadRangeHandler.hh" #include #include @@ -54,14 +55,6 @@ -#define READV_MAXCHUNKS 512 -#define READV_MAXCHUNKSIZE (1024*128) - -struct ReadWriteOp { - // < 0 means "not specified" - long long bytestart; - long long byteend; -}; struct DirListInfo { std::string path; @@ -94,9 +87,7 @@ private: // after a response body has started bool m_status_trailer{false}; - int parseContentRange(char *); int parseHost(char *); - int parseRWOp(char *); //xmlDocPtr xmlbody; /* the resulting document tree */ XrdHttpProtocol *prot; @@ -126,6 +117,19 @@ private: // Sanitize the resource from http[s]://[host]/ questionable prefix void sanitizeResourcePfx(); + // parses the iovN data pointers elements as either a kXR_read or kXR_readv + // response and fills out a XrdHttpIOList with the corresponding length and + // buffer pointers. File offsets from kXR_readv responses are not recorded. + void getReadResponse(XrdHttpIOList &received); + + // notifies the range handler of receipt of bytes and sends the client + // the data. + int sendReadResponseSingleRange(const XrdHttpIOList &received); + + // notifies the range handler of receipt of bytes and sends the client + // the data and necessary headers, assuming multipart/byteranges content type. + int sendReadResponsesMultiRanges(const XrdHttpIOList &received); + /** * Extract a comma separated list of checksums+metadata into a vector * @param checksumList the list like "0:sha1, 1:adler32, 2:md5" @@ -142,13 +146,13 @@ private: static void determineXRootDChecksumFromUserDigest(const std::string & userDigest, std::vector & xrootdChecksums); public: - XrdHttpReq(XrdHttpProtocol *protinstance) : keepalive(true) { + XrdHttpReq(XrdHttpProtocol *protinstance, const XrdHttpReadRangeHandler::Configuration &rcfg) : + readRangeHandler(rcfg), keepalive(true) { prot = protinstance; length = 0; //xmlbody = 0; depth = 0; - ralist = 0; opaque = 0; writtenbytes = 0; fopened = false; @@ -169,8 +173,8 @@ public: int parseBody(char *body, long long len); /// Prepare the buffers for sending a readv request - int ReqReadV(); - readahead_list *ralist; + int ReqReadV(const XrdHttpIOList &cl); + std::vector ralist; /// Build a partial header for a multipart response std::string buildPartialHdr(long long bytestart, long long byteend, long long filesize, char *token); @@ -224,13 +228,9 @@ public: /// Tells if we have finished reading the header bool headerok; - - // This can be largely optimized... - /// The original list of multiple reads to perform - std::vector rwOps; - /// The new list got from chunking the original req respecting the xrootd - /// max sizes etc. - std::vector rwOps_split; + /// Tracking the next ranges of data to read during GET + XrdHttpReadRangeHandler readRangeHandler; + bool readClosing; bool keepalive; long long length; // Total size from client for PUT; total length of response TO client for GET. diff --git a/src/XrdHttp/XrdHttpUtils.hh b/src/XrdHttp/XrdHttpUtils.hh index 67d334bf11d..3b5ff6c2fe9 100644 --- a/src/XrdHttp/XrdHttpUtils.hh +++ b/src/XrdHttp/XrdHttpUtils.hh @@ -37,6 +37,9 @@ #include "XProtocol/XPtypes.hh" #include "XrdSec/XrdSecEntity.hh" +#include "XrdOuc/XrdOucIOVec.hh" +#include +#include #ifndef XRDHTTPUTILS_HH #define XRDHTTPUTILS_HH @@ -89,6 +92,8 @@ char *unquote(char *str); // Escape a string and return a new one char *escapeXML(const char *str); +typedef std::vector XrdHttpIOList; + #endif /* XRDHTTPUTILS_HH */ diff --git a/tests/XrdHttpTests/XrdHttpTests.cc b/tests/XrdHttpTests/XrdHttpTests.cc index 739b45b7f08..7b7ce778de3 100644 --- a/tests/XrdHttpTests/XrdHttpTests.cc +++ b/tests/XrdHttpTests/XrdHttpTests.cc @@ -3,10 +3,11 @@ #include "XrdHttp/XrdHttpReq.hh" #include "XrdHttp/XrdHttpProtocol.hh" #include "XrdHttp/XrdHttpChecksumHandler.hh" +#include "XrdHttp/XrdHttpReadRangeHandler.hh" #include #include #include - +#include using namespace testing; @@ -164,4 +165,412 @@ TEST(XrdHttpTests, checksumHandlerSelectionTest) { handler.configure(configChecksumList); ASSERT_EQ(nullptr, handler.getChecksumToRun(reqDigest)); } -} \ No newline at end of file +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerTwoRangesOfSizeEqualToMaxChunkSize) { + long long filesize = 8; + int rangeBegin = 0; + int rangeEnd = 3; + int rangeBegin2 = 4; + int rangeEnd2 = 7; + int readvMaxChunkSize = 4; + int readvMaxChunks = 20; + int rReqMaxSize = 200; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << rangeEnd << ", " << rangeBegin2 << "-" << rangeEnd2; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(2, cl.size()); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(4, cl[0].size); + ASSERT_EQ(4, cl[1].offset); + ASSERT_EQ(4, cl[1].size); + ASSERT_EQ(2, ul.size()); +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerOneRangeSizeLessThanMaxChunkSize) { + long long filesize = 8; + int rangeBegin = 0; + int rangeEnd = 3; + int readvMaxChunkSize = 5; + int readvMaxChunks = 20; + int rReqMaxSize = 200; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << rangeEnd; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(4, cl[0].size); + ASSERT_EQ(1, ul.size()); +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerOneRangeSizeGreaterThanMaxChunkSize) { + long long filesize = 8; + int rangeBegin = 0; + int rangeEnd = 7; + int readvMaxChunkSize = 3; + int readvMaxChunks = 20; + int rReqMaxSize = 200; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << rangeEnd; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + { + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(8, cl[0].size); + ASSERT_EQ(1, ul.size()); + } + ss.str(""); + ss << "bytes=0-0," << rangeBegin << "-" << rangeEnd; + rs = ss.str(); + h.reset(); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + { + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(4, cl.size()); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(1, cl[0].size); + ASSERT_EQ(0, cl[1].offset); + ASSERT_EQ(3, cl[1].size); + ASSERT_EQ(3, cl[2].offset); + ASSERT_EQ(3, cl[2].size); + ASSERT_EQ(6, cl[3].offset); + ASSERT_EQ(2, cl[3].size); + ASSERT_EQ(2, ul.size()); + } +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerRange0ToEnd) { + long long filesize = 200; + int rangeBegin = 0; + int readvMaxChunkSize = 4; + int readvMaxChunks = 20; + int rReqMaxSize = 100; + bool start, finish; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << "\r"; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + { + const XrdHttpIOList &cl1 = h.NextReadList(); + ASSERT_EQ(1, ul.size()); + ASSERT_EQ(1, cl1.size()); + ASSERT_EQ(0, cl1[0].offset); + ASSERT_EQ(100, cl1[0].size); + ASSERT_EQ(0, h.NotifyReadResult(100, nullptr, start, finish)); + } + { + const XrdHttpIOList &cl2 = h.NextReadList(); + ASSERT_EQ(1, cl2.size()); + ASSERT_EQ(100, cl2[0].offset); + ASSERT_EQ(100, cl2[0].size); + } +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerRange5FromEnd) { + long long filesize = 200; + int rangeEnd = 5; + int readvMaxChunkSize = 4; + int readvMaxChunks = 20; + int rReqMaxSize = 100; + bool start, finish; + std::stringstream ss; + ss << "bytes=-" << rangeEnd << "\r"; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + { + const XrdHttpIOList &cl1 = h.NextReadList(); + ASSERT_EQ(1, ul.size()); + ASSERT_EQ(1, cl1.size()); + ASSERT_EQ(195, cl1[0].offset); + ASSERT_EQ(5, cl1[0].size); + ASSERT_EQ(0, h.NotifyReadResult(5, nullptr, start, finish)); + } + { + const XrdHttpIOList &cl2 = h.NextReadList(); + ASSERT_EQ(0, cl2.size()); + const XrdHttpReadRangeHandler::Error &error = h.getError(); + ASSERT_EQ(false, static_cast(error)); + } +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerRange0To0) { + long long filesize = 8; + int rangeBegin = 0; + int rangeEnd = 0; + int readvMaxChunkSize = 4; + int readvMaxChunks = 20; + int rReqMaxSize = 100; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << rangeEnd; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(1, ul.size()); + ASSERT_EQ(0, ul[0].start); + ASSERT_EQ(0, ul[0].end); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(1, cl[0].size); +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerEndByteGreaterThanFileSize) { + long long filesize = 2; + int rangeBegin = 0; + int rangeEnd = 4; + int readvMaxChunkSize = 10; + int readvMaxChunks = 20; + int rReqMaxSize = 100; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << rangeEnd; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(2, cl[0].size); + ASSERT_EQ(1, ul.size()); + ASSERT_EQ(0, ul[0].start); + ASSERT_EQ(1, ul[0].end); +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerRangeBeginGreaterThanFileSize) { + long long filesize = 2; + int rangeBegin = 4; + int rangeEnd = 6; + int readvMaxChunkSize = 10; + int readvMaxChunks = 20; + int rReqMaxSize = 100; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << rangeEnd; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(0, ul.size()); + ASSERT_EQ(0, cl.size()); + const XrdHttpReadRangeHandler::Error &error = h.getError(); + ASSERT_EQ(true, static_cast(error)); + ASSERT_EQ(416, error.httpRetCode); +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerTwoRangesOneOutsideFileExtent) { + long long filesize = 20; + int rangeBegin1 = 22; + int rangeEnd1 = 30; + int rangeBegin2 = 4; + int rangeEnd2 = 6; + int readvMaxChunkSize = 10; + int readvMaxChunks = 20; + int rReqMaxSize = 100; + std::stringstream ss; + ss << "bytes=" << rangeBegin1 << "-" << rangeEnd1 << "," << rangeBegin2 << "-" << rangeEnd2; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, ul.size()); + ASSERT_EQ(4, ul[0].start); + ASSERT_EQ(6, ul[0].end); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(4, cl[0].offset); + ASSERT_EQ(3, cl[0].size); +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerMultiChunksSingleRange) { + long long filesize = 20; + int rangeBegin = 0; + int rangeEnd = 15; + int readvMaxChunkSize = 3; + int readvMaxChunks = 20; + int rReqMaxSize = 5; + bool start, finish; + std::stringstream ss; + ss << "bytes=" << rangeBegin << "-" << rangeEnd; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + ASSERT_EQ(1, ul.size()); + ASSERT_EQ(0, ul[0].start); + ASSERT_EQ(15, ul[0].end); + { + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(5, cl[0].size); + ASSERT_EQ(0, h.NotifyReadResult(5, nullptr, start, finish)); + ASSERT_EQ(true, start); + ASSERT_EQ(false, finish); + } + { + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(5, cl[0].offset); + ASSERT_EQ(5, cl[0].size); + ASSERT_EQ(0, h.NotifyReadResult(5, nullptr, start, finish)); + ASSERT_EQ(false, start); + ASSERT_EQ(false, finish); + } + { + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(10, cl[0].offset); + ASSERT_EQ(5, cl[0].size); + ASSERT_EQ(0, h.NotifyReadResult(5, nullptr, start, finish)); + ASSERT_EQ(false, start); + ASSERT_EQ(false, finish); + } + { + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(15, cl[0].offset); + ASSERT_EQ(1, cl[0].size); + ASSERT_EQ(0, h.NotifyReadResult(1, nullptr, start, finish)); + ASSERT_EQ(false, start); + ASSERT_EQ(true, finish); + } + { + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(0, cl.size()); + const XrdHttpReadRangeHandler::Error &error = h.getError(); + ASSERT_EQ(false, static_cast(error)); + } +} + +TEST(XrdHttpTests, xrdHttpReadRangeHandlerMultiChunksTwoRanges) { + long long filesize = 22; + int rangeBegin1 = 0; + int rangeEnd1 = 1; + int rangeBegin2 = 5; + int rangeEnd2 = 21; + int readvMaxChunkSize = 3; + int readvMaxChunks = 2; + int rReqMaxSize = 5; + bool start, finish; + const XrdHttpReadRangeHandler::UserRange *ur; + std::stringstream ss; + ss << "bytes=" << rangeBegin1 << "-" << rangeEnd1 << "," << rangeBegin2 << "-" << rangeEnd2; + std::string rs = ss.str(); + XrdHttpReadRangeHandler::Configuration cfg(readvMaxChunkSize, readvMaxChunks, rReqMaxSize); + XrdHttpReadRangeHandler h(cfg); + h.ParseContentRange(rs.c_str()); + h.SetFilesize(filesize); + const XrdHttpReadRangeHandler::UserRangeList &ul = h.ListResolvedRanges(); + ASSERT_EQ(2, ul.size()); + ASSERT_EQ(0, ul[0].start); + ASSERT_EQ(1, ul[0].end); + ASSERT_EQ(5, ul[1].start); + ASSERT_EQ(21, ul[1].end); + { + // we get 0-1, 5-7 + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(2, cl.size()); + ASSERT_EQ(0, cl[0].offset); + ASSERT_EQ(2, cl[0].size); + ASSERT_EQ(5, cl[1].offset); + ASSERT_EQ(3, cl[1].size); + ASSERT_EQ(0, h.NotifyReadResult(2, &ur, start, finish)); + ASSERT_EQ(true, start); + ASSERT_EQ(false, finish); + ASSERT_EQ(0, ur->start); + ASSERT_EQ(1, ur->end); + ASSERT_EQ(0, h.NotifyReadResult(3, &ur, start, finish)); + ASSERT_EQ(true, start); + ASSERT_EQ(false, finish); + ASSERT_EQ(5, ur->start); + ASSERT_EQ(21, ur->end); + } + { + // we get 8-12 + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(8, cl[0].offset); + ASSERT_EQ(5, cl[0].size); + ASSERT_EQ(0, h.NotifyReadResult(5, &ur, start, finish)); + ASSERT_EQ(false, start); + ASSERT_EQ(false, finish); + ASSERT_EQ(5, ur->start); + ASSERT_EQ(21, ur->end); + } + { + // we get 13-17 + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(1, cl.size()); + ASSERT_EQ(13, cl[0].offset); + ASSERT_EQ(5, cl[0].size); + ASSERT_EQ(0, h.NotifyReadResult(5, &ur, start, finish)); + ASSERT_EQ(false, start); + ASSERT_EQ(false, finish); + ASSERT_EQ(5, ur->start); + ASSERT_EQ(21, ur->end); + } + { + // we get 18-20, 21-21 + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(2, cl.size()); + ASSERT_EQ(18, cl[0].offset); + ASSERT_EQ(3, cl[0].size); + ASSERT_EQ(21, cl[1].offset); + ASSERT_EQ(1, cl[1].size); + ASSERT_EQ(0, h.NotifyReadResult(3, &ur, start, finish)); + ASSERT_EQ(false, start); + ASSERT_EQ(false, finish); + ASSERT_EQ(5, ur->start); + ASSERT_EQ(21, ur->end); + ASSERT_EQ(0, h.NotifyReadResult(1, &ur, start, finish)); + ASSERT_EQ(false, start); + ASSERT_EQ(true, finish); + ASSERT_EQ(5, ur->start); + ASSERT_EQ(21, ur->end); + } + { + const XrdHttpIOList &cl = h.NextReadList(); + ASSERT_EQ(0, cl.size()); + const XrdHttpReadRangeHandler::Error &error = h.getError(); + ASSERT_EQ(false, static_cast(error)); + } +} From 6c202f79e70e5953751e76d2398fbf0d272551ba Mon Sep 17 00:00:00 2001 From: David Smith Date: Thu, 14 Sep 2023 11:15:52 +0200 Subject: [PATCH 3/3] [XrdHttp] Correct chunked response for GET with a byte range #2076 Also suppress Content-Length header for transfer-encoding chunked response. --- src/XrdHttp/XrdHttpReq.cc | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/XrdHttp/XrdHttpReq.cc b/src/XrdHttp/XrdHttpReq.cc index b39c5883a53..5b9c185e4de 100644 --- a/src/XrdHttp/XrdHttpReq.cc +++ b/src/XrdHttp/XrdHttpReq.cc @@ -2115,7 +2115,7 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { // Full file. if (m_transfer_encoding_chunked && m_trailer_headers) { - prot->StartChunkedResp(200, NULL, responseHeader.empty() ? NULL : responseHeader.c_str(), filesize, keepalive); + prot->StartChunkedResp(200, NULL, responseHeader.empty() ? NULL : responseHeader.c_str(), -1, keepalive); } else { prot->SendSimpleResp(200, NULL, responseHeader.empty() ? NULL : responseHeader.c_str(), NULL, filesize, keepalive); } @@ -2140,8 +2140,11 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { s += responseHeader.c_str(); } - - prot->SendSimpleResp(206, NULL, (char *)s.c_str(), NULL, cnt, keepalive); + if (m_transfer_encoding_chunked && m_trailer_headers) { + prot->StartChunkedResp(206, NULL, (char *)s.c_str(), -1, keepalive); + } else { + prot->SendSimpleResp(206, NULL, (char *)s.c_str(), NULL, cnt, keepalive); + } return 0; } @@ -2163,8 +2166,11 @@ int XrdHttpReq::PostProcessHTTPReq(bool final_) { header += m_digest_header; } - - prot->SendSimpleResp(206, NULL, header.c_str(), NULL, cnt, keepalive); + if (m_transfer_encoding_chunked && m_trailer_headers) { + prot->StartChunkedResp(206, NULL, header.c_str(), -1, keepalive); + } else { + prot->SendSimpleResp(206, NULL, header.c_str(), NULL, cnt, keepalive); + } return 0; @@ -2814,7 +2820,10 @@ int XrdHttpReq::sendReadResponsesMultiRanges(const XrdHttpIOList &received) { // report each received byte chunk to the range handler and record the details // of original user range it related to and if starts a range or finishes all. + // also sum the total of the headers and data which need to be sent to the user, + // in case we need it for chunked transfer encoding std::vector rvec; + off_t sum_len = 0; rvec.reserve(received.size()); @@ -2838,16 +2847,26 @@ int XrdHttpReq::sendReadResponsesMultiRanges(const XrdHttpIOList &received) { (char *) "123456"); rentry.st_header = s; + sum_len += s.size(); } + sum_len += rcv.size; + if (finish) { std::string s = buildPartialHdrEnd((char *) "123456"); rentry.fin_header = s; + sum_len += s.size(); } rvec.push_back(rentry); } + + // Send chunked encoding header + if (m_transfer_encoding_chunked && m_trailer_headers) { + prot->ChunkRespHeader(sum_len); + } + // send the user the headers / data for(const auto &rentry: rvec) { @@ -2870,6 +2889,11 @@ int XrdHttpReq::sendReadResponsesMultiRanges(const XrdHttpIOList &received) { } } + // Send chunked encoding footer + if (m_transfer_encoding_chunked && m_trailer_headers) { + prot->ChunkRespFooter(); + } + return 0; }