diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c3917baf..0d6b953e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -915,6 +915,7 @@ jobs: cp -r "$workspace_root"/buffers-root libs/buffers python3 tools/boostdep/depinst/depinst.py buffers + python3 tools/boostdep/depinst/depinst.py $module - name: Boost B2 Workflow uses: alandefreitas/cpp-actions/b2-workflow@v1.8.2 diff --git a/include/boost/http_proto/detail/header.hpp b/include/boost/http_proto/detail/header.hpp index 2ec4024a..8d76ad94 100644 --- a/include/boost/http_proto/detail/header.hpp +++ b/include/boost/http_proto/detail/header.hpp @@ -1,5 +1,6 @@ // // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) +// Copyright (c) 2024 Mohammad Nejati // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -211,11 +212,13 @@ struct header void on_insert_content_length(core::string_view); void on_insert_expect(core::string_view); void on_insert_transfer_encoding(); + void on_insert_content_encoding(core::string_view); void on_insert_upgrade(core::string_view); void on_erase_connection(); void on_erase_content_length(); void on_erase_expect(); void on_erase_transfer_encoding(); + void on_erase_content_encoding(); void on_erase_upgrade(); void on_erase_all(field); void update_payload() noexcept; diff --git a/include/boost/http_proto/error.hpp b/include/boost/http_proto/error.hpp index a0834589..383cc82a 100644 --- a/include/boost/http_proto/error.hpp +++ b/include/boost/http_proto/error.hpp @@ -1,5 +1,6 @@ // // Copyright (c) 2021 Vinnie Falco (vinnie.falco@gmail.com) +// Copyright (c) 2024 Mohammad Nejati // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -55,6 +56,9 @@ enum class error /// Invalid Connection field value bad_connection, + /// Syntax error in content-encoding + bad_content_encoding, + /// Invalid Content-Length field value or values bad_content_length, diff --git a/include/boost/http_proto/metadata.hpp b/include/boost/http_proto/metadata.hpp index 9f23c207..78678117 100644 --- a/include/boost/http_proto/metadata.hpp +++ b/include/boost/http_proto/metadata.hpp @@ -1,5 +1,6 @@ // // Copyright (c) 2021 Vinnie Falco (vinnie.falco@gmail.com) +// Copyright (c) 2024 Mohammad Nejati // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -64,6 +65,11 @@ encoding */ identity, + /** + * Indicates the body encoding is unsupported. + */ + unsupported, + /** * Indicates the body has deflate applied. */ @@ -128,6 +134,42 @@ struct metadata //-------------------------------------------- + /** Metadata for the Content-Encoding field + */ + struct content_encoding_t + { + /** Error status of Content-Encoding + */ + system::error_code ec; + + /** The total number of fields + */ + std::size_t count = 0; + + /** The body encoding. 
+        */
+        http_proto::encoding encoding =
+            http_proto::encoding::identity;
+
+    #ifdef BOOST_HTTP_PROTO_AGGREGATE_WORKAROUND
+        constexpr
+        content_encoding_t() = default;
+
+        constexpr
+        content_encoding_t(
+            system::error_code ec_,
+            std::size_t count_) noexcept
+            : ec(ec_)
+            , count(count_)
+            , encoding(
+                http_proto::encoding::identity)
+        {
+        }
+    #endif
+    };
+
+    //--------------------------------------------
+
     /** Metadata for the Content-Length field
     */
     struct content_length_t
@@ -320,6 +362,10 @@ struct metadata
     */
     connection_t connection;
 
+    /** Metadata for the Content-Encoding field.
+    */
+    content_encoding_t content_encoding;
+
     /** Metadata for the Content-Length field.
     */
     content_length_t content_length;
diff --git a/include/boost/http_proto/parser.hpp b/include/boost/http_proto/parser.hpp
index 852aac00..7e0473cd 100644
--- a/include/boost/http_proto/parser.hpp
+++ b/include/boost/http_proto/parser.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2024 Mohammad Nejati
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -77,18 +78,20 @@ class BOOST_SYMBOL_VISIBLE
 
         /** True if parser can decode deflate transfer and content encodings.
 
-            The deflate decoder must already be
+            The zlib service must already be
             installed thusly, or else an exception
            is thrown.
-
-            @par Install Deflate Decoder
-            @code
-            deflate_decoder_service::config cfg;
-            cfg.install( ctx );
-            @endcode
         */
         bool apply_deflate_decoder = false;
 
+        /** True if parser can decode gzip transfer and content encodings.
+
+            The zlib service must already be
+            installed in the context, or else
+            an exception is thrown.
+        */
+        bool apply_gzip_decoder = false;
+
         /** Minimum space for payload buffering.
 
             This value controls the following
@@ -420,7 +423,7 @@ class BOOST_SYMBOL_VISIBLE
     buffers::circular_buffer* body_buf_ = nullptr;
     buffers::any_dynamic_buffer* eb_ = nullptr;
-    detail::filter* filt_ = nullptr;
+    detail::filter* filter_ = nullptr;
     sink* sink_ = nullptr;
     state st_ = state::start;
 
diff --git a/src/detail/header.cpp b/src/detail/header.cpp
index 193e48dd..bcd43c2c 100644
--- a/src/detail/header.cpp
+++ b/src/detail/header.cpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2024 Mohammad Nejati
 //
 // Distributed under the Boost Software License, Version 1.0.
(See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -357,6 +358,8 @@ maybe_count( { case field::connection: return md.connection.count; + case field::content_encoding: + return md.content_encoding.count; case field::content_length: return md.content_length.count; case field::expect: @@ -381,6 +384,7 @@ is_special( switch(id) { case field::connection: + case field::content_encoding: case field::content_length: case field::expect: case field::transfer_encoding: @@ -419,6 +423,8 @@ on_insert( return; switch(id) { + case field::content_encoding: + return on_insert_content_encoding(v); case field::content_length: return on_insert_content_length(v); case field::connection: @@ -445,6 +451,8 @@ on_erase(field id) { case field::connection: return on_erase_connection(); + case field::content_encoding: + return on_erase_content_encoding(); case field::content_length: return on_erase_content_length(); case field::expect: @@ -648,6 +656,52 @@ on_insert_transfer_encoding() update_payload(); } +void +header:: +on_insert_content_encoding( + core::string_view v) +{ + ++md.content_encoding.count; + if( md.content_encoding.ec.failed() ) + return; + + auto rv = grammar::parse( + v, list_rule(token_rule, 1)); + if( !rv ) + { + md.content_encoding.ec = + BOOST_HTTP_PROTO_ERR( + error::bad_content_encoding); + return; + } + + if( rv->size() > 1 || + md.content_encoding.count > 1) + { + md.content_encoding.encoding = + encoding::unsupported; + return; + } + + if( grammar::ci_is_equal(*(rv->begin()), + "deflate") ) + { + md.content_encoding.encoding = + encoding::deflate; + } + else if( grammar::ci_is_equal(*(rv->begin()), + "gzip") ) + { + md.content_encoding.encoding = + encoding::gzip; + } + else + { + md.content_encoding.encoding = + encoding::unsupported; + } +} + void header:: on_insert_upgrade( @@ -807,6 +861,25 @@ on_erase_transfer_encoding() on_insert_transfer_encoding(); } +void +header:: +on_erase_content_encoding() +{ + BOOST_ASSERT( + md.content_encoding.count > 0); + --md.content_encoding.count; + if(md.content_encoding.count == 0) + { + // no Content-Encoding + md.content_encoding = {}; + return; + } + // re-insert everything + --md.content_encoding.count; + // TODO + // on_insert_content_encoding(); +} + // called when Upgrade is erased void header:: diff --git a/src/detail/impl/filter.hpp b/src/detail/impl/filter.hpp index 927fb196..55b01115 100644 --- a/src/detail/impl/filter.hpp +++ b/src/detail/impl/filter.hpp @@ -39,19 +39,6 @@ process_impl( auto ib = *it_i; for(;;) { - results rs = process_impl(ob, ib, more); - - rv.out_bytes += rs.out_bytes; - rv.in_bytes += rs.in_bytes; - rv.ec = rs.ec; - rv.finished = rs.finished; - - if(rv.finished || rv.ec) - return rv; - - ob = buffers::sans_prefix(ob, rs.out_bytes); - ib = buffers::sans_prefix(ib, rs.in_bytes); - if(ob.size() == 0) { if(++it_o == buffers::end(out)) @@ -65,6 +52,19 @@ process_impl( return rv; ib = *it_i; } + + results rs = process_impl(ob, ib, more); + + rv.out_bytes += rs.out_bytes; + rv.in_bytes += rs.in_bytes; + rv.ec = rs.ec; + rv.finished = rs.finished; + + if(rv.finished || rv.ec) + return rv; + + ob = buffers::sans_prefix(ob, rs.out_bytes); + ib = buffers::sans_prefix(ib, rs.in_bytes); } } diff --git a/src/parser.cpp b/src/parser.cpp index cf690708..9f19ac95 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,5 +1,6 @@ // // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) +// Copyright (c) 2024 Mohammad Nejati // // Distributed under the Boost Software License, Version 1.0. 
(See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -7,14 +8,14 @@ // Official repository: https://github.com/cppalliance/http_proto // -#include - #include +#include #include +#include #include #include -#include +#include "detail/filter.hpp" #include #include @@ -30,8 +31,6 @@ #include #include -#include "rfc/detail/rules.hpp" - namespace boost { namespace http_proto { @@ -117,7 +116,64 @@ Buffer Usage or additional data received past the end of the message payload. */ -//----------------------------------------------- + +namespace { +class inflator_filter + : public http_proto::detail::filter +{ + zlib::stream& inflator_; + +public: + inflator_filter( + context& ctx, + http_proto::detail::workspace& ws, + bool use_gzip) + : inflator_{ ctx.get_service() + .make_inflator(ws, use_gzip ? 31 : 15) } + { + } + + virtual filter::results + on_process( + buffers::mutable_buffer out, + buffers::const_buffer in, + bool more) override + { + auto flush = + more ? zlib::flush::none : zlib::flush::finish; + filter::results results; + + for(;;) + { + auto params = zlib::params{in.data(), in.size(), + out.data(), out.size() }; + auto ec = inflator_.write(params, flush); + + results.in_bytes += in.size() - params.avail_in; + results.out_bytes += out.size() - params.avail_out; + + // TODO: limit the cases where buf_err is valid + if( ec.failed() && + ec != zlib::error::buf_err ) + { + results.ec = ec; + return results; + } + + if( ec == zlib::error::stream_end ) + { + results.finished = true; + return results; + } + + in = buffers::suffix(in, params.avail_in); + out = buffers::suffix(out, params.avail_out); + + if( in.size() == 0 || out.size() == 0) + return results; + } + } +}; class chained_sequence { @@ -178,7 +234,7 @@ class chained_sequence static system::result -parse_hex(chained_sequence& cs) +parse_hex(chained_sequence& cs) noexcept { std::uint64_t v = 0; std::size_t init_size = cs.size(); @@ -207,7 +263,7 @@ parse_hex(chained_sequence& cs) static system::result -find_eol(chained_sequence& cs) +find_eol(chained_sequence& cs) noexcept { while(!cs.empty()) { @@ -229,7 +285,7 @@ find_eol(chained_sequence& cs) static system::result -parse_eol(chained_sequence& cs) +parse_eol(chained_sequence& cs) noexcept { if(cs.size() >= 2) { @@ -248,7 +304,7 @@ parse_eol(chained_sequence& cs) static system::result -skip_trailer_headers(chained_sequence& cs) +skip_trailer_headers(chained_sequence& cs) noexcept { while(!cs.empty()) { @@ -271,15 +327,25 @@ skip_trailer_headers(chained_sequence& cs) error::need_data); } +template +std::size_t +clamp(UInt x, std::size_t limit) noexcept +{ + if(x >= limit) + return limit; + return static_cast(x); +} + template system::result parse_chunked( + detail::filter* filter, buffers::circular_buffer& input, ElasticBuffer& output, std::uint64_t& chunk_remain_, std::uint64_t& body_avail_, bool& needs_chunk_close_, - bool& trailer_headers_) + bool& trailer_headers_) noexcept { for(;;) { @@ -289,18 +355,18 @@ parse_chunked( if(trailer_headers_) { - auto rv = skip_trailer_headers(cs); - if(rv.has_error()) - return rv; + auto rs = skip_trailer_headers(cs); + if(rs.has_error()) + return rs; input.consume(input.size() - cs.size()); return {}; } if(needs_chunk_close_) { - auto rv = parse_eol(cs); - if(rv.has_error()) - return rv; + auto rs = parse_eol(cs); + if(rs.has_error()) + return rs; } auto chunk_size = parse_hex(cs); @@ -308,9 +374,9 @@ parse_chunked( return chunk_size.error(); // chunk extensions are skipped - auto rv = find_eol(cs); - 
if(rv.has_error()) - return rv; + auto rs = find_eol(cs); + if(rs.has_error()) + return rs; input.consume(input.size() - cs.size()); chunk_remain_ = chunk_size.value(); @@ -323,35 +389,50 @@ parse_chunked( } } - // we've successfully parsed a chunk-size and have - // consume()d the entire buffer if( input.size() == 0 ) BOOST_HTTP_PROTO_RETURN_EC( error::need_data); - // TODO: this is an open-ended design space with no - // clear answer at time of writing. - // revisit this later if( output.capacity() == 0 ) - detail::throw_length_error(); - - auto n = (std::min)( - chunk_remain_, - static_cast(input.size())); + BOOST_HTTP_PROTO_RETURN_EC( + error::in_place_overflow); - auto m = buffers::buffer_copy( - output.prepare(output.capacity()), - buffers::prefix(input.data(), static_cast(n))); + auto chunk = buffers::prefix(input.data(), + clamp(chunk_remain_, input.size())); - BOOST_ASSERT(m <= chunk_remain_); - chunk_remain_ -= m; - input.consume(m); - output.commit(m); - body_avail_ += m; + if( filter ) + { + // TODO: gather available chunks and provide + // them as a const_buffer_span + auto rs = filter->process( + output.prepare(output.capacity()), + chunk, + !trailer_headers_); + + if( rs.ec.failed() ) + return rs.ec; + + chunk_remain_ -= rs.in_bytes; + input.consume(rs.in_bytes); + output.commit(rs.out_bytes); + body_avail_ += rs.out_bytes; + + if( rs.finished && chunk_remain_ != 0 ) + BOOST_HTTP_PROTO_RETURN_EC( + error::bad_payload); + } + else + { + auto copied = buffers::buffer_copy( + output.prepare(output.capacity()), chunk); + chunk_remain_ -= copied; + input.consume(copied); + output.commit(copied); + body_avail_ += copied; + } } } - -//----------------------------------------------- +} // namespace class parser_service : public service @@ -616,7 +697,9 @@ start_impl( chunk_remain_ = 0; needs_chunk_close_ = false; trailer_headers_ = false; + filter_ = nullptr; body_avail_ = 0; + body_total_ = 0; } auto @@ -660,8 +743,7 @@ prepare() -> if(! 
is_plain()) { // buffered payload - auto n = cb0_.capacity() - - cb0_.size(); + auto n = cb0_.capacity(); if( n > svc_.cfg.max_prepare) n = svc_.cfg.max_prepare; mbp_ = cb0_.prepare(n); @@ -676,6 +758,13 @@ prepare() -> auto n = cb0_.capacity(); if( n > svc_.cfg.max_prepare) n = svc_.cfg.max_prepare; + + // TODO: payload_remain_ + svc_.max_overread() might overflow + if( h_.md.payload == payload::size && + n > payload_remain_ + svc_.max_overread()) + n = static_cast( + payload_remain_ + svc_.max_overread()); + mbp_ = cb0_.prepare(n); nprepare_ = n; return mutable_buffers_type(mbp_); @@ -859,10 +948,12 @@ commit( if(n < payload_remain_) { body_avail_ += n; + body_total_ += n; payload_remain_ -= n; break; } body_avail_ += payload_remain_; + body_total_ += payload_remain_; payload_remain_ = 0; st_ = state::complete; break; @@ -871,6 +962,7 @@ commit( BOOST_ASSERT( h_.md.payload == payload::to_eof); body_avail_ += n; + body_total_ += n; break; } @@ -1074,25 +1166,113 @@ parse( { if( how_ == how::in_place ) { - auto& input = cb0_; - auto& output = cb1_; + // TODO: parse_chunked should be a member function auto rv = parse_chunked( - input, output, chunk_remain_, body_avail_, - needs_chunk_close_, trailer_headers_); - if(rv.has_error()) + filter_, cb0_, cb1_, chunk_remain_, + body_avail_, needs_chunk_close_, trailer_headers_); + + // TODO: check for body_limit + + if(rv.has_error()) // including error::need_data ec = rv.error(); else st_ = state::complete; return; } else + { + // TODO detail::throw_logic_error(); - + } } - else if( filt_ ) + else if( filter_ ) { - // VFALCO TODO apply filter - detail::throw_logic_error(); + if( how_ == how::in_place ) + { + if( body_buf_->capacity() == 0 ) + { + // in_place buffer limit + ec = BOOST_HTTP_PROTO_ERR( + error::in_place_overflow); + return; + } + + auto rs = [&]() -> detail::filter::results + { + if( h_.md.payload == payload::size ) + { + auto rv = filter_->process( + body_buf_->prepare(body_buf_->capacity()), + buffers::prefix(cb0_.data(), payload_remain_), + cb0_.size() < payload_remain_); + + payload_remain_ -= rv.in_bytes; + return rv; + } + BOOST_ASSERT(h_.md.payload == payload::to_eof); + return filter_->process( + body_buf_->prepare(body_buf_->capacity()), + cb0_.data(), + !got_eof_); + }(); + + ec = rs.ec; + body_avail_ += rs.out_bytes; + body_total_ += rs.out_bytes; + cb0_.consume(rs.in_bytes); + body_buf_->commit(rs.out_bytes); + + if( body_avail_ > svc_.cfg.body_limit ) + { + ec = BOOST_HTTP_PROTO_ERR( + error::body_too_large); + st_ = state::reset; // unrecoverable + return; + } + + if( ec.failed() ) + { + st_ = state::reset; // unrecoverable + return; + } + + if( rs.finished ) + { + if( !got_eof_ && + h_.md.payload == payload::to_eof ) + { + ec = BOOST_HTTP_PROTO_ERR( + error::need_data); + return; + } + + st_ = state::complete; + return; + } + + if( got_eof_ ) + { + if( body_buf_->capacity() == 0 ) + { + ec = BOOST_HTTP_PROTO_ERR( + error::in_place_overflow); + return; + } + ec = BOOST_HTTP_PROTO_ERR( + error::incomplete); + st_ = state::reset; // unrecoverable + return; + } + + ec = BOOST_HTTP_PROTO_ERR( + error::need_data); + return; + } + else + { + // TODO + detail::throw_logic_error(); + } } if(how_ == how::in_place) @@ -1125,15 +1305,14 @@ parse( st_ = state::complete; break; } - if(body_avail_ > svc_.cfg.body_limit) + if( body_total_ > svc_.cfg.body_limit ) { ec = BOOST_HTTP_PROTO_ERR( error::body_too_large); st_ = state::reset; // unrecoverable return; } - if( h_.md.payload == payload::chunked || - ! got_eof_) + if( ! 
got_eof_ ) { ec = BOOST_HTTP_PROTO_ERR( error::need_data); @@ -1414,7 +1593,7 @@ bool parser:: is_plain() const noexcept { - return ! filt_ && + return ! filter_ && h_.md.payload != payload::chunked; } @@ -1461,23 +1640,47 @@ on_headers( ws_.data(), overread + fb_.capacity(), overread }; - body_avail_ = 0; - body_total_ = 0; body_buf_ = &cb0_; st_ = state::complete; return; } - // calculate filter - filt_ = nullptr; // VFALCO TODO + auto cap = fb_.capacity() + overread + + svc_.cfg.min_buffer; + + // reserve body buffers first, as the decoder + // must be installed after them. + auto const p = ws_.reserve_front(cap); - if(is_plain()) + if( svc_.cfg.apply_deflate_decoder && + h_.md.content_encoding.encoding == encoding::deflate ) { - // plain payload - if(h_.md.payload == payload::size) + filter_ = &ws_.emplace( + ctx_, ws_, false); + } + else if( svc_.cfg.apply_gzip_decoder && + h_.md.content_encoding.encoding == encoding::gzip ) + { + filter_ = &ws_.emplace( + ctx_, ws_, true); + } + else + { + cap += svc_.max_codec; + ws_.reserve_front(svc_.max_codec); + } + + if( !filter_ && + h_.md.payload != payload::chunked ) + { + cb0_ = { p, cap, overread }; + body_buf_ = &cb0_; + body_avail_ = cb0_.size(); + + if( h_.md.payload == payload::size ) { - if(h_.md.payload_size > - svc_.cfg.body_limit) + if( h_.md.payload_size > + svc_.cfg.body_limit ) { ec = BOOST_HTTP_PROTO_ERR( error::body_too_large); @@ -1485,90 +1688,29 @@ on_headers( return; } - // for plain messages with a known size,, we can - // get away with only using cb0_ as our input - // area and leaving cb1_ blank - BOOST_ASSERT(fb_.max_size() >= h_.size); - BOOST_ASSERT( - fb_.max_size() - h_.size == - overread + fb_.capacity()); - BOOST_ASSERT(fb_.data().data() == h_.buf); - BOOST_ASSERT(svc_.max_codec == 0); - auto cap = - (overread + fb_.capacity()) + // reuse previously designated storage - svc_.cfg.min_buffer + // minimum buffer size for prepare() calls - svc_.max_codec; // tentatively we can delete this - - if( cap > h_.md.payload_size && - cap - h_.md.payload_size >= svc_.max_overread() ) - { - // we eagerly process octets as they arrive, - // so it's important to limit potential - // overread as applying a transformation algo - // can be prohibitively expensive - cap = - static_cast(h_.md.payload_size) + - svc_.max_overread(); - } - - BOOST_ASSERT(cap <= ws_.size()); - - cb0_ = { ws_.data(), cap, overread }; - cb1_ = {}; - - body_buf_ = &cb0_; - body_avail_ = cb0_.size(); - if( body_avail_ >= h_.md.payload_size) + if( body_avail_ >= h_.md.payload_size ) body_avail_ = h_.md.payload_size; - body_total_ = body_avail_; payload_remain_ = - h_.md.payload_size - body_total_; - - st_ = state::body; - return; + h_.md.payload_size - body_avail_; } - // overread is not applicable - BOOST_ASSERT( - h_.md.payload == payload::to_eof); - auto const n0 = - fb_.capacity() - h_.size + - svc_.cfg.min_buffer + - svc_.max_codec; - BOOST_ASSERT(n0 <= ws_.size()); - cb0_ = { ws_.data(), n0, overread }; - body_buf_ = &cb0_; - body_avail_ = cb0_.size(); body_total_ = body_avail_; st_ = state::body; return; } - // buffered payload + if( h_.md.payload == payload::size ) + payload_remain_ = h_.md.payload_size; - // TODO: need to handle the case where we have so much - // overread or such an initially large chunk that we - // don't have enough room in cb1_ for the output - // perhaps we just return with an error and ask the user - // to attach a body style - auto size = ws_.size(); + auto const n0 = overread > svc_.cfg.min_buffer ? 
+ overread : svc_.cfg.min_buffer; + auto const n1 = cap - n0; - auto n0 = (std::max)(svc_.cfg.min_buffer, overread); - n0 = (std::max)(n0, size / 2); - if( filt_) - n0 += svc_.max_codec; - - auto n1 = size - n0; - - // BOOST_ASSERT(n0 <= svc_.max_overread()); - BOOST_ASSERT(n0 + n1 <= ws_.size()); - cb0_ = { ws_.data(), n0, overread }; - cb1_ = { ws_.data() + n0, n1 }; + cb0_ = { p , n0, overread }; + cb1_ = { p + n0 , n1 }; body_buf_ = &cb1_; - // body_buf_ = nullptr; - body_avail_ = 0; - body_total_ = 0; + st_ = state::body; } @@ -1627,53 +1769,17 @@ init_dynamic( body_avail_ == body_buf_->size()); BOOST_ASSERT( body_total_ == body_avail_); + auto const space_left = eb_->max_size() - eb_->size(); - if(h_.md.payload == payload::size) - { - if(space_left < h_.md.payload_size) - { - ec = BOOST_HTTP_PROTO_ERR( - error::buffer_overflow); - return; - } - // reserve the full size - eb_->prepare(static_cast(h_.md.payload_size)); - // transfer in-place body - auto n = static_cast(body_avail_); - if( n > h_.md.payload_size) - n = static_cast(h_.md.payload_size); - eb_->commit( - buffers::buffer_copy( - eb_->prepare(n), - body_buf_->data())); - BOOST_ASSERT(body_avail_ == n); - BOOST_ASSERT(body_total_ == n); - BOOST_ASSERT(payload_remain_ == - h_.md.payload_size - n); - body_buf_->consume(n); - body_avail_ = 0; - if(n < h_.md.payload_size) - { - BOOST_ASSERT( - body_buf_->size() == 0); - st_ = state::body; - return; - } - // complete - st_ = state::complete; - return; - } - - BOOST_ASSERT(h_.md.payload == - payload::to_eof); if(space_left < body_avail_) { ec = BOOST_HTTP_PROTO_ERR( error::buffer_overflow); return; } + eb_->commit( buffers::buffer_copy( eb_->prepare(static_cast(body_avail_)), @@ -1682,6 +1788,18 @@ init_dynamic( body_avail_ = 0; BOOST_ASSERT( body_buf_->size() == 0); + + // TODO: expand cb_0? + + // TODO: we need a better way to recover the state. 
+ if( !filter_ && + h_.md.payload == payload::size && + body_total_ == h_.md.payload_size) + { + st_ = state::complete; + return; + } + st_ = state::body; } diff --git a/src/serializer.cpp b/src/serializer.cpp index 508e793c..ba759af2 100644 --- a/src/serializer.cpp +++ b/src/serializer.cpp @@ -56,15 +56,18 @@ class deflator_filter { auto params = zlib::params{in.data(), in.size(), out.data(), out.size() }; - results.ec = deflator_.write(params, flush); + auto ec = deflator_.write(params, flush); results.in_bytes += in.size() - params.avail_in; results.out_bytes += out.size() - params.avail_out; - if(results.ec.failed()) + if(ec.failed()) + { + results.ec = ec; return results; + } - if(results.ec == zlib::error::stream_end) + if(ec == zlib::error::stream_end) { results.finished = true; return results; diff --git a/src_zlib/service/zlib_service.cpp b/src_zlib/service/zlib_service.cpp index 4fa46ff1..1a36704e 100644 --- a/src_zlib/service/zlib_service.cpp +++ b/src_zlib/service/zlib_service.cpp @@ -157,7 +157,7 @@ void zfree(void* /* opaque */, void* /* addr */) // so all the allocations are passively freed } -static ::uInt +::uInt clamp(std::size_t x) noexcept { if(x >= (std::numeric_limits<::uInt>::max)()) diff --git a/test/cmake_test/CMakeLists.txt b/test/cmake_test/CMakeLists.txt index af24eb6e..114b61d8 100644 --- a/test/cmake_test/CMakeLists.txt +++ b/test/cmake_test/CMakeLists.txt @@ -44,7 +44,6 @@ else() optional predef detail - move utility preprocessor io diff --git a/test/unit/zlib.cpp b/test/unit/zlib.cpp index 113215bb..0ffbef26 100644 --- a/test/unit/zlib.cpp +++ b/test/unit/zlib.cpp @@ -1,3 +1,13 @@ +// +// Copyright (c) 2024 Christian Mazakas +// Copyright (c) 2024 Mohammad Nejati +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/http_proto +// + #include #include "test_suite.hpp" @@ -22,15 +32,19 @@ TEST_SUITE( #include #include +#include +#include #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -83,6 +97,40 @@ namespace http_proto { struct zlib_test { + std::string + deflate( + int window_bits, + int mem_level, + core::string_view str) + { + z_stream zs{}; + + if( deflateInit2(&zs, -1, Z_DEFLATED, window_bits, + mem_level, Z_DEFAULT_STRATEGY) != Z_OK ) + throw std::runtime_error{ "deflateInit2" }; + + zs.next_in = reinterpret_cast( + const_cast(str.data())); + zs.avail_in = static_cast(str.size()); + + std::string result; + for(;;) + { + constexpr auto chunk_size = 2048; + result.resize(result.size() + chunk_size); + auto it = result.begin() + result.size() - chunk_size; + zs.next_out = reinterpret_cast(&*it); + zs.avail_out = chunk_size; + auto ret = ::deflate(&zs, Z_FINISH); + result.erase( + it + chunk_size - zs.avail_out, result.end()); + if(ret != Z_OK) + break; + } + deflateEnd(&zs); + return result; + } + void verify_compressed( span compressed, core::string_view expected) @@ -427,7 +475,7 @@ struct zlib_test } void - zlib_serializer() + test_serializer() { std::string short_body = "hello world, compression seems super duper cool! 
hmm, but what if I also add like a whole bunch of text to this thing????"; @@ -463,9 +511,111 @@ struct zlib_test zlib_serializer_impl(fp, c, body, b); } + void + test_parser() + { + context ctx; + zlib::install_service(ctx); + + auto append_chunked = []( + std::string& msg, + core::string_view body) + { + for(;;) + { + auto chunk = body.substr(0, + std::min(size_t{ 100 }, body.size())); + body.remove_prefix(chunk.size()); + msg.append(8, '0'); + auto it = msg.begin() + msg.size() - 8; + auto c = std::snprintf(&*it, 8, "%zx", chunk.size()); + msg.erase(it + c, msg.end()); + msg += "\r\n"; + msg += chunk; + msg += "\r\n"; + if(chunk.size() == 0) + break; + } + }; + + response_parser::config cfg; + cfg.apply_deflate_decoder = true; + cfg.apply_gzip_decoder = true; + cfg.body_limit = 8 * 1024 * 1024; + install_parser_service(ctx, cfg); + response_parser pr(ctx); + + for(auto gzip : { false, true }) + for(auto chunked : { false, true }) + for(auto body_size : { 0, 7, 64 * 1024, 1024 * 1024 }) + { + std::string msg = "HTTP/1.1 200 OK\r\n"; + + if(gzip) + msg += "Content-Encoding: gzip\r\n"; + else + msg += "Content-Encoding: deflate\r\n"; + + if(chunked) + msg += "Transfer-Encoding: chunked\r\n"; + + msg += "\r\n"; + + auto const body = generate_book(body_size); + auto const deflated = deflate(15 + (gzip ? 16 : 0), 8, body); + + if(chunked) + append_chunked(msg, deflated); + else + msg += deflated; + + pr.reset(); + pr.start(); + + auto msg_buf = + buffers::const_buffer{ msg.data(), msg.size() }; + + std::string parsed_body; + buffers::string_buffer parsed_body_buf{ + &parsed_body }; + for(;;) + { + auto n1 = buffers::buffer_copy( + pr.prepare(), msg_buf); + pr.commit(n1); + msg_buf = buffers::sans_prefix(msg_buf, n1); + + boost::system::error_code ec; + pr.parse(ec); + if( ec ) + BOOST_TEST(ec == error::in_place_overflow + || ec == error::need_data); + + // consume in_place body + auto n2 = buffers::buffer_copy( + parsed_body_buf.prepare( + buffers::buffer_size(pr.pull_body())), + pr.pull_body()); + parsed_body_buf.commit(n2); + pr.consume_body(n2); + + if( msg_buf.size() == 0 && ec == error::need_data ) + { + pr.commit_eof(); + pr.parse(ec); + BOOST_TEST(!ec || ec == error::in_place_overflow); + } + if( pr.is_complete() ) + break; + } + BOOST_TEST(parsed_body == body); + } + } + void run() { - zlib_serializer(); + test_serializer(); + test_parser(); } };
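
Taken together, the new apply_gzip_decoder / apply_deflate_decoder switches, the zlib service, and the inflator_filter give callers transparent decompression of gzip- and deflate-encoded response bodies. The sketch below shows the intended setup and read loop; it is modeled on the calls exercised by test_parser() above. The header paths and the exact read-loop structure are assumptions, not part of this diff.

// Minimal sketch of using the new decoder switches (paths assumed).
#include <boost/buffers/algorithm.hpp>      // prefix/sans_prefix (assumed path)
#include <boost/buffers/buffer_copy.hpp>    // (assumed path)
#include <boost/buffers/buffer_size.hpp>    // (assumed path)
#include <boost/buffers/const_buffer.hpp>   // (assumed path)
#include <boost/buffers/string_buffer.hpp>  // (assumed path)
#include <boost/http_proto/context.hpp>
#include <boost/http_proto/error.hpp>
#include <boost/http_proto/response_parser.hpp>
#include <boost/http_proto/service/zlib_service.hpp>  // (assumed path)
#include <boost/system/system_error.hpp>

#include <string>

namespace http_proto = boost::http_proto;
namespace buffers    = boost::buffers;

// Decode the body of a complete HTTP/1.1 response held in `msg`,
// transparently inflating gzip or deflate content encodings.
std::string
decode_body(std::string const& msg)
{
    http_proto::context ctx;

    // The zlib service must be installed before the parser service
    // whenever either decoder switch is enabled.
    http_proto::zlib::install_service(ctx);

    http_proto::response_parser::config cfg;
    cfg.apply_gzip_decoder = true;
    cfg.apply_deflate_decoder = true;
    http_proto::install_parser_service(ctx, cfg);

    http_proto::response_parser pr(ctx);
    pr.reset();
    pr.start();

    std::string body;
    buffers::string_buffer out{&body};
    buffers::const_buffer in{msg.data(), msg.size()};
    boost::system::error_code ec;
    bool eof_sent = false;

    for(;;)
    {
        if(! eof_sent)
        {
            // Feed as much input as the parser will accept.
            auto n = buffers::buffer_copy(pr.prepare(), in);
            pr.commit(n);
            in = buffers::sans_prefix(in, n);
            if(in.size() == 0)
            {
                pr.commit_eof();
                eof_sent = true;
            }
        }

        pr.parse(ec);
        if(ec.failed() &&
            ec != http_proto::error::need_data &&
            ec != http_proto::error::in_place_overflow)
            throw boost::system::system_error(ec);

        // Drain whatever has been decoded into the in-place body so far.
        auto m = buffers::buffer_copy(
            out.prepare(buffers::buffer_size(pr.pull_body())),
            pr.pull_body());
        out.commit(m);
        pr.consume_body(m);

        if(pr.is_complete())
            break;
    }
    return body;
}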
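
The `use_gzip ? 31 : 15` argument passed to make_inflator(), and the matching `15 + (gzip ? 16 : 0)` window-bits in the test's deflate() helper, both rely on the standard zlib window-bits convention: 8..15 selects a zlib (RFC 1950) wrapper, adding 16 selects a gzip (RFC 1952) wrapper, a negative value selects a raw deflate stream, and adding 32 on the inflate side enables automatic zlib/gzip detection. Note that the HTTP "deflate" content coding is defined as zlib-wrapped deflate, which is why 15 (not -15) is the right choice. A small standalone illustration using the zlib C API, independent of this library:

#include <zlib.h>
#include <cassert>

// Window-bits selection for inflateInit2(), as used indirectly by
// inflator_filter via the zlib service's make_inflator():
//   15      -> zlib-wrapped deflate ("Content-Encoding: deflate")
//   15 + 16 -> gzip wrapper         ("Content-Encoding: gzip")
//   -15     -> raw deflate, no wrapper
int make_inflate_stream(z_stream& zs, bool use_gzip)
{
    zs = z_stream{}; // zalloc/zfree/opaque default to Z_NULL
    return inflateInit2(&zs, use_gzip ? 15 + 16 : 15);
}

int main()
{
    z_stream zs;
    assert(make_inflate_stream(zs, true) == Z_OK);   // gzip
    inflateEnd(&zs);
    assert(make_inflate_stream(zs, false) == Z_OK);  // zlib/deflate
    inflateEnd(&zs);
}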
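
For reference, the test's append_chunked() helper frames the compressed payload with standard HTTP/1.1 chunked transfer coding: each chunk is a hexadecimal size line, CRLF, the chunk data, CRLF, and the message ends with a zero-size chunk followed by an empty trailer section, which is the framing that parse_chunked() and skip_trailer_headers() above consume. For a 5-byte payload the wire format looks like this (illustrative, not output captured from the test):

// What append_chunked() produces for the 5-byte payload "hello":
std::string const framed =
    "5\r\n"
    "hello\r\n"   // chunk data, terminated by CRLF
    "0\r\n"       // zero-size last chunk
    "\r\n";       // end of (empty) trailer section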