From 293df8d66b2c955bc136ae6292e91b38feb9473e Mon Sep 17 00:00:00 2001 From: George Wang Date: Mon, 12 Apr 2021 09:52:42 -0400 Subject: [PATCH] Release 2.30.0 - [FEATURE] Added support for sending/receiving multiple headers to address the case related to "100 continue" header handling. - [BUGFIX] Addressed high CPU usage for a GOAWAY connection before sending CONNECTION_CLOSE. - [BUGFIX] Addressed SIGFPE due to zero pacing rate. (ISSUE #254). - [BUGFIX] Fixed a minor issue related to multi-paths. --- CHANGELOG | 9 + bin/http_client.c | 3 +- docs/conf.py | 10 +- docs/gettingstarted.rst | 2 +- docs/internals.rst | 700 +++++++++++++++++++++++++- include/lsquic.h | 4 +- src/liblsquic/lsquic_adaptive_cc.h | 2 +- src/liblsquic/lsquic_alarmset.c | 2 + src/liblsquic/lsquic_full_conn_ietf.c | 52 +- src/liblsquic/lsquic_handshake.c | 24 +- src/liblsquic/lsquic_headers.h | 2 + src/liblsquic/lsquic_mm.h | 1 - src/liblsquic/lsquic_qdec_hdl.c | 2 +- src/liblsquic/lsquic_send_ctl.c | 4 + src/liblsquic/lsquic_stream.c | 117 ++--- src/liblsquic/lsquic_stream.h | 6 +- 16 files changed, 815 insertions(+), 125 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 6daaf1653..f2bd7c028 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,12 @@ +2021-04-12 + - 2.30.0 + - Added support for sending/receiving multiple headers to address the + case related to "100 continue" header handling. + - Addressed high CPU usage for a GOAWAY connection before sending + CONNECTION_CLOSE. + - Addressed SIGFPE due to zero pacing rate. (ISSUE #254). + - Fixed a minor issue related to multi-paths. + 2021-03-31 - 2.29.6 - Documentation: describe lsquic internals ("guts"). diff --git a/bin/http_client.c b/bin/http_client.c index c04808573..53f18c367 100644 --- a/bin/http_client.c +++ b/bin/http_client.c @@ -585,13 +585,14 @@ send_headers (lsquic_stream_ctx_t *st_h) if (!hostname) hostname = st_h->client_ctx->prog->prog_hostname; hbuf.off = 0; - struct lsxpack_header headers_arr[9]; + struct lsxpack_header headers_arr[10]; #define V(v) (v), strlen(v) header_set_ptr(&headers_arr[h_idx++], &hbuf, V(":method"), V(st_h->client_ctx->method)); header_set_ptr(&headers_arr[h_idx++], &hbuf, V(":scheme"), V("https")); header_set_ptr(&headers_arr[h_idx++], &hbuf, V(":path"), V(st_h->path)); header_set_ptr(&headers_arr[h_idx++], &hbuf, V(":authority"), V(hostname)); header_set_ptr(&headers_arr[h_idx++], &hbuf, V("user-agent"), V(st_h->client_ctx->prog->prog_settings.es_ua)); + //header_set_ptr(&headers_arr[h_idx++], &hbuf, V("expect"), V("100-continue")); if (randomly_reprioritize_streams) { char pfv[10]; diff --git a/docs/conf.py b/docs/conf.py index 2f2928b4d..d8315492a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,9 +24,9 @@ author = u'LiteSpeed Technologies' # The short X.Y version -version = u'2.29' +version = u'2.30' # The full version, including alpha/beta/rc tags -release = u'2.29.6' +release = u'2.30.0' # -- General configuration --------------------------------------------------- @@ -39,6 +39,12 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ +# To make ours look like readthedocs.io, change theme to "sphinx_rtd_theme", +# pip install sphinx_rtd_theme, and uncomment extensions: +# "sphinx.ext.intersphinx", +# "sphinx.ext.autodoc", +# "sphinx.ext.mathjax", +# "sphinx.ext.viewcode", ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 460f0e6e3..aba5fe5e4 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -38,7 +38,7 @@ Fetch Google home page: :: - ./http_client -s www.google.com -p / -o version=Q050 + ./http_client -s www.google.com -p / -o version=h3-29 Run your own server (it does not touch the filesystem, don't worry): diff --git a/docs/internals.rst b/docs/internals.rst index 2dd94e686..bae9e2db6 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -22,16 +22,6 @@ Code Version The code version under discussion is v2.29.6. -High-Level Structure -******************** - -At a high level, the lsquic library can be used to instantiate an engine -(or several engines). An engine manages connections; each connection has -streams. Engine, connection, and stream objects are exposed to the user -who interacts with them using the API (see :doc:`apiref`). All other data -structures are internal and are hanging off, in one way or another, from -the engine, connection, or stream objects. - Coding Style ************ @@ -97,6 +87,21 @@ List of Common Terms - **iQUIC** This stands for IETF QUIC. To differentiate between gQUIC and IETF QUIC, we use ``iquic`` in some names and types. +- **Public Reset** In the IETF QUIC parlance, this is called the *stateless* + reset. Because gQUIC was first to be implemented, this name is still + used in the code, even when the IETF QUIC stateless reset is meant. + You will see names that contain strings like "prst" and "pubres". + +High-Level Structure +******************** + +At a high level, the lsquic library can be used to instantiate an engine +(or several engines). An engine manages connections; each connection has +streams. Engine, connection, and stream objects are exposed to the user +who interacts with them using the API (see :doc:`apiref`). All other data +structures are internal and are hanging off, in one way or another, from +the engine, connection, or stream objects. + Engine ****** @@ -573,11 +578,11 @@ dedicated chapters elsewhere in this document: - `Mini gQUIC Connection <#mini-gquic-connection>`__ -- `Full gQUIC Connection <#connection-public-interface>`__ +- `Full gQUIC Connection <#full-gquic-connection>`__ - `Mini IETF QUIC Connection <#mini-ietf-connection>`__ -- `Full IETF QUIC Connection <#mini-ietf-connection>`__ +- `Full IETF QUIC Connection <#full-ietf-connection>`__ - `Evanescent Connection <#evanescent-connection>`__ @@ -619,7 +624,7 @@ Various list and heap connectors A connection may be pointed to by one or several queues and heaps (see "\ `Connection Management <#connection-management>`__\ "). There are -several struct members that make it possible: all the \*TAILQ_ENTRYs, +several struct members that make it possible: \*TAILQ_ENTRYs, ``cn_attq_elem``, and ``cn_cert_susp_head``. Version @@ -1135,12 +1140,156 @@ size will be written by a different function. Parsing ******* +*Files: lsquic_parse.h, lsquic_parse_ietf_v1.c, lsquic_parse_Q050.c, lsquic_parse_Q046.c, +lsquic_parse_gquic_be.c, lsquic_parse_common.c, and others* + +Overview +======== + +The two types of QUIC -- gQUIC and IETF QUIC -- have different packet and +frame formats. In addition, different gQUIC version are different among +themselves. Functions to parse and generate packets and frames of each +type are abstracted out behind the rather large ``struct parse_funcs``. +When a connection is created, its ``cn_pf`` member is set to point to +the correct set of function pointers via the ``select_pf_by_ver()`` macro. + Parsing Packets =============== +Before settling on a particular set of parsing function for a connection, +the server needs to determine the connection's version. It does so using +the function ``lsquic_parse_packet_in_server_begin()``. + +This function figures out whether the packet has a long or a short header, +and which QUIC version it is. Because the server deals with fewer packet +types than the client (no version negotiation or stateless retry packets), +it can determine the necessary parsing function from the first byte of the +incoming packet. + +The "begin" in the name of the function refers to the fact that packet +parsing is a two-step process [3]_. In the first step, the packet version, +CID, and some other parameters are parsed out; in the second step, +version-specific ``pf_parse_packet_in_finish()`` is called to parse out +the packet number. Between the two calls, the state is saved in +``struct packin_parse_state``. + +Generating Packets +================== + +Packets are generated during encryption using the ``pf_gen_reg_pkt_header()`` +function. The generated header is encrypted together with the `packet payload`_ +and this becomes the QUIC packet that is sent out. (Most of the time, the +QUIC packet corresponds to the UDP datagram, but sometimes packets are +`coalesced <#packet-coalescing>`__. + Parsing Frames ============== +There is a parsing function for each frame type. These function generally +have names that begin with "pf_parse\_" and follow a similar pattern: + +- The first argument is the buffer to be parsed; + +- The second argument is its size; + +- Any additional arguments are outputs: the parsed out values from the frame; + +- Number of bytes consumed is returned or a negative value is returned + if a parsing error occurred. + +For example: + +:: + + int + (*pf_parse_stream_frame) (const unsigned char *buf, size_t rem_packet_sz, + struct stream_frame *); + + int + (*pf_parse_max_data) (const unsigned char *, size_t, uint64_t *); + +Generating Frames +================= + +Functions that generate frames begin with "pf_gen\_" and also follow a +pattern: + +- First argument is the buffer to be written to; + +- The second argument is the buffer size; + +- Any additional arguments specify the values to include in the frame; + +- The size of the resulting frame is returned or a negative value if + an error occurred. + +For example: + +:: + + int + (*pf_gen_path_chal_frame) (unsigned char *, size_t, uint64_t chal); + + int + (*pf_gen_stream_frame) (unsigned char *buf, size_t bufsz, + lsquic_stream_id_t stream_id, uint64_t offset, + int fin, size_t size, gsf_read_f, void *stream); + +Frame Types +=========== + +Frame types are listed in ``enum quic_frame_type``. When frames are parsed, +the on-the-wire frame type is translated to the enum value; when frames are +generated, the enum is converted to the on-the-wire format. This indirection +is convenient, as it limits the range of possible QUIC frame values, making +it possible to store a list of frame types as a bitmask. Examples include +``po_frame_types`` and ``sc_retx_frames``. + +Some frame types, such as ACK and STREAM, are common to both Google and IETF +QUIC. Others, such as STOP_WAITING and RETIRE_CONNECTION_ID, are only used +in one of the protocols. The third type is frames that are used by IETF +QUIC extensions, such as TIMESTAMP and ACK_FREQUENCY. + +Parsing IETF QUIC Frame Types +----------------------------- + +Most IETF frame types are encoded as a single by on the wire (and all Google +QUIC frames are). Some of them are encoded using multiple bytes. This is +because, like the vast majority of all integral values in IETF QUIC, the frame +type is encoded as a varint. Unlike the other integral values, however, the +frame type has the unique property is that it must be encoded using the +*minimal representation*: that is, the encoding must use the minimum number +of bytes possible. For example, encoding the value 200 must use the two-byte +varint, not four- or eight-byte version. This makes it possible to parse +frame types once without having to reparse the frame type again in individual +frame-parsing routines. + +Frame type is parsed out in ``ietf_v1_parse_frame_type()``. Because of the +minimal encoding requirement, the corresponding frame-parsing functions know +the number of bytes to skip for type, for example: + + +:: + + static int + ietf_v1_parse_frame_with_varints (const unsigned char *buf, size_t len, + const uint64_t frame_type, unsigned count, uint64_t *vals[]) + { + /* --- 8< --- code removed */ + vbits = vint_val2bits(frame_type); + p += 1 << vbits; // <=== SKIP FRAME TYPE + /* --- 8< --- code removed */ + } + + static int + ietf_v1_parse_timestamp_frame (const unsigned char *buf, + size_t buf_len, uint64_t *timestamp) + { + return ietf_v1_parse_frame_with_varints(buf, buf_len, + FRAME_TYPE_TIMESTAMP, 1, (uint64_t *[]) { timestamp }); + } + + Mini vs Full Connections ************************ @@ -1772,13 +1921,6 @@ The following steps are performed: - Streams are serviced (closed, freed, created) -.. _notable-code-4: - -Notable Code -============ - -TODO - Full IETF Connection ******************** @@ -2240,8 +2382,22 @@ Last time ``ea_live_scids()`` was called. ifc_paths --------- -Array of network paths. Most of the time, only one path is used when the -peer migrates. The array has four elements as a safe upper limit. +Array of connection paths. Most of the time, only one path is used; more +are used during `migration <#path-migration>`__. The array has four +elements as a safe upper limit. + +The elements are of type ``struct conn_path``. Besides the network path, +which stores socket addresses and is associated with each outgoing packet +(via ``po_path``), the connection path keeps track of the following +information: + +- Outgoing path challenges. See `Sending Path Challenges`_. + +- Incoming path challenge. + +- Spin bit (``cop_max_packno``, ``cop_spin_bit``, and ``COP_SPIN_BIT``). + +- DPLPMTUD state. ifc_u.cli --------- @@ -2348,6 +2504,140 @@ be found). Path Migration ============== +What follows assumes familiarity with `Section 9 +`__ +of the Transport I-D. + +Server +------ + +The server handles two types of path migration. In the first type, the +client performs probing by sending path challenges; in the second type, +the migration is due to a NAT rebinding. + +The connection keeps track of different paths in `ifc_paths`_. Path +objects are allocated out of the ``ifc_paths`` array. They are of type +``struct conn_path``; one of the members is ``cop_path``, which is the +network path object used to send packets (via ``po_path``). + +Each incoming packet is fed to the engine using the +``lsquic_engine_packet_in()`` function. Along with the UDP datagram, +the local and peer socket addresses are passed to it. These addresses are +eventually passed to the connection via the ``ci_record_addrs()`` call. +The first of these calls -- for the first incoming packet -- determines the +*current path*. When the address pair, which is a four-tuple of local +and remote IP addresses and port numbers, does not match that of the +current path, a new path object is created, triggering migration logic. + +``ci_record_addrs()`` returns a *path ID*, which is simply the index of +the corresponding element in the ``ifc_paths`` array. The current path +ID is stored in ``ifc_cur_path_id``. The engine assigns this value to +the newly created incoming packet (in ``pi_path_id``). The packet is +then passed to ``ci_packet_in()``. + +The first part of the path-switching logic is in ``process_regular_packet()``: + +:: + + case REC_ST_OK: + /* --- 8< --- some code elided... */ + saved_path_id = conn->ifc_cur_path_id; + parse_regular_packet(conn, packet_in); + if (saved_path_id == conn->ifc_cur_path_id) + { + if (conn->ifc_cur_path_id != packet_in->pi_path_id) + { + if (0 != on_new_or_unconfirmed_path(conn, packet_in)) + { + LSQ_DEBUG("path %hhu invalid, cancel any path response " + "on it", packet_in->pi_path_id); + conn->ifc_send_flags &= ~(SF_SEND_PATH_RESP + << packet_in->pi_path_id); + } + +The above means: if the current path has not changed after the packet +was processed, but the packet came in on a different path, then invoke +the "on new or unconfirmed path" logic. This is done this way because +the current path may be have been already changed if the packet contained +a PATH_RESPONSE frame. + +First time a packet is received on a new path, a PATH_CHALLENGE frame is +scheduled. + +If more than one packet received on the new path contain non-probing frames, +the current path is switched: it is assumed that the path change is due to +NAT rebinding. + +Client +------ + +Path migration is controlled by the client. When the client receives +a packet from an unknown server address, it drops the packet on the +floor (per spec). This code is in ``process_regular_packet()``. + +The client can migrate if ``es_allow_migration`` is on (it is in the default +configuration) and the server provides the "preferred_address" transport +parameter. The migration process begins once the handshake is confirmed; +see the ``maybe_start_migration()`` function. The SCID provided by the +server as part of the "preferred_address" transport parameter is used as the +destination CID and path #1 is picked: + + +:: + + copath = &conn->ifc_paths[1]; + migra_begin(conn, copath, dce, (struct sockaddr *) &sockaddr, params); + return BM_MIGRATING; + +In ``migra_begin``, migration state is initiated and sending of a +PATH_CHALLENGE frame is scheduled: + +:: + + conn->ifc_mig_path_id = copath - conn->ifc_paths; + conn->ifc_used_paths |= 1 << conn->ifc_mig_path_id; + conn->ifc_send_flags |= SF_SEND_PATH_CHAL << conn->ifc_mig_path_id; + LSQ_DEBUG("Schedule migration to path %hhu: will send PATH_CHALLENGE", + conn->ifc_mig_path_id); + +Sending Path Challenges +----------------------- + +To send a path challenge, a packet is allocated to be sent on that path, +a new challenge is generated, the PATH_CHALLENGE is written to the +packet, and the packet is scheduled. All this happens in the +``generate_path_chal_frame()`` function. + +:: + + need = conn->ifc_conn.cn_pf->pf_path_chal_frame_size(); + packet_out = get_writeable_packet_on_path(conn, need, &copath->cop_path, 1); + /* --- 8< --- some code elided... */ + w = conn->ifc_conn.cn_pf->pf_gen_path_chal_frame( + packet_out->po_data + packet_out->po_data_sz, + lsquic_packet_out_avail(packet_out), + copath->cop_path_chals[copath->cop_n_chals]); + /* --- 8< --- some code elided... */ + lsquic_alarmset_set(&conn->ifc_alset, AL_PATH_CHAL + path_id, + now + (INITIAL_CHAL_TIMEOUT << (copath->cop_n_chals - 1))); + +If the path response is not received before a timeout, another path challenge +is sent, up to the number of elements in ``cop_path_chals``. The timeout +uses exponential back-off; it is not based on RTT, because the RTT of the +new path is unknown. + +Receiving Path Responses +------------------------ + +When a PATH_RESPONSE frame is received, the path on which the corresponding +challenge was sent may become the new current path. See +``process_path_response_frame()``. + +Note that the path ID of the incoming packet with the PATH_RESPONSE frame is +not taken into account. This is by design: see +`Section 8.2.2 of the Transport I-D +`__. + Stream Priority Iterators ========================= @@ -2385,9 +2675,182 @@ some limited interop testing. Anatomy of Outgoing Packet ************************** +Overview +======== + +The outgoing packet is represented by ``struct lsquic_packet_out``. An +outgoing packet always lives on one -- and only one -- of the +`Send Controller`_'s `Packet Queues`_. For that, ``po_next`` is used. + +Beyond the packet number, stored in ``po_packno``, the packet has several +properties: sent time (``po_sent``), frame information, encryption +level, network path, and others. Several properties are encoded into +one or more bits in the bitmasks ``po_flags`` and ``po_lflags``. +Multibit properties are usually accessed and modified by a special +macro. + +The packet has a pointer to the packetized data in ``po_data``. +If the packet has been encrypted but not yet sent, the encrypted +buffer is pointed to ``po_enc_data``. + +Packet Payload +============== + +The payload consists of the various frames -- STREAM, ACK, and others -- +written, one after another, to ``po_data``. The header, consisting of +the type byte, (optional) connection ID, and the packet number is constructed +when the packet is just about to be sent, during encryption. This +buffer -- header and the encrypted payload are stored in a buffer +pointed to by ``po_enc_data``. + +Because stream data is written directly to the outgoing packet, the +packet is not destroyed when it is declared lost by the `loss detection +logic <#loss-detection-and-retransmission>`__. Instead, it is repackaged +and sent out again as a new packet. Besides assigning the packet a +new number, packet retransmission involves removing non-retransmittable +frames from the packet. (See ``lsquic_packet_out_chop_regen()``.) + +Historically, some places in the code assumed that the frames to be +dropped are always located at the beginning of the ``po_data`` buffer. +(This was before a `frame record <#frame-records>`__ was created for +each frame). The cumulative size of the frames to be removed is in +``po_regen_sz``; this size can be zero. Code that generates +non-retransmittable frames still writes them only to the beginning +of the packet. + +The goal is to drop ``po_regen_sz`` and to begin to write ACK and +other non-retransmittable frames anywhere. This should be possible +to do now (see ``lsquic_packet_out_chop_regen()``, which can support +such use after removing the assertion), but we haven't pulled the +trigger on it yet. Making this change will allow other code to become +simpler: for example, the opportunistic ACKs logic. + +Frame Records +============= + +Each frame written to ``po_data`` has an associated *frame record* stored +in ``po_frecs``: + +:: + + struct frame_rec { + union { + struct lsquic_stream *stream; + uintptr_t data; + } fe_u; + unsigned short fe_off, + fe_len; + enum quic_frame_type fe_frame_type; + }; + +Frame records are primarily used to keep track of the number of unacknowledged +stream frames for a stream. When a packet is acknowledged, the frame records +are iterated over and ``lsquic_stream_acked()`` is called. The second purpose +is to speed up packet resizing, as frame records record the type, position, +and size of a frame. + +Most of the time, a packet will contain a single frame: STREAM on the sender +of data and ACK on the receiver. This use case is optimized: ``po_frecs`` is +a union and when there is only one frame per packets, the frame record is +stored in the packet struct directly. + Evanescent Connection ********************* +*Files: lsquic_pr_queue.h, lsquic_pr_queue.c* + +"PR Queue" stands for "Packet Request Queue." This and the Evanescent +Connection object types are explaned below in this section. + +Overview +======== + +Some packets need to be replied to outside of context of existing +mini or full connections: + +1. A version negotiation packet needs to be sent when a packet + arrives that specifies QUIC version that we do not support. + +2. A stateless reset packet needs to be sent when we receive a + packet that does not belong to a known QUIC connection. + + +The replies cannot be sent immediately. They share outgoing +socket with existing connections and must be scheduled according +to prioritization rules. + +The information needed to generate reply packet -- connection ID, +connection context, and the peer address -- is saved in the Packet +Request Queue. + +When it is time to send packets, the connection iterator knows to +call prq_next_conn() when appropriate. What is returned is an +evanescent connection object that disappears as soon as the reply +packet is successfully sent out. + +There are two limits associated with Packet Request Queue: + +1. Maximum number of packet requests that are allowed to be + pending at any one time. This is simply to prevent memory + blowout. + +2. Maximum verneg connection objects to be allocated at any one + time. This number is the same as the maximum batch size in + the engine, because the packet (and, therefore, the connection) + is returned to the Packet Request Queue when it could not be + sent. + +We call this a "request" queue because it describes what we do with +QUIC packets whose version we do not support or those packets that +do not belong to an existing connection: we send a reply for each of +these packets, which effectively makes them "requests." + +Packet Requests +=============== + +When an incoming packet requires a non-connection response, it is added +to the Packet Request Queue. There is a single ``struct pr_queue`` per +engine -- it is instantiated if the engine is in the server mode. + +The packet request is recorded in ``struct packet_req``, which are kept +inside a hash in the PR Queue. The reason for keeping the requests in +a hash is to minimize duplicate responses: If a client hello message +is spread over several incoming packets, only one response carrying the +version negotiation packet (for example) will be sent. + +:: + + struct packet_req + { + struct lsquic_hash_elem pr_hash_el; + lsquic_cid_t pr_scid; + lsquic_cid_t pr_dcid; + enum packet_req_type pr_type; + enum pr_flags { + PR_GQUIC = 1 << 0, + } pr_flags; + enum lsquic_version pr_version; + unsigned pr_rst_sz; + struct network_path pr_path; + }; + +Responses are created on demand. Until that time, everything that is +necessary to generate the response is stored in ``packet_req``. + +Sending Responses +================= + +To make these packets fit into the usual packet-sending loop, +each response is made to resemble a packet +sent by a connecteion. For that, the PR Queue creates a connection +object that only lives for the duration of batching of the packet. +(Hence the connection's name: *evanescent* connection.) This connection +is returned by the ``lsquic_prq_next_conn()`` by the connection iterator +during the `batching process <#batching-packets>`__ + +For simplicity, the response packet is generated in this function as well. +The call to ``ci_next_packet_to_send()`` only returns the pointer to it. + Send Controller *************** @@ -2692,7 +3155,16 @@ Alarm Set *Files: lsquic_alarmset.h, lsquic_alarmset.c, test_alarmset.c* -TODO +The alarm set, ``struct lsquic_alarmset``, is an array of callbacks and +expiry times. To speed up operations, setting and unsetting alarms is +done via macros. + +The functions to ring [4]_ the alarms and to calculate the next alarm +time use a loop. It would be possible to maintain a different data +structure, such as a min-heap, to keep the alarm, and that would obviate +the need to loop in ``lsquic_alarmset_mintime()``. It is not worth it: +the function is not called often and a speed win here would be offset +by the necessity to maintain the min-heap ordering. Tickable Queue ************** @@ -2743,12 +3215,169 @@ memory that stores ``attq_elem`` stays put. This is why there are both CID Purgatory ************* +*Files: lsquic_purga.h, lsquic_purga.c* + +Overview +======== + +This module keeps a set of CIDs that should be ignored for a period +of time. It is used when a connection is closed: this way, late +packets will not create a new connection. + +A connection may have been deleted, retired, or closed. In the latter +case, it enters the `Draining State `__. +In this state, the connection is to ignore incoming packets. + +Structure +========= + +The purgatory keeps a list of 16-KB pages. A page looks like this: + +:: + + #define PURGA_ELS_PER_PAGE 273 + + struct purga_page + { + TAILQ_ENTRY(purga_page) pupa_next; + lsquic_time_t pupa_last; + unsigned pupa_count; + bloom_mask_el_t pupa_mask[BLOOM_N_MASK_ELS]; + lsquic_cid_t pupa_cids[PURGA_ELS_PER_PAGE]; + void * pupa_peer_ctx[PURGA_ELS_PER_PAGE]; + struct purga_el pupa_els[PURGA_ELS_PER_PAGE]; + }; + +The reason for having CIDs and peer contexts in separate arrays is to be +able to call the ``ea_old_scids()`` callback when a page expires. A page +is expired when it is full and the last added element is more than +``pur_min_life`` microseconds ago. The minimum CID life is hardcoded as +30 seconds in lsquic_engine.c (see the ``lsquic_purga_new()`` call). + +To avoid scannig the whole array of CIDs in ``lsquic_purga_contains()``, +we use a Bloom filter. + +The Bloom filter is constructed using a 8192-bit bit field and 6 hash +functions. With 273 elements per page, this gives us 0.004% possibility +of a false positive. In other words, when we do have to search a page +for a particular CID, the chance of finding the CID is 99.99%. + +Quick calculation: + +.. code-block:: text + + perl -E '$k=6;$m=1<<13;$n=273;printf("%f\n", (1-exp(1)**-($k*$n/$m))**$k)' + +To extract 6 13-bit values from a 64-bit integer, they are overlapped: + +.. code-block:: text + + 0 10 20 30 40 50 60 + +----------------------------------------------------------------+ + | | + +----------------------------------------------------------------+ + 1111111111111 + 2222222222222 + 3333333333333 + 4444444444444 + 5555555555555 + 6666666666666 + +This is not 100% kosher, but having 6 functions gives a better guarantee +and it happens to work in practice. + Memory Manager ************** +*Files: lsquic_mm.h, lsquic_mm.c* + +The memory manager allocates several types of objects that are used by +different parts of the library: + +- Incoming packet objects and associated buffers + +- Outgoing packet objects and associated buffers + +- Stream frames + +- Frame records + +- Mini connections, both Google and IETF QUIC + +- DCID elements + +- HTTP/3 (a.k.a. "HQ") frames + +- Four- and sixteen-kilobyte pages + +These objects are either stored on linked list or in `malo <#malo-allocator>`__ +pools and are shared among all connections. (Full connections allocate outgoing +packets from per-connection malo allocators: this is done to speed up `ACK +processing <#handling-acks>`__.) + +The list of cached outgoing packet buffers is shrunk once in a while (see +the "poolst\_*" functions). Other object types are kept in the cache +until the engine is destroyed. One Memory Manager object is allocated per +engine instance. + Malo Allocator ************** +*Files: lsquic_malo.h, lsquic_malo.c* + +Overview +======== + +The malo allocator is a pool of objects of fixed size. It tries to +allocate and deallocate objects as fast as possible. To do so, it +does the following: + +1. Allocations occur 4 KB at a time. + +2. No division or multiplication operations are performed for + appropriately sized objects. (More on this below.) + +(In recent testing, malo was about 2.7 times faster than malloc for +64-byte objects.) + +Besides speed, the allocator provides a convenient API: +To free (put) an object, one does not need a pointer to the malo +object. + +To gain all these advantages, there are trade-offs: + +1. There are two memory penalties: + + a. Per object overhead. If an object is at least ROUNDUP_THRESH in + size as the next power of two, the allocator uses that power of + two value as the object size. This is done to avoid using + division and multiplication. For example, a 104-byte object + will have a 24-byte overhead. + + b. Per page overhead. Page links occupy some bytes in the + page. To keep things fast, at least one slot per page is + always occupied, independent of object size. Thus, for a + 1 KB object size, 25% of the page is used for the page + header. + +2. 4 KB pages are not freed until the malo allocator is destroyed. + This is something to keep in mind. + +Internal Structure +================== + +The malo allocator allocates objects out of 4 KB pages. Each page is +aligned on a 4-KB memory boundary. This makes it possible for the +``lsquic_malo_put()`` function only to take on argument -- the object +to free -- and to find the malo allocator object itself. + +Each page begins with a header followed by a number of slots -- up to +the 4-KB limit. Two lists of pages are maintained: all pages and free +pages. A "free" page is a page with at least one free slot in it. + +The malo allocator (``struct malo``) stores itself in the first page, +occupying some slots. + Receive History *************** @@ -2987,6 +3616,20 @@ connection. Set64 ***** +*Files: lsquic_set.h, lsquic_set.h, test_set.c* + +This data structure (along with *Set32*, which is not currently used +anywhere in the code) is meant to keep track of a set of numbers that +are always increasing and are not expected to contain many gaps. +Stream IDs fit that description, and ``lsquic_set64`` is used in both +gQUIC and IETF QUIC full connections. + +Because one or two low bits in stream IDs contain stream type, the +stream IDs of different types are stored in different set structures; +otherwise, there would be gaps. For example, see the +``conn_is_stream_closed()`` functions (there is one in each gQUIC and +IETF QUIC full connection code). + Appendix A: List of Data Structures *********************************** @@ -3050,3 +3693,14 @@ QLOG Allocator <#malo-allocator>`__, which used to be limited to objects whose size is a power of two, so it was either fitting it into 128 bytes or effectively doubling the mini conn size. + +.. [3] + This two-step packet parsing mechanism is left over from the + little-endian to big-endian switch in gQUIC several years ago: + Before parsing out the packet number, it was necessary to know + whether it is little- or big-endian. It should be possible to + do away with this, especially once gQUIC is gone. + +.. [4] + This term was picked consciously: alarms *ring*, while timers do + other things, such as "fire" and so on. diff --git a/include/lsquic.h b/include/lsquic.h index 862981c34..344303cea 100644 --- a/include/lsquic.h +++ b/include/lsquic.h @@ -24,8 +24,8 @@ extern "C" { #endif #define LSQUIC_MAJOR_VERSION 2 -#define LSQUIC_MINOR_VERSION 29 -#define LSQUIC_PATCH_VERSION 6 +#define LSQUIC_MINOR_VERSION 30 +#define LSQUIC_PATCH_VERSION 0 /** * Engine flags: diff --git a/src/liblsquic/lsquic_adaptive_cc.h b/src/liblsquic/lsquic_adaptive_cc.h index 105ca4276..8b8fe4953 100644 --- a/src/liblsquic/lsquic_adaptive_cc.h +++ b/src/liblsquic/lsquic_adaptive_cc.h @@ -15,7 +15,7 @@ struct adaptive_cc struct lsquic_cubic acc_cubic; struct lsquic_bbr acc_bbr; enum { - ACC_CUBIC, /* If set, use Cubic; otherwise, use BBR */ + ACC_CUBIC = 1, /* If set, use Cubic; otherwise, use BBR */ } acc_flags; }; diff --git a/src/liblsquic/lsquic_alarmset.c b/src/liblsquic/lsquic_alarmset.c index 8879f989b..0b98597e5 100644 --- a/src/liblsquic/lsquic_alarmset.c +++ b/src/liblsquic/lsquic_alarmset.c @@ -45,6 +45,8 @@ const char *const lsquic_alid2str[] = [AL_CID_THROT] = "CID_THROT", [AL_PATH_CHAL_0] = "PATH_CHAL_0", [AL_PATH_CHAL_1] = "PATH_CHAL_1", + [AL_PATH_CHAL_2] = "PATH_CHAL_2", + [AL_PATH_CHAL_3] = "PATH_CHAL_3", [AL_SESS_TICKET] = "SESS_TICKET", [AL_BLOCKED_KA] = "BLOCKED_KA", [AL_MTU_PROBE] = "MTU_PROBE", diff --git a/src/liblsquic/lsquic_full_conn_ietf.c b/src/liblsquic/lsquic_full_conn_ietf.c index b563804dc..4ce0570d9 100644 --- a/src/liblsquic/lsquic_full_conn_ietf.c +++ b/src/liblsquic/lsquic_full_conn_ietf.c @@ -704,6 +704,7 @@ wipe_path (struct ietf_full_conn *conn, unsigned path_id) memset(&conn->ifc_paths[path_id], 0, sizeof(conn->ifc_paths[0])); conn->ifc_paths[path_id].cop_path.np_path_id = path_id; conn->ifc_paths[path_id].cop_path.np_peer_ctx = peer_ctx; + conn->ifc_used_paths &= ~(1 << path_id); } @@ -2755,6 +2756,20 @@ have_bidi_streams (const struct ietf_full_conn *conn) } +static int +conn_ok_to_close (const struct ietf_full_conn *conn) +{ + assert(conn->ifc_flags & IFC_CLOSING); + return !(conn->ifc_flags & IFC_SERVER) + || (conn->ifc_flags & IFC_RECV_CLOSE) + || ( + !lsquic_send_ctl_have_outgoing_stream_frames(&conn->ifc_send_ctl) + && !have_bidi_streams(conn) + && lsquic_send_ctl_have_unacked_stream_frames( + &conn->ifc_send_ctl) == 0); +} + + static void maybe_close_conn (struct ietf_full_conn *conn) { @@ -2763,8 +2778,13 @@ maybe_close_conn (struct ietf_full_conn *conn) && !have_bidi_streams(conn)) { conn->ifc_flags |= IFC_CLOSING|IFC_GOAWAY_CLOSE; - conn->ifc_send_flags |= SF_SEND_CONN_CLOSE; - LSQ_DEBUG("closing connection: GOAWAY sent and no responses remain"); + LSQ_DEBUG("maybe_close_conn: GOAWAY sent and no responses remain"); + if (conn_ok_to_close(conn)) + { + conn->ifc_send_flags |= SF_SEND_CONN_CLOSE; + LSQ_DEBUG("maybe_close_conn: ok to close: " + "schedule to send CONNECTION_CLOSE"); + } } } @@ -2920,7 +2940,12 @@ ietf_full_conn_ci_close (struct lsquic_conn *lconn) lsquic_stream_maybe_reset(stream, 0, 1); } conn->ifc_flags |= IFC_CLOSING; - conn->ifc_send_flags |= SF_SEND_CONN_CLOSE; + if (conn_ok_to_close(conn)) + { + conn->ifc_send_flags |= SF_SEND_CONN_CLOSE; + LSQ_DEBUG("ietf_full_conn_ci_close: ok to close: " + "schedule to send CONNECTION_CLOSE"); + } lsquic_engine_add_conn_to_tickable(conn->ifc_enpub, lconn); } } @@ -3204,7 +3229,8 @@ ietf_full_conn_ci_going_away (struct lsquic_conn *lconn) { if (!(conn->ifc_flags & (IFC_CLOSING|IFC_GOING_AWAY))) { - LSQ_INFO("connection marked as going away"); + LSQ_INFO("connection marked as going away, last stream: %ld", + conn->ifc_max_req_id); conn->ifc_flags |= IFC_GOING_AWAY; const lsquic_stream_id_t stream_id = conn->ifc_max_req_id + N_SITS; if (valid_stream_id(stream_id)) @@ -4340,20 +4366,6 @@ process_streams_write_events (struct ietf_full_conn *conn, int high_prio) } -static int -conn_ok_to_close (const struct ietf_full_conn *conn) -{ - assert(conn->ifc_flags & IFC_CLOSING); - return !(conn->ifc_flags & IFC_SERVER) - || (conn->ifc_flags & IFC_RECV_CLOSE) - || ( - !lsquic_send_ctl_have_outgoing_stream_frames(&conn->ifc_send_ctl) - && !have_bidi_streams(conn) - && lsquic_send_ctl_have_unacked_stream_frames( - &conn->ifc_send_ctl) == 0); -} - - static void generate_connection_close_packet (struct ietf_full_conn *conn) { @@ -8325,7 +8337,9 @@ ietf_full_conn_ci_tick (struct lsquic_conn *lconn, lsquic_time_t now) lsquic_send_ctl_maybe_app_limited(&conn->ifc_send_ctl, CUR_NPATH(conn)); end_write: - if ((conn->ifc_flags & IFC_CLOSING) && conn_ok_to_close(conn)) + if ((conn->ifc_flags & IFC_CLOSING) + && ((conn->ifc_send_flags & SF_SEND_CONN_CLOSE) + || conn_ok_to_close(conn))) { LSQ_DEBUG("connection is OK to close"); conn->ifc_flags |= IFC_TICK_CLOSE; diff --git a/src/liblsquic/lsquic_handshake.c b/src/liblsquic/lsquic_handshake.c index 33536fcfa..278154db7 100644 --- a/src/liblsquic/lsquic_handshake.c +++ b/src/liblsquic/lsquic_handshake.c @@ -133,6 +133,7 @@ typedef struct hs_ctx_st HSET_SCID = (1 << 2), HSET_IRTT = (1 << 3), HSET_SRST = (1 << 4), + HSET_XLCT = (1 << 5), /* xlct is set */ } set; enum { HOPT_NSTP = (1 << 0), /* NSTP option present in COPT */ @@ -283,6 +284,7 @@ struct lsquic_enc_session SSL_CTX * ssl_ctx; struct lsquic_engine_public *enpub; struct lsquic_str * cert_ptr; /* pointer to the leaf cert of the server, not real copy */ + uint64_t cert_hash; struct lsquic_str chlo; /* real copy of CHLO message */ struct lsquic_str sstk; struct lsquic_str ssno; @@ -310,6 +312,7 @@ typedef struct compress_cert_hash_item_st typedef struct cert_item_st { struct lsquic_str* crt; + uint64_t hash; /* Hash of `crt' */ struct lsquic_hash_elem hash_el; unsigned char key[0]; } cert_item_t; @@ -535,6 +538,8 @@ insert_cert (struct lsquic_engine_public *enpub, const unsigned char *key, item->crt = crt_copy; memcpy(item->key, key, key_sz); + item->hash = lsquic_fnv1a_64((const uint8_t *)lsquic_str_buf(crt), + lsquic_str_len(crt)); el = lsquic_hash_insert(enpub->enp_server_certs, item->key, key_sz, item, &item->hash_el); if (el) @@ -1267,6 +1272,13 @@ static int parse_hs_data (struct lsquic_enc_session *enc_session, uint32_t tag, break; case QTAG_XLCT: + if (len != sizeof(hs_ctx->xlct)) + { + LSQ_INFO("Unexpected size of XLCT: %u instead of %zu bytes", + len, sizeof(hs_ctx->xlct)); + return -1; + } + hs_ctx->set |= HSET_XLCT; hs_ctx->xlct = get_tag_value_i64(val, len); break; @@ -1664,7 +1676,6 @@ determine_rtts (struct lsquic_enc_session *enc_session, { hs_ctx_t *const hs_ctx = &enc_session->hs_ctx; enum hsk_failure_reason hfr; - uint64_t hash = 0; if (!(hs_ctx->set & HSET_SCID)) { @@ -1701,12 +1712,9 @@ determine_rtts (struct lsquic_enc_session *enc_session, goto fail_1rtt; } - if (hs_ctx->xlct) + if (hs_ctx->set & HSET_XLCT) { - hash = lsquic_fnv1a_64((const uint8_t *)lsquic_str_buf(enc_session->cert_ptr), - lsquic_str_len(enc_session->cert_ptr)); - - if (hash != hs_ctx->xlct) + if (enc_session->cert_hash != hs_ctx->xlct) { /* The expected leaf certificate hash could not be validated. */ hs_ctx->rrej = HFR_INVALID_EXPECTED_LEAF_CERTIFICATE; @@ -2297,6 +2305,7 @@ get_sni_SSL_CTX(struct lsquic_enc_session *enc_session, lsquic_lookup_cert_f cb, } } enc_session->cert_ptr = item->crt; + enc_session->cert_hash = item->hash; } else { @@ -2310,6 +2319,9 @@ get_sni_SSL_CTX(struct lsquic_enc_session *enc_session, lsquic_lookup_cert_f cb, if (!enc_session->cert_ptr) return GET_SNI_ERR; enc_session->es_flags |= ES_FREE_CERT_PTR; + enc_session->cert_hash = lsquic_fnv1a_64( + (const uint8_t *) lsquic_str_buf(enc_session->cert_ptr), + lsquic_str_len(enc_session->cert_ptr)); } } return GET_SNI_OK; diff --git a/src/liblsquic/lsquic_headers.h b/src/liblsquic/lsquic_headers.h index f867601d5..a8fe33aa2 100644 --- a/src/liblsquic/lsquic_headers.h +++ b/src/liblsquic/lsquic_headers.h @@ -36,6 +36,8 @@ struct uncompressed_headers UH_H1H = (1 << 2), /* uh_hset points to http1x_headers */ } uh_flags:8; void *uh_hset; + struct uncompressed_headers + *uh_next; }; #endif diff --git a/src/liblsquic/lsquic_mm.h b/src/liblsquic/lsquic_mm.h index 14f6169b9..9575998d3 100644 --- a/src/liblsquic/lsquic_mm.h +++ b/src/liblsquic/lsquic_mm.h @@ -36,7 +36,6 @@ struct lsquic_mm { struct malo *frame_rec_arr; /* For struct frame_rec_arr */ struct malo *mini_conn; /* For struct mini_conn */ struct malo *mini_conn_ietf;/* For struct ietf_mini_conn */ - struct malo *retry_conn; /* For struct retry_conn */ struct malo *packet_in; /* For struct lsquic_packet_in */ struct malo *packet_out; /* For struct lsquic_packet_out */ struct malo *dcid_elem; /* For struct dcid_elem */ diff --git a/src/liblsquic/lsquic_qdec_hdl.c b/src/liblsquic/lsquic_qdec_hdl.c index d8dfdf67a..062a9cf26 100644 --- a/src/liblsquic/lsquic_qdec_hdl.c +++ b/src/liblsquic/lsquic_qdec_hdl.c @@ -641,7 +641,7 @@ qdh_header_read_results (struct qpack_dec_hdl *qdh, if (rhs == LQRHS_DONE) { - if (!lsquic_stream_header_is_trailer(stream)) + if (1) //!lsquic_stream_header_is_trailer(stream)) { if (stream->sm_hblock_ctx->ctx.ppc_flags & (PPC_INC_SET|PPC_URG_SET)) diff --git a/src/liblsquic/lsquic_send_ctl.c b/src/liblsquic/lsquic_send_ctl.c index 4081b4505..d5d5d6246 100644 --- a/src/liblsquic/lsquic_send_ctl.c +++ b/src/liblsquic/lsquic_send_ctl.c @@ -546,6 +546,8 @@ send_ctl_transfer_time (void *ctx) in_recovery = send_ctl_in_recovery(ctl); pacing_rate = ctl->sc_ci->cci_pacing_rate(CGP(ctl), in_recovery); + if (!pacing_rate) + pacing_rate = 1; tx_time = (uint64_t) SC_PACK_SIZE(ctl) * 1000000 / pacing_rate; return tx_time; } @@ -3788,6 +3790,8 @@ lsquic_send_ctl_can_send_probe (const struct lsquic_send_ctl *ctl, if (n_out + path->np_pack_size >= cwnd) return 0; pacing_rate = ctl->sc_ci->cci_pacing_rate(CGP(ctl), 0); + if (!pacing_rate) + pacing_rate = 1; tx_time = (uint64_t) path->np_pack_size * 1000000 / pacing_rate; return lsquic_pacer_can_schedule_probe(&ctl->sc_pacer, ctl->sc_n_scheduled + ctl->sc_n_in_flight_all, tx_time); diff --git a/src/liblsquic/lsquic_stream.c b/src/liblsquic/lsquic_stream.c index 931f87ff9..2651e10d7 100644 --- a/src/liblsquic/lsquic_stream.c +++ b/src/liblsquic/lsquic_stream.c @@ -623,15 +623,13 @@ lsquic_stream_drop_hset_ref (struct lsquic_stream *stream) static void -destroy_uh (struct lsquic_stream *stream) +destroy_uh (struct uncompressed_headers *uh, const struct lsquic_hset_if *hsi_if) { - if (stream->uh) + if (uh) { - if (stream->uh->uh_hset) - stream->conn_pub->enpub->enp_hsi_if - ->hsi_discard_header_set(stream->uh->uh_hset); - free(stream->uh); - stream->uh = NULL; + if (uh->uh_hset) + hsi_if->hsi_discard_header_set(uh->uh_hset); + free(uh); } } @@ -641,6 +639,7 @@ lsquic_stream_destroy (lsquic_stream_t *stream) { struct push_promise *promise; struct stream_hq_frame *shf; + struct uncompressed_headers *uh; stream->stream_flags |= STREAM_U_WRITE_DONE|STREAM_U_READ_DONE; if ((stream->stream_flags & (STREAM_ONNEW_DONE|STREAM_ONCLOSE_DONE)) == @@ -687,7 +686,12 @@ lsquic_stream_destroy (lsquic_stream_t *stream) } while ((shf = STAILQ_FIRST(&stream->sm_hq_frames))) stream_hq_frame_put(stream, shf); - destroy_uh(stream); + while(stream->uh) + { + uh = stream->uh; + stream->uh = uh->uh_next; + destroy_uh(uh, stream->conn_pub->enpub->enp_hsi_if); + } free(stream->sm_buf); free(stream->sm_header_block); LSQ_DEBUG("destroyed stream"); @@ -1443,7 +1447,8 @@ static size_t read_uh (struct lsquic_stream *stream, size_t (*readf)(void *, const unsigned char *, size_t, int), void *ctx) { - struct http1x_headers *const h1h = stream->uh->uh_hset; + struct uncompressed_headers *uh = stream->uh; + struct http1x_headers *const h1h = uh->uh_hset; size_t nread; nread = readf(ctx, (unsigned char *) h1h->h1h_buf + h1h->h1h_off, @@ -1452,8 +1457,10 @@ read_uh (struct lsquic_stream *stream, h1h->h1h_off += nread; if (h1h->h1h_off == h1h->h1h_size) { - LSQ_DEBUG("read all uncompressed headers"); - destroy_uh(stream); + stream->uh = uh->uh_next; + LSQ_DEBUG("read all uncompressed headers from uh: %p, next uh: %p", + uh, stream->uh); + destroy_uh(uh, stream->conn_pub->enpub->enp_hsi_if); if (stream->stream_flags & STREAM_HEAD_IN_FIN) { stream->stream_flags |= STREAM_FIN_REACHED; @@ -4186,7 +4193,7 @@ lsquic_stream_send_headers (lsquic_stream_t *stream, const lsquic_http_headers_t *headers, int eos) { if ((stream->sm_bflags & SMBF_USE_HEADERS) - && !(stream->stream_flags & (STREAM_HEADERS_SENT|STREAM_U_WRITE_DONE))) + && !(stream->stream_flags & (STREAM_U_WRITE_DONE))) { if (stream->sm_bflags & SMBF_IETF) return send_headers_ietf(stream, headers, eos); @@ -4411,15 +4418,19 @@ static int stream_uh_in_gquic (struct lsquic_stream *stream, struct uncompressed_headers *uh) { - if ((stream->sm_bflags & SMBF_USE_HEADERS) - && !(stream->stream_flags & STREAM_HAVE_UH)) + struct uncompressed_headers **next; + if ((stream->sm_bflags & SMBF_USE_HEADERS)) { SM_HISTORY_APPEND(stream, SHE_HEADERS_IN); LSQ_DEBUG("received uncompressed headers"); stream->stream_flags |= STREAM_HAVE_UH; if (uh->uh_flags & UH_FIN) stream->stream_flags |= STREAM_FIN_RECVD|STREAM_HEAD_IN_FIN; - stream->uh = uh; + next = &stream->uh; + while(*next) + next = &(*next)->uh_next; + *next = uh; + assert(uh->uh_next == NULL); if (uh->uh_oth_stream_id == 0) { if (uh->uh_weight) @@ -4443,9 +4454,10 @@ stream_uh_in_ietf (struct lsquic_stream *stream, struct uncompressed_headers *uh) { int push_promise; + struct uncompressed_headers **next; push_promise = lsquic_stream_header_is_pp(stream); - if (!(stream->stream_flags & STREAM_HAVE_UH) && !push_promise) + if (!push_promise) { SM_HISTORY_APPEND(stream, SHE_HEADERS_IN); LSQ_DEBUG("received uncompressed headers"); @@ -4460,7 +4472,11 @@ stream_uh_in_ietf (struct lsquic_stream *stream, && lsquic_stream_is_pushed(stream)); stream->stream_flags |= STREAM_FIN_RECVD|STREAM_HEAD_IN_FIN; } - stream->uh = uh; + next = &stream->uh; + while(*next) + next = &(*next)->uh_next; + *next = uh; + assert(uh->uh_next == NULL); if (uh->uh_oth_stream_id == 0) { if (uh->uh_weight) @@ -4652,6 +4668,7 @@ void * lsquic_stream_get_hset (struct lsquic_stream *stream) { void *hset; + struct uncompressed_headers *uh; if (stream_is_read_reset(stream)) { @@ -4676,9 +4693,14 @@ lsquic_stream_get_hset (struct lsquic_stream *stream) hset = stream->uh->uh_hset; stream->uh->uh_hset = NULL; - destroy_uh(stream); + + uh = stream->uh; + stream->uh = uh->uh_next; + free(uh); + if (stream->stream_flags & STREAM_HEAD_IN_FIN) { + stream->stream_flags |= STREAM_FIN_REACHED; SM_HISTORY_APPEND(stream, SHE_REACH_FIN); } @@ -4706,31 +4728,21 @@ static int update_type_hist_and_check (const struct lsquic_stream *stream, struct hq_filter *filter) { - /* 3-bit codes: */ - enum { - CODE_UNSET, - CODE_HEADER, /* H Header */ - CODE_DATA, /* D Data */ - CODE_PLUS, /* + Plus: meaning previous frame repeats */ - }; - static const unsigned valid_seqs[] = { - /* Ordered by expected frequency */ - 0123, /* HD+ */ - 012, /* HD */ - 01, /* H */ - 013, /* H+ */ /* Really HH, but we don't record it like this */ - 01231, /* HD+H */ - 0121, /* HDH */ - }; - unsigned code, i; - switch (filter->hqfi_type) { case HQFT_HEADERS: - code = CODE_HEADER; + if (filter->hqfi_flags & HQFI_FLAG_TRAILER) + return -1; + if (filter->hqfi_flags & HQFI_FLAG_DATA) + filter->hqfi_flags |= HQFI_FLAG_TRAILER; + else + filter->hqfi_flags |= HQFI_FLAG_HEADER; break; case HQFT_DATA: - code = CODE_DATA; + if ((filter->hqfi_flags & (HQFI_FLAG_HEADER + | HQFI_FLAG_TRAILER)) != HQFI_FLAG_HEADER) + return -1; + filter->hqfi_flags |= HQFI_FLAG_DATA; break; case HQFT_PUSH_PROMISE: /* [draft-ietf-quic-http-24], Section 7 */ @@ -4768,31 +4780,7 @@ update_type_hist_and_check (const struct lsquic_stream *stream, return 0; } - if (filter->hqfi_hist_idx >= MAX_HQFI_ENTRIES) - return -1; - - if (filter->hqfi_hist_idx && (filter->hqfi_hist_buf & 7) == code) - { - filter->hqfi_hist_buf <<= 3; - filter->hqfi_hist_buf |= CODE_PLUS; - filter->hqfi_hist_idx++; - } - else if (filter->hqfi_hist_idx > 1 - && ((filter->hqfi_hist_buf >> 3) & 7) == code - && (filter->hqfi_hist_buf & 7) == CODE_PLUS) - /* Keep it at plus, do nothing */; - else - { - filter->hqfi_hist_buf <<= 3; - filter->hqfi_hist_buf |= code; - filter->hqfi_hist_idx++; - } - - for (i = 0; i < sizeof(valid_seqs) / sizeof(valid_seqs[0]); ++i) - if (filter->hqfi_hist_buf == valid_seqs[i]) - return 0; - - return -1; + return 0; } @@ -4860,8 +4848,7 @@ hq_read (void *ctx, const unsigned char *buf, size_t sz, int fin) { lconn = stream->conn_pub->lconn; filter->hqfi_flags |= HQFI_FLAG_ERROR; - LSQ_INFO("unexpected HTTP/3 frame sequence: %o", - filter->hqfi_hist_buf); + LSQ_INFO("unexpected HTTP/3 frame sequence"); lconn->cn_if->ci_abort_error(lconn, 1, HEC_FRAME_UNEXPECTED, "unexpected HTTP/3 frame sequence on stream %"PRIu64, stream->id); diff --git a/src/liblsquic/lsquic_stream.h b/src/liblsquic/lsquic_stream.h index d9969158f..cbbc0d4f3 100644 --- a/src/liblsquic/lsquic_stream.h +++ b/src/liblsquic/lsquic_stream.h @@ -109,6 +109,9 @@ struct hq_filter HQFI_FLAG_ERROR = 1 << 1, HQFI_FLAG_BEGIN = 1 << 2, HQFI_FLAG_BLOCKED = 1 << 3, + HQFI_FLAG_HEADER = 1 << 4, + HQFI_FLAG_DATA = 1 << 5, + HQFI_FLAG_TRAILER = 1 << 6, } hqfi_flags:8; enum { HQFI_STATE_FRAME_HEADER_BEGIN, @@ -117,9 +120,6 @@ struct hq_filter HQFI_STATE_PUSH_ID_BEGIN, HQFI_STATE_PUSH_ID_CONTINUE, } hqfi_state:8; - unsigned char hqfi_hist_idx; -#define MAX_HQFI_ENTRIES (sizeof(unsigned) * 8 / 3) - unsigned hqfi_hist_buf; };