Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UDP GSO support #135

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions bin/prog.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ prog_print_common_options (const struct prog *prog, FILE *out)
" sndbuf=12345 # Sets SO_SNDBUF\n"
" rcvbuf=12345 # Sets SO_RCVBUF\n"
" -W Use stock PMI (malloc & free)\n"
" -O BURST Use UDP GSO (if available). BURST factor is the max packets\n"
" that can be aggregated in single sendmsg.\n"
);

#if HAVE_SENDMMSG
Expand Down Expand Up @@ -222,6 +224,34 @@ prog_print_common_options (const struct prog *prog, FILE *out)
);
}

#if HAVE_GSO
/* Test at runtime if the GSO support is available. setsockopt(UDP_SEGMENT)
* should be successful if the GSO is supported.
* Returns non-zero if GSO supported. */
int supports_gso(void)
{
int fd = socket(AF_INET, SOCK_DGRAM, 0);
int gso_size = 1400; // just for test

if(fd < 0) {
LSQ_ERROR("weird! socket failed");
return 0;
}
if (setsockopt(fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size))) {
LSQ_INFO("gso setsockopt failed. GSO not supp");
close(fd);
return 0;
}
LSQ_INFO("GSO is supported");
close(fd);
return 1;
}
#else
int supports_gso(void)
{
return 0;
}
#endif // HAVE_GSO

int
prog_set_opt (struct prog *prog, int opt, const char *arg)
Expand All @@ -233,6 +263,11 @@ prog_set_opt (struct prog *prog, int opt, const char *arg)

switch (opt)
{
case 'O':
if(supports_gso()) {
prog->prog_gso_burst = (unsigned)atoi(arg);
}
return 0;
#if LSQUIC_DONTFRAG_SUPPORTED
case 'D':
{
Expand Down
3 changes: 2 additions & 1 deletion bin/prog.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct prog
unsigned short prog_max_packet_size;
int prog_version_cleared;
unsigned long prog_read_count;
unsigned prog_gso_burst;
#if HAVE_SENDMMSG
int prog_use_sendmmsg;
#endif
Expand Down Expand Up @@ -75,7 +76,7 @@ prog_init (struct prog *, unsigned lsquic_engine_flags, struct sport_head *,
# define IP_DONTFRAG_FLAG ""
#endif

#define PROG_OPTS "i:km:c:y:L:l:o:H:s:S:Y:z:G:W" RECVMMSG_FLAG SENDMMSG_FLAG \
#define PROG_OPTS "O:i:km:c:y:L:l:o:H:s:S:Y:z:G:W" RECVMMSG_FLAG SENDMMSG_FLAG \
IP_DONTFRAG_FLAG

/* Returns:
Expand Down
174 changes: 154 additions & 20 deletions bin/test_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#ifndef WIN32
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>
Expand Down Expand Up @@ -1255,6 +1256,7 @@ enum ctl_what
#if ECN_SUPPORTED
CW_ECN = 1 << 1,
#endif
CW_PKTLEN = 1 << 2,
};

static void
Expand All @@ -1264,7 +1266,7 @@ setup_control_msg (
#else
WSAMSG
#endif
*msg, enum ctl_what cw,
*msg, enum ctl_what cw, uint16_t pktlen,
const struct lsquic_out_spec *spec, unsigned char *buf, size_t bufsz)
{
struct cmsghdr *cmsg;
Expand Down Expand Up @@ -1368,6 +1370,17 @@ setup_control_msg (
}
cw &= ~CW_ECN;
}
#endif
#if HAVE_GSO
else if (cw & CW_PKTLEN)
{
cmsg->cmsg_level = SOL_UDP;
cmsg->cmsg_type = UDP_SEGMENT;
cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t));
ctl_len += CMSG_SPACE(sizeof(uint16_t));
*(uint16_t *)CMSG_DATA(cmsg) = pktlen;
cw &= ~CW_PKTLEN;
}
#endif
else
assert(0);
Expand All @@ -1380,25 +1393,30 @@ setup_control_msg (
#endif
}

#ifndef NDEBUG
void check_if_single_peer(const struct lsquic_out_spec *specs,
unsigned count)
{
void *ctx;
unsigned i;
for (i = 1, ctx = specs[i].peer_ctx;
i < count;
ctx = specs[i].peer_ctx, ++i)
assert(ctx == specs[i - 1].peer_ctx);
}
#endif

#if HAVE_SENDMMSG
static int
send_packets_using_sendmmsg (const struct lsquic_out_spec *specs,
unsigned count)
{
#ifndef NDEBUG
{
/* This only works for a single port! If the specs contain more
* than one socket, this function does *NOT* work. We check it
* here just in case:
*/
void *ctx;
unsigned i;
for (i = 1, ctx = specs[i].peer_ctx;
i < count;
ctx = specs[i].peer_ctx, ++i)
assert(ctx == specs[i - 1].peer_ctx);
}
/* This only works for a single port! If the specs contain more
* than one socket, this function does *NOT* work. We check it
* here just in case:
*/
check_if_single_peer(specs, count);
#endif

const struct service_port *const sport = specs[0].peer_ctx;
Expand Down Expand Up @@ -1469,7 +1487,7 @@ send_packets_using_sendmmsg (const struct lsquic_out_spec *specs,
else if (cw)
{
prev_ancil_key = ancil_key;
setup_control_msg(&mmsgs[i].msg_hdr, cw, &specs[i], ancil[i].buf,
setup_control_msg(&mmsgs[i].msg_hdr, cw, 0, &specs[i], ancil[i].buf,
sizeof(ancil[i].buf));
}
else
Expand Down Expand Up @@ -1535,9 +1553,13 @@ find_sport (struct prog *prog, const struct sockaddr *local_sa)

#endif


/*
* Non-zero pktlen indicates use of UDP GSO. pktlen is used to create gso_size
* CMSG needed for UDP GSO.
*/
static int
send_packets_one_by_one (const struct lsquic_out_spec *specs, unsigned count)
send_packets_one_by_one (const struct lsquic_out_spec *specs, unsigned count,
uint16_t pktlen)
{
const struct service_port *sport;
enum ctl_what cw;
Expand All @@ -1562,6 +1584,7 @@ send_packets_one_by_one (const struct lsquic_out_spec *specs, unsigned count)
#if ECN_SUPPORTED
+ CMSG_SPACE(sizeof(int))
#endif
+ CMSG_SPACE(sizeof(uint16_t))
];
struct cmsghdr cmsg;
} ancil;
Expand Down Expand Up @@ -1639,6 +1662,10 @@ send_packets_one_by_one (const struct lsquic_out_spec *specs, unsigned count)
ancil_key |= specs[n].ecn;
}
#endif
if (pktlen)
{
cw |= CW_PKTLEN;
}
if (cw && prev_ancil_key == ancil_key)
{
/* Reuse previous ancillary message */
Expand All @@ -1647,7 +1674,7 @@ send_packets_one_by_one (const struct lsquic_out_spec *specs, unsigned count)
else if (cw)
{
prev_ancil_key = ancil_key;
setup_control_msg(&msg, cw, &specs[n], ancil.buf, sizeof(ancil.buf));
setup_control_msg(&msg, cw, pktlen, &specs[n], ancil.buf, sizeof(ancil.buf));
}
else
{
Expand Down Expand Up @@ -1697,18 +1724,125 @@ send_packets_one_by_one (const struct lsquic_out_spec *specs, unsigned count)
}
}

#if HAVE_GSO
/* UDP GSO
* Refs:
* https://lwn.net/Articles/752956/
* http://vger.kernel.org/lpc_net2018_talks/willemdebruijn-lpc2018-udpgso-paper-DRAFT-1.pdf
*/

static int send_iovecs_gso(struct lsquic_out_spec *spec,
struct iovec *vecs, unsigned vcnt)
{
spec->iov = vecs;
spec->iovlen = vcnt;
LSQ_DEBUG("GSO with burst:%d", vcnt);
return send_packets_one_by_one (spec, 1, vcnt > 1? vecs[0].iov_len: 0);
}

/* return true if specs match */
int match_spec(const struct lsquic_out_spec *s1,
const struct lsquic_out_spec *s2)
{
if (s1->local_sa != s2->local_sa ||
s1->dest_sa != s2->dest_sa ||
s1->peer_ctx != s2->peer_ctx ||
s1->ecn != s2->ecn)
return 0;
return 1;
}

/*
* To use GSO the flow here is:
* a. Check all the equal length iovs and batch them in single iovec array and
* pass it in spec.
* b. Batching inter-spec iovs can happen only if all the spec parameters
* (peer_ctx, sa, ecn) match.
* c. The last packet in the iovec array can be of smaller length then all the
* earlier packets.
*/
static int
send_packets_using_gso (const unsigned burst,
const struct lsquic_out_spec *specs,
unsigned count)
{
struct iovec vecs[burst];
struct iovec *inv;
struct lsquic_out_spec newspec;
unsigned i, j, vcnt = 0;
int ret;

if (count == 0 || specs[0].iovlen == 0) {
LSQ_ERROR("Sanity failed. count=%d, specs iovlen=%zu",
count, specs[0].iovlen);
return 0;
}

/* Coalesce packets with same lengths, except that the last packet can be
* of smaller length. Colasece max gso_burst packets. */
for (i = 0; i < count; ++i)
{
if (vcnt == 0) {
memcpy(&newspec, &specs[i], sizeof(struct lsquic_out_spec));
} else if(!match_spec(&newspec, &specs[i])) {
// new specs dont match prev ones, so send the previous iovec batch
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When there are two connections (or more) that send packets, the lsquic engine interleaves the packets. In that scenario, you'd end up with sending single packets most of the time. I believe a better approach is to perform the check whether all the specs match in sport_packets_out(). If they don't match, use non-GSO sending method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I was aware that two connections packets could be interleaved in the same call.

The match_spec(&newspec, &specs[i]) takes care of identifying it. If the spec differs then the batching is stopped and whatever batch is currently available is sent. Does that make sense?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it makes sense. But I wanted to emphasize that when there is more than one connection, one is likely to end up with specs never matching and thus, given N packets to send, we'll send N batches of one. That's why I suggested to perform the check earlier and use the result to pick the sending function: GSO or non-GSO.

ret = send_iovecs_gso (&newspec, vecs, vcnt);
if(ret == 0) {
LSQ_ERROR("Partial send1");
return i-1;
}
vcnt = 0;
continue;
}
for (j = 0; j < specs[i].iovlen; ++j)
{
inv = &specs[i].iov[j];
if (vcnt == 0) {
vecs[vcnt++] = *inv;
continue;
}
if (inv->iov_len > vecs[0].iov_len) {
ret = send_iovecs_gso (&newspec, vecs, vcnt);
vcnt = 0;
} else if ((inv->iov_len != vecs[0].iov_len) || (vcnt >= (burst-1))) {
vecs[vcnt++] = *inv;
ret = send_iovecs_gso (&newspec, vecs, vcnt);
vcnt = 0;
} else {
vecs[vcnt++] = *inv;
}
if(vcnt == 0 && ret == 0) {
LSQ_ERROR("Partial send2");
return i-1;
}
}
}
if (vcnt) {
ret = send_iovecs_gso (&newspec, vecs, vcnt);
if(ret == 0) {
LSQ_ERROR("Partial send3");
return count-1;
}
}
return count;
}
#endif // HAVE_GSO

int
sport_packets_out (void *ctx, const struct lsquic_out_spec *specs,
unsigned count)
{
#if HAVE_SENDMMSG
const struct prog *prog = ctx;
#if HAVE_GSO
if (prog->prog_gso_burst > 0)
return send_packets_using_gso(prog->prog_gso_burst, specs, count);
else
#endif
#if HAVE_SENDMMSG
if (prog->prog_use_sendmmsg)
return send_packets_using_sendmmsg(specs, count);
else
#endif
return send_packets_one_by_one(specs, count);
return send_packets_one_by_one(specs, count, 0);
}


Expand Down
15 changes: 15 additions & 0 deletions bin/test_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,21 @@ destroy_lsquic_reader_ctx (struct reader_ctx *ctx);
#define LITESPEED_ID "lsquic" "/" TOSTRING(LSQUIC_MAJOR_VERSION) "." \
TOSTRING(LSQUIC_MINOR_VERSION) "." TOSTRING(LSQUIC_PATCH_VERSION)

#ifndef WIN32
#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103
#endif
#endif

#if __linux__
#ifndef HAVE_GSO
#if defined(SOL_UDP) && defined(UDP_SEGMENT)
#include <netinet/udp.h>
#define HAVE_GSO 1
#endif
#endif
#endif

struct header_buf
{
unsigned off;
Expand Down