author     Jason A. Donenfeld <Jason@zx2c4.com>    2016-11-04 12:55:13 +0100
committer  Jason A. Donenfeld <Jason@zx2c4.com>    2016-11-04 14:22:48 +0100
commit     5d304c0863b43f3149f48e4cadf1e95bcba26706 (patch)
tree       ea8fbb281899b2e67bf9e2670ad13766b225a435
parent     91ff061cc5efd24fc9a6a0324f340c9c27aa8489 (diff)
send: queue bundles on same CPU
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-rw-r--r--   src/data.c     149
-rw-r--r--   src/packets.h   16
-rw-r--r--   src/send.c     191
3 files changed, 140 insertions, 216 deletions
diff --git a/src/data.c b/src/data.c
--- a/src/data.c
+++ b/src/data.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/bitmap.h>
 #include <linux/scatterlist.h>
+#include <net/ip_tunnels.h>
 #include <net/xfrm.h>
 #include <crypto/algapi.h>
 
@@ -75,11 +76,21 @@ static inline void skb_reset(struct sk_buff *skb)
 	skb_probe_transport_header(skb, 0);
 }
 
-static inline void skb_encrypt(struct sk_buff *skb, struct packet_data_encryption_ctx *ctx)
+struct packet_data_encryption_ctx {
+	uint8_t ds;
+	uint8_t num_frags;
+	unsigned int plaintext_len, trailer_len;
+	struct sk_buff *trailer;
+	uint64_t nonce;
+};
+
+static inline void skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair)
 {
+	struct packet_data_encryption_ctx *ctx = (struct packet_data_encryption_ctx *)skb->cb;
 	struct scatterlist sg[ctx->num_frags]; /* This should be bound to at most 128 by the caller. */
 	struct message_data *header;
 
+	/* We have to remember to add the checksum to the inner packet, in case the receiver forwards it. */
 	if (likely(!skb_checksum_setup(skb, true)))
 		skb_checksum_help(skb);
 
@@ -87,17 +98,14 @@ static inline void skb_encrypt(struct sk_buff *skb, struct packet_data_encryptio
 	/* Only after checksumming can we safely add on the padding at the end and the header. */
 	header = (struct message_data *)skb_push(skb, sizeof(struct message_data));
 	header->header.type = MESSAGE_DATA;
-	header->key_idx = ctx->keypair->remote_index;
+	header->key_idx = keypair->remote_index;
 	header->counter = cpu_to_le64(ctx->nonce);
 	pskb_put(skb, ctx->trailer, ctx->trailer_len);
 
 	/* Now we can encrypt the scattergather segments */
 	sg_init_table(sg, ctx->num_frags);
 	skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(ctx->plaintext_len));
-	chacha20poly1305_encrypt_sg(sg, sg, ctx->plaintext_len, NULL, 0, ctx->nonce, ctx->keypair->sending.key);
-
-	/* When we're done, we free the reference to the key pair */
-	noise_keypair_put(ctx->keypair);
+	chacha20poly1305_encrypt_sg(sg, sg, ctx->plaintext_len, NULL, 0, ctx->nonce, keypair->sending.key);
 }
 
 static inline bool skb_decrypt(struct sk_buff *skb, uint8_t num_frags, uint64_t nonce, struct noise_symmetric_key *key)
@@ -140,23 +148,43 @@ static inline bool get_encryption_nonce(uint64_t *nonce, struct noise_symmetric_
 	return true;
 }
 
+struct packet_bundle_ctx {
+	struct padata_priv padata;
+	struct sk_buff_head queue;
+	void (*callback)(struct sk_buff_head *, struct wireguard_peer *);
+	struct wireguard_peer *peer;
+	struct noise_keypair *keypair;
+};
+
+static inline void queue_encrypt_reset(struct sk_buff_head *queue, struct noise_keypair *keypair)
+{
+	struct sk_buff *skb;
+	/* TODO: as a later optimization, we can activate the FPU just once
+	 * for the entire loop, rather than turning it on and off for each
+	 * packet. */
+	skb_queue_walk(queue, skb) {
+		skb_encrypt(skb, keypair);
+		skb_reset(skb);
+	}
+	noise_keypair_put(keypair);
+}
+
 #ifdef CONFIG_WIREGUARD_PARALLEL
 static void do_encryption(struct padata_priv *padata)
 {
-	struct packet_data_encryption_ctx *ctx = container_of(padata, struct packet_data_encryption_ctx, padata);
-
-	skb_encrypt(ctx->skb, ctx);
-	skb_reset(ctx->skb);
+	struct packet_bundle_ctx *ctx = container_of(padata, struct packet_bundle_ctx, padata);
+	queue_encrypt_reset(&ctx->queue, ctx->keypair);
 	padata_do_serial(padata);
 }
 
 static void finish_encryption(struct padata_priv *padata)
 {
-	struct packet_data_encryption_ctx *ctx = container_of(padata, struct packet_data_encryption_ctx, padata);
+	struct packet_bundle_ctx *ctx = container_of(padata, struct packet_bundle_ctx, padata);
 
-	ctx->callback(ctx->skb, ctx->peer);
+	ctx->callback(&ctx->queue, ctx->peer);
 	peer_put(ctx->peer);
+	kfree(ctx);
 }
 
 static inline int start_encryption(struct padata_instance *padata, struct padata_priv *priv, int cb_cpu)
@@ -181,15 +209,11 @@ static inline unsigned int choose_cpu(__le32 key)
 }
 #endif
 
-int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*callback)(struct sk_buff *, struct wireguard_peer *), bool parallel)
+int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer, void(*callback)(struct sk_buff_head *, struct wireguard_peer *))
 {
 	int ret = -ENOKEY;
 	struct noise_keypair *keypair;
-	struct packet_data_encryption_ctx *ctx = NULL;
-	u64 nonce;
-	struct sk_buff *trailer = NULL;
-	unsigned int plaintext_len, padding_len, trailer_len;
-	unsigned int num_frags;
+	struct sk_buff *skb;
 
 	rcu_read_lock();
 	keypair = noise_keypair_get(rcu_dereference(peer->keypairs.current_keypair));
@@ -197,60 +221,77 @@ int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*c
 		goto err_rcu;
 	rcu_read_unlock();
 
-	if (unlikely(!get_encryption_nonce(&nonce, &keypair->sending)))
-		goto err;
+	skb_queue_walk(queue, skb) {
+		struct packet_data_encryption_ctx *ctx = (struct packet_data_encryption_ctx *)skb->cb;
+		unsigned int padding_len, num_frags;
 
-	padding_len = skb_padding(skb);
-	trailer_len = padding_len + noise_encrypted_len(0);
-	plaintext_len = skb->len + padding_len;
+		BUILD_BUG_ON(sizeof(struct packet_data_encryption_ctx) > sizeof(skb->cb));
 
-	/* Expand data section to have room for padding and auth tag */
-	ret = skb_cow_data(skb, trailer_len, &trailer);
-	if (unlikely(ret < 0))
-		goto err;
-	num_frags = ret;
-	ret = -ENOMEM;
-	if (unlikely(num_frags > 128))
-		goto err;
+		if (unlikely(!get_encryption_nonce(&ctx->nonce, &keypair->sending)))
+			goto err;
 
-	/* Set the padding to zeros, and make sure it and the auth tag are part of the skb */
-	memset(skb_tail_pointer(trailer), 0, padding_len);
+		padding_len = skb_padding(skb);
+		ctx->trailer_len = padding_len + noise_encrypted_len(0);
+		ctx->plaintext_len = skb->len + padding_len;
 
-	/* Expand head section to have room for our header and the network stack's headers,
-	 * plus our key and nonce in the head. */
-	ret = skb_cow_head(skb, DATA_PACKET_HEAD_ROOM);
-	if (unlikely(ret < 0))
-		goto err;
+		/* Store the ds bit in the cb */
+		ctx->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb);
 
-	ctx = (struct packet_data_encryption_ctx *)skb->head;
-	ctx->skb = skb;
-	ctx->callback = callback;
-	ctx->peer = peer;
-	ctx->num_frags = num_frags;
-	ctx->trailer_len = trailer_len;
-	ctx->trailer = trailer;
-	ctx->plaintext_len = plaintext_len;
-	ctx->nonce = nonce;
-	ctx->keypair = keypair;
+		/* Expand data section to have room for padding and auth tag */
+		ret = skb_cow_data(skb, ctx->trailer_len, &ctx->trailer);
+		if (unlikely(ret < 0))
+			goto err;
+		num_frags = ret;
+		ret = -ENOMEM;
+		if (unlikely(num_frags > 128))
+			goto err;
+		ctx->num_frags = num_frags;
+
+		/* Set the padding to zeros, and make sure it and the auth tag are part of the skb */
+		memset(skb_tail_pointer(ctx->trailer), 0, padding_len);
+
+		/* Expand head section to have room for our header and the network stack's headers. */
+		ret = skb_cow_head(skb, DATA_PACKET_HEAD_ROOM);
+		if (unlikely(ret < 0))
+			goto err;
+
+		/* After the first time through the loop, if we've succeeded with a legitimate nonce,
+		 * then we don't want a -ENOKEY error if subsequent nonces fail. Rather, if this
+		 * condition arises, we simply want to error out hard, and drop the entire queue. This
+		 * is partially lazy programming and TODO: this could be made to only requeue the
		 * ones that had no nonce. But I'm not sure it's worth the added complexity, given
+		 * how rarely that condition should arise. */
+		ret = -EPIPE;
+	}
 
 #ifdef CONFIG_WIREGUARD_PARALLEL
-	if ((parallel || padata_queue_len(peer->device->parallel_send) > 0) && cpumask_weight(cpu_online_mask) > 1) {
+	if ((skb_queue_len(queue) > 1 || queue->next->len > 256 || padata_queue_len(peer->device->parallel_send) > 0) && cpumask_weight(cpu_online_mask) > 1) {
 		unsigned int cpu = choose_cpu(keypair->remote_index);
-		ret = -EBUSY;
+		struct packet_bundle_ctx *ctx = kmalloc(sizeof(struct packet_bundle_ctx), GFP_ATOMIC);
+		if (!ctx)
+			goto serial;
+		skb_queue_head_init(&ctx->queue);
+		skb_queue_splice_init(queue, &ctx->queue);
+		ctx->callback = callback;
+		ctx->keypair = keypair;
 		ctx->peer = peer_rcu_get(peer);
+		ret = -EBUSY;
 		if (unlikely(!ctx->peer))
-			goto err;
+			goto err_parallel;
 		ret = start_encryption(peer->device->parallel_send, &ctx->padata, cpu);
 		if (unlikely(ret < 0)) {
 			peer_put(ctx->peer);
+err_parallel:
+			skb_queue_splice(&ctx->queue, queue);
+			kfree(ctx);
 			goto err;
 		}
 	} else
 #endif
 	{
-		skb_encrypt(skb, ctx);
-		skb_reset(skb);
-		callback(skb, peer);
+serial:
+		queue_encrypt_reset(queue, keypair);
+		callback(queue, peer);
 	}
 
 	return 0;
diff --git a/src/packets.h b/src/packets.h
index a8ecdf1..31abb57 100644
--- a/src/packets.h
+++ b/src/packets.h
@@ -39,22 +39,10 @@ void packet_send_queued_handshakes(struct work_struct *work);
 
 /* data.c */
 
-struct packet_data_encryption_ctx {
-	struct padata_priv padata;
-	struct sk_buff *skb;
-	void (*callback)(struct sk_buff *, struct wireguard_peer *);
-	struct wireguard_peer *peer;
-	unsigned int plaintext_len, trailer_len;
-	uint8_t num_frags;
-	struct sk_buff *trailer;
-	struct noise_keypair *keypair;
-	uint64_t nonce;
-};
-
-int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*callback)(struct sk_buff *, struct wireguard_peer *), bool parallel);
+int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer, void(*callback)(struct sk_buff_head *, struct wireguard_peer *));
 void packet_consume_data(struct sk_buff *skb, size_t offset, struct wireguard_device *wg, void(*callback)(struct sk_buff *, struct wireguard_peer *, struct sockaddr_storage *, bool used_new_key, int err));
 
-#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + max(sizeof(struct packet_data_encryption_ctx), SKB_HEADER_LEN), 4)
+#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
 
 #ifdef DEBUG
 bool packet_counter_selftest(void);
diff --git a/src/send.c b/src/send.c
--- a/src/send.c
+++ b/src/send.c
@@ -14,7 +14,6 @@
 #include <linux/jiffies.h>
 #include <net/udp.h>
 #include <net/sock.h>
-#include <net/ip_tunnels.h>
 
 void packet_send_handshake_initiation(struct wireguard_peer *peer)
 {
@@ -118,175 +117,71 @@ void packet_send_keepalive(struct wireguard_peer *peer)
 	packet_send_queue(peer);
 }
 
-struct packet_bundle {
-	atomic_t count;
-	struct sk_buff *first;
-};
-
-struct packet_cb {
-	struct packet_bundle *bundle;
-	struct packet_bundle data;
-	u8 ds;
-};
-#define PACKET_CB(skb) ((struct packet_cb *)skb->cb)
-
-static inline void send_off_bundle(struct packet_bundle *bundle, struct wireguard_peer *peer)
+static void message_create_data_done(struct sk_buff_head *queue, struct wireguard_peer *peer)
 {
-	struct sk_buff *skb, *next;
+	struct sk_buff *skb, *tmp;
 	bool is_keepalive, data_sent = false;
-	if (likely(bundle->first))
-		timers_any_authenticated_packet_traversal(peer);
-	for (skb = bundle->first; skb; skb = next) {
-		/* We store the next pointer locally because socket_send_skb_to_peer
-		 * consumes the packet before the top of the loop comes again. */
-		next = skb->next;
+
+	timers_any_authenticated_packet_traversal(peer);
+	skb_queue_walk_safe(queue, skb, tmp) {
 		is_keepalive = skb->len == message_data_len(0);
-		if (likely(!socket_send_skb_to_peer(peer, skb, PACKET_CB(skb)->ds) && !is_keepalive))
+		if (likely(!socket_send_skb_to_peer(peer, skb, *(u8 *)skb->cb) && !is_keepalive))
 			data_sent = true;
 	}
 	if (likely(data_sent))
 		timers_data_sent(peer);
-}
 
-static void message_create_data_done(struct sk_buff *skb, struct wireguard_peer *peer)
-{
-	/* A packet completed successfully, so we deincrement the counter of packets
-	 * remaining, and if we hit zero we can send it off. */
-	if (atomic_dec_and_test(&PACKET_CB(skb)->bundle->count)) {
-		send_off_bundle(PACKET_CB(skb)->bundle, peer);
-		/* We queue the remaining ones only after sending, to retain packet order. */
-		if (unlikely(peer->need_resend_queue))
-			packet_send_queue(peer);
-	}
 	keep_key_fresh(peer);
+
+	if (unlikely(peer->need_resend_queue))
+		packet_send_queue(peer);
 }
 
 int packet_send_queue(struct wireguard_peer *peer)
 {
-	struct packet_bundle *bundle;
-	struct sk_buff_head local_queue;
-	struct sk_buff *skb, *next, *first;
+	struct sk_buff_head queue;
 	unsigned long flags;
-	bool parallel = true;
 
 	peer->need_resend_queue = false;
 
 	/* Steal the current queue into our local one. */
-	skb_queue_head_init(&local_queue);
+	skb_queue_head_init(&queue);
 	spin_lock_irqsave(&peer->tx_packet_queue.lock, flags);
-	skb_queue_splice_init(&peer->tx_packet_queue, &local_queue);
+	skb_queue_splice_init(&peer->tx_packet_queue, &queue);
 	spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags);
 
-	first = skb_peek(&local_queue);
-	if (unlikely(!first))
-		goto out;
-
-	/* Remove the circularity from the queue, so that we can iterate on
-	 * on the skbs themselves. */
-	local_queue.prev->next = local_queue.next->prev = NULL;
+	if (unlikely(!skb_queue_len(&queue)))
+		return NETDEV_TX_OK;
 
-	/* The first pointer of the control block is a pointer to the bundle
-	 * and after that, in the first packet only, is where we actually store
-	 * the bundle data. This saves us a call to kmalloc. */
-	BUILD_BUG_ON(sizeof(struct packet_cb) > sizeof(skb->cb));
-	bundle = &PACKET_CB(first)->data;
-	atomic_set(&bundle->count, skb_queue_len(&local_queue));
-	bundle->first = first;
-
-	/* Non-parallel path for the case of only one packet that's small */
-	if (skb_queue_len(&local_queue) == 1 && first->len <= 256)
-		parallel = false;
-
-	for (skb = first; skb; skb = next) {
-		/* We store the next pointer locally because we might free skb
-		 * before the top of the loop comes again. */
-		next = skb->next;
-
-		/* We set the first pointer in cb to point to the bundle data. */
-		PACKET_CB(skb)->bundle = bundle;
-
-		/* Extract the TOS value before encryption, for ECN encapsulation. */
-		PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb);
-
-		/* We submit it for encryption and sending. */
-		switch (packet_create_data(skb, peer, message_create_data_done, parallel)) {
-		case 0:
-			/* If all goes well, we can simply deincrement the queue counter. Even
-			 * though skb_dequeue() would do this for us, we don't want to break the
-			 * links between packets, so we just traverse the list normally and
-			 * deincrement the counter manually each time a packet is consumed. */
-			--local_queue.qlen;
-			break;
-		case -ENOKEY:
-			/* ENOKEY means that we don't have a valid session for the peer, which
-			 * means we should initiate a session, and then requeue everything. */
-			packet_send_handshake_initiation_ratelimited(peer);
-			goto requeue;
-		case -EBUSY:
-			/* EBUSY happens when the parallel workers are all filled up, in which
-			 * case we should requeue everything. */
-
-			/* First, we mark that we should try to do this later, when existing
-			 * jobs are done. */
-			peer->need_resend_queue = true;
-		requeue:
-			if (skb->prev) {
-				/* Since we're requeuing skb and everything after skb, we make
-				 * sure that the previously successfully sent packets don't link
-				 * to the requeued packets, which will be sent independently the
-				 * next time this function is called. */
-				skb->prev->next = NULL;
-				skb->prev = NULL;
-			}
-			if (atomic_sub_and_test(local_queue.qlen, &bundle->count)) {
-				/* We remove the requeued packets from the count of total packets
-				 * that were successfully submitted, which means we then must see
-				 * if we were the ones to get it to zero. If we are at zero, we
-				 * only send the previous successful packets if there actually were
-				 * packets that succeeded before skb. */
-				if (skb != first)
-					send_off_bundle(bundle, peer);
-			}
-			/* We stick the remaining skbs from local_queue at the top of the peer's
-			 * queue again, setting the top of local_queue to be the skb that begins
-			 * the requeueing. */
-			local_queue.next = skb;
-			spin_lock_irqsave(&peer->tx_packet_queue.lock, flags);
-			skb_queue_splice(&local_queue, &peer->tx_packet_queue);
-			spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags);
-			goto out;
-		default:
-			/* If we failed for any other reason, we want to just free the packet and
-			 * forget about it, so we first deincrement the queue counter as in the
-			 * successful case above. */
-			--local_queue.qlen;
-			if (skb == first && next) {
-				/* If it's the first one that failed, we need to move the bundle data
-				 * to the next packet. Then, all subsequent assignments of the bundle
-				 * pointer will be to the moved data. */
-				PACKET_CB(next)->data = *bundle;
-				bundle = &PACKET_CB(next)->data;
-				bundle->first = next;
-			}
-			/* We remove the skb from the list and free it. */
-			if (skb->prev)
-				skb->prev->next = skb->next;
-			if (skb->next)
-				skb->next->prev = skb->prev;
-			kfree_skb(skb);
-			if (atomic_dec_and_test(&bundle->count)) {
-				/* As above, if this failed packet pushes the count to zero, we have to
-				 * be the ones to send it off only in the case that there's something to
-				 * send. */
-				if (skb != first)
-					send_off_bundle(bundle, peer);
-			}
-			/* Only at the bottom do we update our local `first` variable, because we need it
-			 * in the check above. But it's important that bundle->first is updated earlier when
-			 * actually moving the bundle. */
-			first = bundle->first;
-		}
+	/* We submit it for encryption and sending. */
+	switch (packet_create_data(&queue, peer, message_create_data_done)) {
+	case 0:
+		break;
+	case -ENOKEY:
+		/* ENOKEY means that we don't have a valid session for the peer, which
+		 * means we should initiate a session, and then requeue everything. */
+		packet_send_handshake_initiation_ratelimited(peer);
+		goto requeue;
+	case -EBUSY:
+		/* EBUSY happens when the parallel workers are all filled up, in which
+		 * case we should requeue everything. */
+
+		/* First, we mark that we should try to do this later, when existing
+		 * jobs are done. */
+		peer->need_resend_queue = true;
+	requeue:
+		/* We stick the remaining skbs from the local queue at the top of the peer's
+		 * queue again, to be retried the next time this function is called. */
+		spin_lock_irqsave(&peer->tx_packet_queue.lock, flags);
+		skb_queue_splice(&queue, &peer->tx_packet_queue);
+		spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags);
+		break;
+	default:
+		/* If we failed for any other reason, we want to just free the packets and
+		 * forget about them. We do this unlocked, since we're the only ones with
+		 * a reference to the local queue. */
+		__skb_queue_purge(&queue);
 	}
-out:
 	return NETDEV_TX_OK;
 }
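The heart of the data.c change is that the per-packet encryption metadata now lives in each skb's fixed-size control block (skb->cb), guarded by a BUILD_BUG_ON(), while a single heap-allocated packet_bundle_ctx carries the whole sk_buff_head through one padata job, so every packet in a bundle is encrypted on the same CPU and completed in order. The following is a minimal userspace sketch of that pattern only; the names (pkt, pkt_cb_ctx, PKT_CB, encrypt_bundle), the 48-byte cb size, and the stubbed-out "encryption" are illustrative assumptions, not the kernel's or WireGuard's API.

/* Illustrative sketch: per-packet context stored in a fixed-size control
 * block, and a whole bundle processed in one worker pass. All names here
 * are hypothetical; they only mirror the shape of the patch above. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CB_SIZE 48 /* assumed scratch size, mirroring skb->cb */

struct pkt {
	struct pkt *next;
	size_t len;
	uint8_t cb[CB_SIZE]; /* scratch space owned by the current layer */
};

struct pkt_cb_ctx {
	uint8_t ds;                  /* DS/ECN bits captured before encryption */
	uint64_t nonce;              /* per-packet counter */
	unsigned int plaintext_len;
};

/* Fails to compile if the context outgrows the control block,
 * playing the role of the BUILD_BUG_ON() in the patch. */
_Static_assert(sizeof(struct pkt_cb_ctx) <= CB_SIZE, "ctx must fit in cb");

#define PKT_CB(p) ((struct pkt_cb_ctx *)(p)->cb)

/* One "job" walks every packet of the bundle, so CPU locality and
 * completion order are per-bundle rather than per-packet. */
static void encrypt_bundle(struct pkt *head, uint64_t *counter)
{
	for (struct pkt *p = head; p; p = p->next) {
		PKT_CB(p)->nonce = (*counter)++;
		PKT_CB(p)->plaintext_len = (unsigned int)p->len;
		/* ... encrypt p in place using PKT_CB(p)->nonce ... */
	}
}

int main(void)
{
	struct pkt b = { .next = NULL, .len = 100 };
	struct pkt a = { .next = &b, .len = 1400 };
	uint64_t counter = 0;

	encrypt_bundle(&a, &counter);
	printf("nonces: %llu %llu\n",
	       (unsigned long long)PKT_CB(&a)->nonce,
	       (unsigned long long)PKT_CB(&b)->nonce);
	return 0;
}

The compile-time assert is what makes the trick safe: the per-packet context can never silently outgrow the scratch space that the owning layer is lending to it, which is why the patch can drop the old scheme of stashing the context in skb->head.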
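On the send.c side, packet_send_queue() now reduces to: steal the peer's tx queue under its lock, hand the whole bundle to packet_create_data(), and splice everything back onto the front of the peer's queue if the submission has to be retried (-ENOKEY, -EBUSY). A rough userspace analogue of that steal/splice-back shape is sketched below; the list type, submit_bundle(), and the reuse of -ENOKEY/-EBUSY as its return values are stand-ins for the kernel's sk_buff_head and error paths, not real interfaces.

/* Illustrative sketch: "steal the queue, submit the bundle, splice back on
 * failure", re-done with a plain mutex-protected list. submit_bundle() and
 * its error codes are made up for the example. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int id; };

static struct node *tx_queue;                  /* shared producer queue */
static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;

static int submit_bundle(struct node *bundle)
{
	/* Pretend the whole bundle was handed off to the crypto workers. */
	for (struct node *n = bundle; n; n = n->next)
		printf("submitted packet %d\n", n->id);
	return 0; /* return -EBUSY or -ENOKEY here to exercise the requeue path */
}

static void send_queue(void)
{
	struct node *bundle;

	/* Steal the current queue into a local one, once, under the lock. */
	pthread_mutex_lock(&tx_lock);
	bundle = tx_queue;
	tx_queue = NULL;
	pthread_mutex_unlock(&tx_lock);

	if (!bundle)
		return;

	switch (submit_bundle(bundle)) {
	case 0:
		break;
	case -ENOKEY:
	case -EBUSY: {
		/* Requeue the whole bundle at the head so ordering is preserved. */
		struct node *tail = bundle;

		while (tail->next)
			tail = tail->next;
		pthread_mutex_lock(&tx_lock);
		tail->next = tx_queue;
		tx_queue = bundle;
		pthread_mutex_unlock(&tx_lock);
		break;
	}
	default:
		/* Any other failure: drop the bundle (nothing to free here). */
		break;
	}
}

int main(void)
{
	struct node c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };

	tx_queue = &a;
	send_queue();
	return 0;
}

Because the bundle is requeued or dropped as a unit, the old per-packet bookkeeping (reference counts, relinking skbs, migrating the bundle data between control blocks) disappears, which is where most of the 216 deleted lines in the diffstat come from.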