This patch reverts the following upstream commits:

https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/patch/?id=4e27762417669cb459971635be550eb7b5598286
https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/patch/?id=c0bb07df7d981e4091432754e30c9c720e2c0c78
https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/patch/?id=c5adde9468b0714a051eac7f9666f23eb10b61f7
https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/patch/?id=d48623677191e0f035d7afd344f92cf880b01f8e

in order to fix Bug #40059 (https://forge.univention.org/bugzilla/show_bug.cgi?id=40059).
Reverting them removes the netlink "bound" flag and the netlink_to_full_skb() tap helper
and restores the previous netlink_set_ring(), netlink_insert()/netlink_bind() and dump
buffer allocation code paths.

diff -Nur linux-4.1.6.orig/net/netlink/af_netlink.c linux-4.1.6/net/netlink/af_netlink.c
--- linux-4.1.6.orig/net/netlink/af_netlink.c	2015-11-29 14:30:39.332341256 +0100
+++ linux-4.1.6/net/netlink/af_netlink.c	2015-11-29 14:31:38.060766662 +0100
@@ -123,24 +123,6 @@
 	return group ? 1 << (group - 1) : 0;
 }
 
-static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
-					   gfp_t gfp_mask)
-{
-	unsigned int len = skb_end_offset(skb);
-	struct sk_buff *new;
-
-	new = alloc_skb(len, gfp_mask);
-	if (new == NULL)
-		return NULL;
-
-	NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
-	NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
-	NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
-
-	memcpy(skb_put(new, len), skb->data, len);
-	return new;
-}
-
 int netlink_add_tap(struct netlink_tap *nt)
 {
 	if (unlikely(nt->dev->type != ARPHRD_NETLINK))
@@ -222,11 +204,7 @@
 	int ret = -ENOMEM;
 
 	dev_hold(dev);
-
-	if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
-		nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
-	else
-		nskb = skb_clone(skb, GFP_ATOMIC);
+	nskb = skb_clone(skb, GFP_ATOMIC);
 	if (nskb) {
 		nskb->dev = dev;
 		nskb->protocol = htons((u16) sk->sk_protocol);
@@ -298,6 +276,11 @@
 }
 
 #ifdef CONFIG_NETLINK_MMAP
+static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+{
+	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+}
+
 static bool netlink_rx_is_mmaped(struct sock *sk)
 {
 	return nlk_sk(sk)->rx_ring.pg_vec != NULL;
@@ -372,52 +355,25 @@
 	return NULL;
 }
 
-
-static void
-__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
-		   unsigned int order)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sk_buff_head *queue;
-	struct netlink_ring *ring;
-
-	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
-	ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-
-	spin_lock_bh(&queue->lock);
-
-	ring->frame_max = req->nm_frame_nr - 1;
-	ring->head = 0;
-	ring->frame_size = req->nm_frame_size;
-	ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
-	swap(ring->pg_vec_len, req->nm_block_nr);
-	swap(ring->pg_vec_order, order);
-	swap(ring->pg_vec, pg_vec);
-
-	__skb_queue_purge(queue);
-	spin_unlock_bh(&queue->lock);
-
-	WARN_ON(atomic_read(&nlk->mapped));
-
-	if (pg_vec)
-		free_pg_vec(pg_vec, order, req->nm_block_nr);
-}
-
 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-			    bool tx_ring)
+			    bool closing, bool tx_ring)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct netlink_ring *ring;
+	struct sk_buff_head *queue;
 	void **pg_vec = NULL;
 	unsigned int order = 0;
+	int err;
 
 	ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 
-	if (atomic_read(&nlk->mapped))
-		return -EBUSY;
-	if (atomic_read(&ring->pending))
-		return -EBUSY;
+	if (!closing) {
+		if (atomic_read(&nlk->mapped))
+			return -EBUSY;
+		if (atomic_read(&ring->pending))
+			return -EBUSY;
+	}
 
 	if (req->nm_block_nr) {
 		if (ring->pg_vec != NULL)
@@ -449,19 +405,31 @@
 		return -EINVAL;
 	}
 
+	err = -EBUSY;
 	mutex_lock(&nlk->pg_vec_lock);
-	if (atomic_read(&nlk->mapped) == 0) {
-		__netlink_set_ring(sk, req, tx_ring, pg_vec, order);
-		mutex_unlock(&nlk->pg_vec_lock);
-		return 0;
-	}
 
+	if (closing || atomic_read(&nlk->mapped) == 0) {
+		err = 0;
+		spin_lock_bh(&queue->lock);
+
+		ring->frame_max = req->nm_frame_nr - 1;
+		ring->head = 0;
+		ring->frame_size = req->nm_frame_size;
+		ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+		swap(ring->pg_vec_len, req->nm_block_nr);
+		swap(ring->pg_vec_order, order);
+		swap(ring->pg_vec, pg_vec);
+
+		__skb_queue_purge(queue);
+		spin_unlock_bh(&queue->lock);
+		WARN_ON(atomic_read(&nlk->mapped));
+	}
 	mutex_unlock(&nlk->pg_vec_lock);
 
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->nm_block_nr);
-
-	return -EBUSY;
+	return err;
 }
 
 static void netlink_mm_open(struct vm_area_struct *vma)
@@ -849,6 +817,7 @@
 }
 
 #else /* CONFIG_NETLINK_MMAP */
+#define netlink_skb_is_mmaped(skb)	false
 #define netlink_rx_is_mmaped(sk)	false
 #define netlink_tx_is_mmaped(sk)	false
 #define netlink_mmap			sock_no_mmap
@@ -929,10 +898,10 @@
 
 		memset(&req, 0, sizeof(req));
 		if (nlk->rx_ring.pg_vec)
-			__netlink_set_ring(sk, &req, false, NULL, 0);
+			netlink_set_ring(sk, &req, true, false);
 		memset(&req, 0, sizeof(req));
 		if (nlk->tx_ring.pg_vec)
-			__netlink_set_ring(sk, &req, true, NULL, 0);
+			netlink_set_ring(sk, &req, true, true);
 	}
 #endif /* CONFIG_NETLINK_MMAP */
 
@@ -1096,8 +1065,8 @@
 
 	lock_sock(sk);
 
-	err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
-	if (nlk_sk(sk)->bound)
+	err = -EBUSY;
+	if (nlk_sk(sk)->portid)
 		goto err;
 
 	err = -ENOMEM;
@@ -1110,20 +1079,12 @@
 
 	err = __netlink_insert(table, sk);
 	if (err) {
-		/* In case the hashtable backend returns with -EBUSY
-		 * from here, it must not escape to the caller.
-		 */
-		if (unlikely(err == -EBUSY))
-			err = -EOVERFLOW;
 		if (err == -EEXIST)
 			err = -EADDRINUSE;
+		nlk_sk(sk)->portid = 0;
 		sock_put(sk);
 	}
 
-	/* We need to ensure that the socket is hashed and visible. */
-	smp_wmb();
-	nlk_sk(sk)->bound = portid;
-
 err:
 	release_sock(sk);
 	return err;
@@ -1503,7 +1464,6 @@
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 	int err;
 	long unsigned int groups = nladdr->nl_groups;
-	bool bound;
 
 	if (addr_len < sizeof(struct sockaddr_nl))
 		return -EINVAL;
@@ -1520,14 +1480,9 @@
 			return err;
 	}
 
-	bound = nlk->bound;
-	if (bound) {
-		/* Ensure nlk->portid is up-to-date. */
-		smp_rmb();
-
+	if (nlk->portid)
 		if (nladdr->nl_pid != nlk->portid)
 			return -EINVAL;
-	}
 
 	if (nlk->netlink_bind && groups) {
 		int group;
@@ -1543,10 +1498,7 @@
 		}
 	}
 
-	/* No need for barriers here as we return to user-space without
-	 * using any of the bound attributes.
-	 */
-	if (!bound) {
+	if (!nlk->portid) {
 		err = nladdr->nl_pid ?
 			netlink_insert(sk, nladdr->nl_pid) :
 			netlink_autobind(sock);
@@ -1594,10 +1546,7 @@
 	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
 		return -EPERM;
 
-	/* No need for barriers here as we return to user-space without
-	 * using any of the bound attributes.
-	 */
-	if (!nlk->bound)
+	if (!nlk->portid)
 		err = netlink_autobind(sock);
 
 	if (err == 0) {
@@ -2248,7 +2197,7 @@
 			return -EINVAL;
 		if (copy_from_user(&req, optval, sizeof(req)))
 			return -EFAULT;
-		err = netlink_set_ring(sk, &req,
+		err = netlink_set_ring(sk, &req, false,
 				       optname == NETLINK_TX_RING);
 		break;
 	}
@@ -2354,13 +2303,10 @@
 		dst_group = nlk->dst_group;
 	}
 
-	if (!nlk->bound) {
+	if (!nlk->portid) {
 		err = netlink_autobind(sock);
 		if (err)
 			goto out;
-	} else {
-		/* Ensure nlk is hashed and visible. */
-		smp_rmb();
 	}
 
 	/* It's a really convoluted way for userland to ask for mmaped
@@ -2683,7 +2629,6 @@
 	struct sk_buff *skb = NULL;
 	struct nlmsghdr *nlh;
 	int len, err = -ENOBUFS;
-	int alloc_min_size;
 	int alloc_size;
 
 	mutex_lock(nlk->cb_mutex);
@@ -2692,6 +2637,9 @@
 		goto errout_skb;
 	}
 
+	cb = &nlk->cb;
+	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
+
 	if (!netlink_rx_is_mmaped(sk) &&
 	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		goto errout_skb;
@@ -2701,35 +2649,23 @@
 	 * to reduce number of system calls on dump operations, if user
 	 * ever provided a big enough buffer.
	 */
-	cb = &nlk->cb;
-	alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
-
-	if (alloc_min_size < nlk->max_recvmsg_len) {
-		alloc_size = nlk->max_recvmsg_len;
-		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
+	if (alloc_size < nlk->max_recvmsg_len) {
+		skb = netlink_alloc_skb(sk,
+					nlk->max_recvmsg_len,
+					nlk->portid,
 					GFP_KERNEL |
 					__GFP_NOWARN |
 					__GFP_NORETRY);
+		/* available room should be exact amount to avoid MSG_TRUNC */
+		if (skb)
+			skb_reserve(skb, skb_tailroom(skb) -
+					 nlk->max_recvmsg_len);
 	}
-	if (!skb) {
-		alloc_size = alloc_min_size;
+	if (!skb)
 		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
 					GFP_KERNEL);
-	}
 	if (!skb)
 		goto errout_skb;
-
-	/* Trim skb to allocated size. User is expected to provide buffer as
-	 * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
-	 * netlink_recvmsg())). dump will pack as many smaller messages as
-	 * could fit within the allocated skb. skb is typically allocated
-	 * with larger space than required (could be as much as near 2x the
-	 * requested size with align to next power of 2 approach). Allowing
-	 * dump to use the excess space makes it difficult for a user to have a
-	 * reasonable static buffer based on the expected largest dump of a
-	 * single netdev. The outcome is MSG_TRUNC error.
-	 */
-	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
 	netlink_skb_set_owner_r(skb, sk);
 
 	len = cb->dump(skb, cb);
diff -Nur linux-4.1.6.orig/net/netlink/af_netlink.h linux-4.1.6/net/netlink/af_netlink.h
--- linux-4.1.6.orig/net/netlink/af_netlink.h	2015-11-29 14:30:36.124536349 +0100
+++ linux-4.1.6/net/netlink/af_netlink.h	2015-11-29 14:31:38.060766662 +0100
@@ -35,7 +35,6 @@
 	unsigned long		state;
 	size_t			max_recvmsg_len;
 	wait_queue_head_t	wait;
-	bool			bound;
 	bool			cb_running;
 	struct netlink_callback	cb;
 	struct mutex		*cb_mutex;
@@ -60,15 +59,6 @@
 	return container_of(sk, struct netlink_sock, sk);
 }
 
-static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NETLINK_MMAP
-	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
-#else
-	return false;
-#endif /* CONFIG_NETLINK_MMAP */
-}
-
 struct netlink_table {
 	struct rhashtable	hash;
 	struct hlist_head	mc_list;