Skip to content

bpf: Rework bpf_redirect_neigh() to allow supplying nexthop from caller #249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
sudo: required
language: bash
dist: bionic
services:
- docker

env:
global:
- PROJECT_NAME='libbpf'
- AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")"
- REPO_ROOT="$TRAVIS_BUILD_DIR"
- CI_ROOT="$REPO_ROOT/travis-ci"
- VMTEST_ROOT="$CI_ROOT/vmtest"

addons:
apt:
packages:
- qemu-kvm
- zstd
- binutils-dev
- elfutils
- libcap-dev
- libelf-dev
- libdw-dev
- python3-docutils

jobs:
include:
- stage: Builds & Tests
name: Kernel LATEST + selftests
language: bash
env: KERNEL=LATEST
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
9 changes: 9 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -607,12 +607,21 @@ struct bpf_skb_data_end {
void *data_end;
};

struct bpf_nh_params {
u32 nh_family;
union {
__u32 ipv4_nh;
struct in6_addr ipv6_nh;
};
};

struct bpf_redirect_info {
u32 flags;
u32 tgt_index;
void *tgt_value;
struct bpf_map *map;
u32 kern_flags;
struct bpf_nh_params nh;
};

DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
Expand Down
22 changes: 18 additions & 4 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3677,15 +3677,19 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* long bpf_redirect_neigh(u32 ifindex, u64 flags)
* long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
* Description
* Redirect the packet to another net device of index *ifindex*
* and fill in L2 addresses from neighboring subsystem. This helper
* is somewhat similar to **bpf_redirect**\ (), except that it
* populates L2 addresses as well, meaning, internally, the helper
* performs a FIB lookup based on the skb's networking header to
* get the address of the next hop and then relies on the neighbor
* lookup for the L2 address of the nexthop.
* relies on the neighbor lookup for the L2 address of the nexthop.
*
* The helper will perform a FIB lookup based on the skb's
* networking header to get the address of the next hop, unless
* this is supplied by the caller in the *params* argument. The
* *plen* argument indicates the len of *params* and should be set
* to 0 if *params* is NULL.
*
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types, and enabled
Expand Down Expand Up @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};

struct bpf_redir_neigh {
/* network family for lookup (AF_INET, AF_INET6) */
__u32 nh_family;
/* network address of nexthop; skips fib lookup to find gateway */
union {
__be32 ipv4_nh;
__u32 ipv6_nh[4]; /* in6_addr; network order */
};
};

enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
Expand Down
159 changes: 99 additions & 60 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
}

#if IS_ENABLED(CONFIG_IPV6)
static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
struct net_device *dev, struct bpf_nh_params *nh)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
u32 hh_len = LL_RESERVED_SPACE(dev);
const struct in6_addr *nexthop;
struct dst_entry *dst = NULL;
struct neighbour *neigh;

if (dev_xmit_recursion()) {
Expand All @@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
}

rcu_read_lock_bh();
nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
&ipv6_hdr(skb)->daddr);
if (!nh) {
dst = skb_dst(skb);
nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
&ipv6_hdr(skb)->daddr);
} else {
nexthop = &nh->ipv6_nh;
}
neigh = ip_neigh_gw6(dev, nexthop);
if (likely(!IS_ERR(neigh))) {
int ret;
Expand All @@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
return ret;
}
rcu_read_unlock_bh();
IP6_INC_STATS(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
if (dst)
IP6_INC_STATS(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
kfree_skb(skb);
return -ENETDOWN;
}

static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
struct bpf_nh_params *nh)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct net *net = dev_net(dev);
int err, ret = NET_XMIT_DROP;
struct dst_entry *dst;
struct flowi6 fl6 = {
.flowi6_flags = FLOWI_FLAG_ANYSRC,
.flowi6_mark = skb->mark,
.flowlabel = ip6_flowinfo(ip6h),
.flowi6_oif = dev->ifindex,
.flowi6_proto = ip6h->nexthdr,
.daddr = ip6h->daddr,
.saddr = ip6h->saddr,
};

dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
if (IS_ERR(dst))
goto out_drop;
if (!nh) {
struct dst_entry *dst;
struct flowi6 fl6 = {
.flowi6_flags = FLOWI_FLAG_ANYSRC,
.flowi6_mark = skb->mark,
.flowlabel = ip6_flowinfo(ip6h),
.flowi6_oif = dev->ifindex,
.flowi6_proto = ip6h->nexthdr,
.daddr = ip6h->daddr,
.saddr = ip6h->saddr,
};

dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
if (IS_ERR(dst))
goto out_drop;

skb_dst_set(skb, dst);
skb_dst_set(skb, dst);
} else if (nh->nh_family != AF_INET6) {
goto out_drop;
}

err = bpf_out_neigh_v6(net, skb);
err = bpf_out_neigh_v6(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
dev->stats.tx_errors++;
else
Expand All @@ -2252,19 +2264,18 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
return ret;
}
#else
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
struct bpf_nh_params *nh)
{
kfree_skb(skb);
return NET_XMIT_DROP;
}
#endif /* CONFIG_IPV6 */

#if IS_ENABLED(CONFIG_INET)
static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
struct net_device *dev, struct bpf_nh_params *nh)
{
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = container_of(dst, struct rtable, dst);
struct net_device *dev = dst->dev;
u32 hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
bool is_v6gw = false;
Expand Down Expand Up @@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
}

rcu_read_lock_bh();
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!nh) {
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = container_of(dst, struct rtable, dst);

neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
} else if (nh->nh_family == AF_INET6) {
neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
is_v6gw = true;
} else if (nh->nh_family == AF_INET) {
neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
} else {
rcu_read_unlock_bh();
goto out_drop;
}

if (likely(!IS_ERR(neigh))) {
int ret;

Expand All @@ -2309,33 +2334,37 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
return -ENETDOWN;
}

static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct bpf_nh_params *nh)
{
const struct iphdr *ip4h = ip_hdr(skb);
struct net *net = dev_net(dev);
int err, ret = NET_XMIT_DROP;
struct rtable *rt;
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
.flowi4_tos = RT_TOS(ip4h->tos),
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
.saddr = ip4h->saddr,
};

rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
goto out_drop;
if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
ip_rt_put(rt);
goto out_drop;
}
if (!nh) {
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
.flowi4_tos = RT_TOS(ip4h->tos),
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
.saddr = ip4h->saddr,
};
struct rtable *rt;

rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
goto out_drop;
if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
ip_rt_put(rt);
goto out_drop;
}

skb_dst_set(skb, &rt->dst);
skb_dst_set(skb, &rt->dst);
}

err = bpf_out_neigh_v4(net, skb);
err = bpf_out_neigh_v4(net, skb, dev, nh);
if (unlikely(net_xmit_eval(err)))
dev->stats.tx_errors++;
else
Expand All @@ -2348,14 +2377,16 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
return ret;
}
#else
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct bpf_nh_params *nh)
{
kfree_skb(skb);
return NET_XMIT_DROP;
}
#endif /* CONFIG_INET */

static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
struct bpf_nh_params *nh)
{
struct ethhdr *ethh = eth_hdr(skb);

Expand All @@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
skb_reset_network_header(skb);

if (skb->protocol == htons(ETH_P_IP))
return __bpf_redirect_neigh_v4(skb, dev);
return __bpf_redirect_neigh_v4(skb, dev, nh);
else if (skb->protocol == htons(ETH_P_IPV6))
return __bpf_redirect_neigh_v6(skb, dev);
return __bpf_redirect_neigh_v6(skb, dev, nh);
out:
kfree_skb(skb);
return -ENOTSUPP;
Expand All @@ -2382,7 +2413,8 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
enum {
BPF_F_NEIGH = (1ULL << 1),
BPF_F_PEER = (1ULL << 2),
#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER)
BPF_F_NEXTHOP = (1ULL << 3),
#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
};

BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
Expand Down Expand Up @@ -2455,8 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb)
return -EAGAIN;
}
return flags & BPF_F_NEIGH ?
__bpf_redirect_neigh(skb, dev) :
__bpf_redirect(skb, dev, flags);
__bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? &ri->nh : NULL) :
__bpf_redirect(skb, dev, flags);
out_drop:
kfree_skb(skb);
return -EINVAL;
Expand Down Expand Up @@ -2504,16 +2536,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
.arg2_type = ARG_ANYTHING,
};

BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
int, plen, u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

if (unlikely(flags))
if (unlikely((plen && plen < sizeof(*params)) || flags))
return TC_ACT_SHOT;

ri->flags = BPF_F_NEIGH;
ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
ri->tgt_index = ifindex;

BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
if (plen)
memcpy(&ri->nh, params, sizeof(ri->nh));

return TC_ACT_REDIRECT;
}

Expand All @@ -2522,7 +2559,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_ANYTHING,
.arg2_type = ARG_PTR_TO_MEM_OR_NULL,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};

BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
Expand Down
1 change: 1 addition & 0 deletions scripts/bpf_helpers_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,7 @@ class PrinterHelpers(Printer):
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
'struct bpf_redir_neigh',
'struct bpf_sk_lookup',
'struct bpf_sock',
'struct bpf_sock_addr',
Expand Down
Loading