blob: bbf8296f4d66336b2776ba0faa073f01cea92934 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
// Copyright (c) 2020 Cloudflare
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <sys/socket.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
/* Build a 32-bit IPv4 address a.b.c.d and convert it with bpf_htonl().
 * Each argument is masked to its low 8 bits before being shifted into place,
 * with 'a' ending up in the most significant byte of the host-order value.
 */
#define IP4(a, b, c, d) \
bpf_htonl((((__u32)(a) & 0xffU) << 24) | \
(((__u32)(b) & 0xffU) << 16) | \
(((__u32)(c) & 0xffU) << 8) | \
(((__u32)(d) & 0xffU) << 0))
/* Brace initializer of four 32-bit words, each converted with bpf_htonl().
 * Used below to initialize DST_IP6.
 */
#define IP6(aaaa, bbbb, cccc, dddd) \
{ bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
/* Number of slots (max_entries) in redir_map. */
#define MAX_SOCKS 32
/* Sockmap holding the sockets that the programs in this file select.
 * Keys are __u32 indices; the programs below only use slots SERVER_A and
 * SERVER_B. Nothing in this file populates the map.
 */
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, MAX_SOCKS);
__type(key, __u32);
__type(value, __u64);
} redir_map SEC(".maps");
/* Two-slot array indexed by PROG1/PROG2. The multi_prog_* programs store
 * PROG_DONE into their slot each time they run.
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 2);
__type(key, int);
__type(value, int);
} run_map SEC(".maps");
/* Indices into run_map, one per multi_prog_* program slot. */
enum {
PROG1 = 0,
PROG2,
};
/* Indices into redir_map for the two sockets the programs refer to. */
enum {
SERVER_A = 0,
SERVER_B,
};
/* Addressable key/value constants for convenience: the map helpers take
 * pointers to keys and values, so the enum values above are mirrored in
 * objects whose address can be taken.
 */
static const int KEY_PROG1 = PROG1;
static const int KEY_PROG2 = PROG2;
/* Value written to run_map by the multi_prog_* programs. */
static const int PROG_DONE = 1;
static const __u32 KEY_SERVER_A = SERVER_A;
static const __u32 KEY_SERVER_B = SERVER_B;
/* Destination the redir_* and ctx_narrow_access programs match on. */
static const __u16 DST_PORT = 7007; /* Host byte order */
static const __u32 DST_IP4 = IP4(127, 0, 0, 1); /* 127.0.0.1 */
static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); /* fd00::1 */
/* sk_lookup program that selects no socket and always returns SK_PASS. */
SEC("sk_lookup/lookup_pass")
int lookup_pass(struct bpf_sk_lookup *ctx)
{
return SK_PASS;
}
/* sk_lookup program that always returns SK_DROP. */
SEC("sk_lookup/lookup_drop")
int lookup_drop(struct bpf_sk_lookup *ctx)
{
return SK_DROP;
}
/* sk_reuseport program that selects no socket and always returns SK_PASS. */
SEC("sk_reuseport/reuse_pass")
int reuseport_pass(struct sk_reuseport_md *ctx)
{
return SK_PASS;
}
/* sk_reuseport program that always returns SK_DROP. */
SEC("sk_reuseport/reuse_drop")
int reuseport_drop(struct sk_reuseport_md *ctx)
{
return SK_DROP;
}
/* Steer lookups whose local port is DST_PORT to the socket stored at
 * redir_map[SERVER_A]. Lookups for any other port are passed through.
 *
 * Returns SK_DROP only when bpf_sk_assign() fails; SK_PASS otherwise,
 * including when the map slot is empty.
 */
SEC("sk_lookup/redir_port")
int redir_port(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *server;
	int rc;

	if (ctx->local_port == DST_PORT) {
		server = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
		if (server) {
			rc = bpf_sk_assign(ctx, server, 0);
			/* The lookup took a reference; drop it on every path. */
			bpf_sk_release(server);
			if (rc)
				return SK_DROP;
		}
	}

	return SK_PASS;
}
/* Steer IPv4 lookups addressed to DST_IP4:DST_PORT to the socket stored at
 * redir_map[SERVER_A]. Anything else is passed through.
 *
 * Returns SK_DROP only when bpf_sk_assign() fails; SK_PASS otherwise,
 * including when the map slot is empty.
 */
SEC("sk_lookup/redir_ip4")
int redir_ip4(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *server;
	int rc;

	/* Not our destination: leave the lookup alone. */
	if (ctx->family != AF_INET ||
	    ctx->local_port != DST_PORT ||
	    ctx->local_ip4 != DST_IP4)
		return SK_PASS;

	server = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!server)
		return SK_PASS;

	rc = bpf_sk_assign(ctx, server, 0);
	bpf_sk_release(server);
	if (rc)
		return SK_DROP;

	return SK_PASS;
}
/* Steer IPv6 lookups addressed to [DST_IP6]:DST_PORT to the socket stored at
 * redir_map[SERVER_A]. Anything else is passed through.
 *
 * Returns SK_DROP only when bpf_sk_assign() fails; SK_PASS otherwise,
 * including when the map slot is empty.
 */
SEC("sk_lookup/redir_ip6")
int redir_ip6(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *server;
	int rc;

	/* Family and port must match before the address is examined. */
	if (ctx->family != AF_INET6 || ctx->local_port != DST_PORT)
		return SK_PASS;

	/* Compare the address one 32-bit word at a time. */
	if (ctx->local_ip6[0] != DST_IP6[0])
		return SK_PASS;
	if (ctx->local_ip6[1] != DST_IP6[1])
		return SK_PASS;
	if (ctx->local_ip6[2] != DST_IP6[2])
		return SK_PASS;
	if (ctx->local_ip6[3] != DST_IP6[3])
		return SK_PASS;

	server = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!server)
		return SK_PASS;

	rc = bpf_sk_assign(ctx, server, 0);
	bpf_sk_release(server);
	if (rc)
		return SK_DROP;

	return SK_PASS;
}
/* Unconditionally select the socket stored at redir_map[SERVER_A].
 *
 * Returns SK_PASS when the slot is empty or the assignment succeeds,
 * SK_DROP when bpf_sk_assign() fails.
 */
SEC("sk_lookup/select_sock_a")
int select_sock_a(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *server;
	int rc;

	server = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (server) {
		rc = bpf_sk_assign(ctx, server, 0);
		bpf_sk_release(server);
		if (rc)
			return SK_DROP;
	}

	return SK_PASS;
}
/* Select the socket stored at redir_map[SERVER_A], passing
 * BPF_SK_LOOKUP_F_NO_REUSEPORT to bpf_sk_assign().
 *
 * Unlike select_sock_a, an empty map slot results in SK_DROP.
 * Returns SK_PASS only when the assignment succeeds.
 */
SEC("sk_lookup/select_sock_a_no_reuseport")
int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *server;
	int rc;

	server = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!server)
		return SK_DROP;

	rc = bpf_sk_assign(ctx, server, BPF_SK_LOOKUP_F_NO_REUSEPORT);
	bpf_sk_release(server);
	if (rc)
		return SK_DROP;

	return SK_PASS;
}
/* Reuseport program that selects the socket stored at redir_map[SERVER_B].
 *
 * Returns SK_PASS when bpf_sk_select_reuseport() succeeds, SK_DROP otherwise.
 */
SEC("sk_reuseport/select_sock_b")
int select_sock_b(struct sk_reuseport_md *ctx)
{
	/* The helper is handed a pointer to a local copy of the key. */
	__u32 slot = KEY_SERVER_B;

	if (bpf_sk_select_reuseport(ctx, &redir_map, &slot, 0))
		return SK_DROP;

	return SK_PASS;
}
/* Check that a second bpf_sk_assign() without flags fails with -EEXIST
 * once a socket has already been selected.
 *
 * SERVER_B is assigned first, then assigning SERVER_A must be refused.
 * Returns SK_PASS only if every step behaves as expected, SK_DROP otherwise.
 */
SEC("sk_lookup/sk_assign_eexist")
int sk_assign_eexist(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *first, *second;
	int rc;

	/* Step 1: select SERVER_B; this assignment must succeed. */
	first = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
	if (!first)
		return SK_DROP;
	rc = bpf_sk_assign(ctx, first, 0);
	bpf_sk_release(first);
	if (rc)
		return SK_DROP;

	/* Step 2: try to select SERVER_A on top of the existing selection. */
	second = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!second)
		return SK_DROP;
	rc = bpf_sk_assign(ctx, second, 0);
	if (rc != -EEXIST) {
		bpf_printk("sk_assign returned %d, expected %d\n",
			   rc, -EEXIST);
		bpf_sk_release(second);
		return SK_DROP;
	}
	bpf_sk_release(second);

	/* Success, redirect to KEY_SERVER_B */
	return SK_PASS;
}
/* Check that bpf_sk_assign() with BPF_SK_LOOKUP_F_REPLACE overrides an
 * earlier selection.
 *
 * SERVER_A is assigned first, then replaced by SERVER_B.
 * Returns SK_PASS only if both assignments succeed, SK_DROP otherwise.
 */
SEC("sk_lookup/sk_assign_replace_flag")
int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *first, *second;
	int rc;

	/* Step 1: initial selection of SERVER_A. */
	first = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!first)
		return SK_DROP;
	rc = bpf_sk_assign(ctx, first, 0);
	bpf_sk_release(first);
	if (rc)
		return SK_DROP;

	/* Step 2: override the selection with SERVER_B. */
	second = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
	if (!second)
		return SK_DROP;
	rc = bpf_sk_assign(ctx, second, BPF_SK_LOOKUP_F_REPLACE);
	if (rc) {
		bpf_printk("sk_assign returned %d, expected 0\n", rc);
		bpf_sk_release(second);
		return SK_DROP;
	}
	bpf_sk_release(second);

	/* Success, redirect to KEY_SERVER_B */
	return SK_PASS;
}
/* Check that bpf_sk_assign(sk=NULL) is accepted.
 *
 * Exercises a NULL assignment before and after a real socket is selected,
 * with and without BPF_SK_LOOKUP_F_REPLACE. Returns SK_PASS only when every
 * call returns the expected value; any deviation returns SK_DROP.
 */
SEC("sk_lookup/sk_assign_null")
int sk_assign_null(struct bpf_sk_lookup *ctx)
{
/* Initialized to NULL so the release at 'out' is skipped when no lookup
 * has happened yet.
 */
struct bpf_sock *sk = NULL;
int err, ret;
ret = SK_DROP;
/* NULL assignment with nothing selected yet must succeed. */
err = bpf_sk_assign(ctx, NULL, 0);
if (err) {
bpf_printk("sk_assign returned %d, expected 0\n", err);
goto out;
}
sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
if (!sk)
goto out;
/* Select SERVER_B with the replace flag. */
err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
if (err) {
bpf_printk("sk_assign returned %d, expected 0\n", err);
goto out;
}
/* The selection must be visible through the context. */
if (ctx->sk != sk)
goto out;
/* With a socket selected, a NULL assignment without the replace flag is
 * expected to fail with -EEXIST.
 */
err = bpf_sk_assign(ctx, NULL, 0);
if (err != -EEXIST)
goto out;
/* With the replace flag, the NULL assignment must succeed. */
err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
/* Select SERVER_B again so the program ends with it assigned. */
err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
out:
/* Drop the reference obtained from the map lookup, if any. */
if (sk)
bpf_sk_release(sk);
return ret;
}
/* Check that selected sk is accessible through context.
 *
 * Reads ctx->sk before any selection, after selecting SERVER_A, after
 * resetting the selection to NULL, and after selecting SERVER_B. Both
 * sockets are expected to be AF_INET / SOCK_STREAM sockets in
 * BPF_TCP_LISTEN state. Returns SK_PASS only when every check holds,
 * SK_DROP otherwise.
 */
SEC("sk_lookup/access_ctx_sk")
int access_ctx_sk(struct bpf_sk_lookup *ctx)
{
/* Both start as NULL so the releases at 'out' are skipped for lookups
 * that never happened.
 */
struct bpf_sock *sk1 = NULL, *sk2 = NULL;
int err, ret;
ret = SK_DROP;
/* Try accessing unassigned (NULL) ctx->sk field */
if (ctx->sk && ctx->sk->family != AF_INET)
goto out;
/* Assign a value to ctx->sk */
sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
if (!sk1)
goto out;
err = bpf_sk_assign(ctx, sk1, 0);
if (err)
goto out;
if (ctx->sk != sk1)
goto out;
/* Access ctx->sk fields */
if (ctx->sk->family != AF_INET ||
ctx->sk->type != SOCK_STREAM ||
ctx->sk->state != BPF_TCP_LISTEN)
goto out;
/* Reset selection */
err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
/* After the reset, ctx->sk must read back as NULL. */
if (ctx->sk)
goto out;
/* Assign another socket */
sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
if (!sk2)
goto out;
err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
if (err)
goto out;
if (ctx->sk != sk2)
goto out;
/* Access reassigned ctx->sk fields */
if (ctx->sk->family != AF_INET ||
ctx->sk->type != SOCK_STREAM ||
ctx->sk->state != BPF_TCP_LISTEN)
goto out;
ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
out:
/* Drop whichever map-lookup references were obtained. */
if (sk1)
bpf_sk_release(sk1);
if (sk2)
bpf_sk_release(sk2);
return ret;
}
/* Check narrow loads from ctx fields that support them.
 *
 * Narrow loads of size >= target field size from a non-zero offset
 * are not covered because they give bogus results, that is the
 * verifier ignores the offset.
 *
 * Every field is read byte-wise and half-word-wise and compared against
 * the expected value (IPPROTO_TCP, DST_PORT, DST_IP4 or DST_IP6, and
 * all-zero addresses for the family that is not in use). The first
 * mismatch returns SK_DROP. When all checks hold, the socket at
 * redir_map[SERVER_B] is assigned (if present) and SK_PASS is returned.
 *
 * NOTE(review): the expected byte/half-word values are spelled out for a
 * little-endian layout of the ctx fields (byte[0] is compared with the
 * low-order byte of the constant).
 */
SEC("sk_lookup/ctx_narrow_access")
int ctx_narrow_access(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *sk;
	__u16 *half;
	__u8 *byte;
	bool v4;

	v4 = (ctx->family == AF_INET);

	/* Narrow loads from family field */
	byte = (__u8 *)&ctx->family;
	half = (__u16 *)&ctx->family;
	if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
	    byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
		return SK_DROP;
	if (half[0] != (v4 ? AF_INET : AF_INET6))
		return SK_DROP;

	/* Narrow loads from protocol field. Expect IPPROTO_TCP. */
	byte = (__u8 *)&ctx->protocol;
	if (byte[0] != IPPROTO_TCP ||
	    byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
		return SK_DROP;
	half = (__u16 *)&ctx->protocol;
	if (half[0] != IPPROTO_TCP)
		return SK_DROP;

	/* Narrow loads from remote_port field. Expect non-0 value. */
	byte = (__u8 *)&ctx->remote_port;
	if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
		return SK_DROP;
	half = (__u16 *)&ctx->remote_port;
	if (half[0] == 0)
		return SK_DROP;

	/* Narrow loads from local_port field. Expect DST_PORT. */
	byte = (__u8 *)&ctx->local_port;
	if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
	    byte[1] != ((DST_PORT >> 8) & 0xff) ||
	    byte[2] != 0 || byte[3] != 0)
		return SK_DROP;
	half = (__u16 *)&ctx->local_port;
	if (half[0] != DST_PORT)
		return SK_DROP;

	/* Narrow loads from IPv4 fields */
	if (v4) {
		/* Expect non-0.0.0.0 in remote_ip4 */
		byte = (__u8 *)&ctx->remote_ip4;
		if (byte[0] == 0 && byte[1] == 0 &&
		    byte[2] == 0 && byte[3] == 0)
			return SK_DROP;
		half = (__u16 *)&ctx->remote_ip4;
		if (half[0] == 0 && half[1] == 0)
			return SK_DROP;

		/* Expect DST_IP4 in local_ip4 */
		byte = (__u8 *)&ctx->local_ip4;
		if (byte[0] != ((DST_IP4 >>  0) & 0xff) ||
		    byte[1] != ((DST_IP4 >>  8) & 0xff) ||
		    byte[2] != ((DST_IP4 >> 16) & 0xff) ||
		    byte[3] != ((DST_IP4 >> 24) & 0xff))
			return SK_DROP;
		half = (__u16 *)&ctx->local_ip4;
		if (half[0] != ((DST_IP4 >>  0) & 0xffff) ||
		    half[1] != ((DST_IP4 >> 16) & 0xffff))
			return SK_DROP;
	} else {
		/* Expect 0.0.0.0 IPs when family != AF_INET.
		 *
		 * Any single non-zero byte must fail the check, so the
		 * conditions are joined with || only. Mixing in && would
		 * group as b0 || (b1 && b2) || b3 and let a lone non-zero
		 * byte[1] or byte[2] slip through.
		 */
		byte = (__u8 *)&ctx->remote_ip4;
		if (byte[0] != 0 || byte[1] != 0 ||
		    byte[2] != 0 || byte[3] != 0)
			return SK_DROP;
		half = (__u16 *)&ctx->remote_ip4;
		if (half[0] != 0 || half[1] != 0)
			return SK_DROP;

		byte = (__u8 *)&ctx->local_ip4;
		if (byte[0] != 0 || byte[1] != 0 ||
		    byte[2] != 0 || byte[3] != 0)
			return SK_DROP;
		half = (__u16 *)&ctx->local_ip4;
		if (half[0] != 0 || half[1] != 0)
			return SK_DROP;
	}

	/* Narrow loads from IPv6 fields */
	if (!v4) {
		/* Expect non-:: IP in remote_ip6 */
		byte = (__u8 *)&ctx->remote_ip6;
		if (byte[0] == 0 && byte[1] == 0 &&
		    byte[2] == 0 && byte[3] == 0 &&
		    byte[4] == 0 && byte[5] == 0 &&
		    byte[6] == 0 && byte[7] == 0 &&
		    byte[8] == 0 && byte[9] == 0 &&
		    byte[10] == 0 && byte[11] == 0 &&
		    byte[12] == 0 && byte[13] == 0 &&
		    byte[14] == 0 && byte[15] == 0)
			return SK_DROP;
		half = (__u16 *)&ctx->remote_ip6;
		if (half[0] == 0 && half[1] == 0 &&
		    half[2] == 0 && half[3] == 0 &&
		    half[4] == 0 && half[5] == 0 &&
		    half[6] == 0 && half[7] == 0)
			return SK_DROP;

		/* Expect DST_IP6 in local_ip6 */
		byte = (__u8 *)&ctx->local_ip6;
		if (byte[0] != ((DST_IP6[0] >>  0) & 0xff) ||
		    byte[1] != ((DST_IP6[0] >>  8) & 0xff) ||
		    byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
		    byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
		    byte[4] != ((DST_IP6[1] >>  0) & 0xff) ||
		    byte[5] != ((DST_IP6[1] >>  8) & 0xff) ||
		    byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
		    byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
		    byte[8] != ((DST_IP6[2] >>  0) & 0xff) ||
		    byte[9] != ((DST_IP6[2] >>  8) & 0xff) ||
		    byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
		    byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
		    byte[12] != ((DST_IP6[3] >>  0) & 0xff) ||
		    byte[13] != ((DST_IP6[3] >>  8) & 0xff) ||
		    byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
		    byte[15] != ((DST_IP6[3] >> 24) & 0xff))
			return SK_DROP;
		half = (__u16 *)&ctx->local_ip6;
		if (half[0] != ((DST_IP6[0] >>  0) & 0xffff) ||
		    half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
		    half[2] != ((DST_IP6[1] >>  0) & 0xffff) ||
		    half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
		    half[4] != ((DST_IP6[2] >>  0) & 0xffff) ||
		    half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
		    half[6] != ((DST_IP6[3] >>  0) & 0xffff) ||
		    half[7] != ((DST_IP6[3] >> 16) & 0xffff))
			return SK_DROP;
	} else {
		/* Expect :: IPs when family != AF_INET6 */
		byte = (__u8 *)&ctx->remote_ip6;
		if (byte[0] != 0 || byte[1] != 0 ||
		    byte[2] != 0 || byte[3] != 0 ||
		    byte[4] != 0 || byte[5] != 0 ||
		    byte[6] != 0 || byte[7] != 0 ||
		    byte[8] != 0 || byte[9] != 0 ||
		    byte[10] != 0 || byte[11] != 0 ||
		    byte[12] != 0 || byte[13] != 0 ||
		    byte[14] != 0 || byte[15] != 0)
			return SK_DROP;
		half = (__u16 *)&ctx->remote_ip6;
		if (half[0] != 0 || half[1] != 0 ||
		    half[2] != 0 || half[3] != 0 ||
		    half[4] != 0 || half[5] != 0 ||
		    half[6] != 0 || half[7] != 0)
			return SK_DROP;

		byte = (__u8 *)&ctx->local_ip6;
		if (byte[0] != 0 || byte[1] != 0 ||
		    byte[2] != 0 || byte[3] != 0 ||
		    byte[4] != 0 || byte[5] != 0 ||
		    byte[6] != 0 || byte[7] != 0 ||
		    byte[8] != 0 || byte[9] != 0 ||
		    byte[10] != 0 || byte[11] != 0 ||
		    byte[12] != 0 || byte[13] != 0 ||
		    byte[14] != 0 || byte[15] != 0)
			return SK_DROP;
		half = (__u16 *)&ctx->local_ip6;
		if (half[0] != 0 || half[1] != 0 ||
		    half[2] != 0 || half[3] != 0 ||
		    half[4] != 0 || half[5] != 0 ||
		    half[6] != 0 || half[7] != 0)
			return SK_DROP;
	}

	/* Success, redirect to KEY_SERVER_B.
	 * The result of bpf_sk_assign() is not checked here; the program
	 * returns SK_PASS whether or not a socket ends up selected.
	 */
	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
	if (sk) {
		bpf_sk_assign(ctx, sk, 0);
		bpf_sk_release(sk);
	}
	return SK_PASS;
}
/* Check that bpf_sk_assign() rejects the SERVER_A socket with
 * -ESOCKTNOSUPPORT.
 *
 * Returns SK_PASS when the helper fails with exactly that error, SK_DROP
 * when the map slot is empty or any other value comes back.
 */
SEC("sk_lookup/sk_assign_esocknosupport")
int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *server;
	int verdict = SK_DROP;
	int rc;

	server = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!server)
		return verdict;

	rc = bpf_sk_assign(ctx, server, 0);
	if (rc == -ESOCKTNOSUPPORT) {
		/* Success, pass to regular lookup */
		verdict = SK_PASS;
	} else {
		bpf_printk("sk_assign returned %d, expected %d\n",
			   rc, -ESOCKTNOSUPPORT);
	}

	bpf_sk_release(server);
	return verdict;
}
/* Record in run_map[PROG1] that this program ran, then return SK_PASS.
 * The result of the map update is not checked.
 */
SEC("sk_lookup/multi_prog_pass1")
int multi_prog_pass1(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
/* Record in run_map[PROG2] that this program ran, then return SK_PASS.
 * The result of the map update is not checked.
 */
SEC("sk_lookup/multi_prog_pass2")
int multi_prog_pass2(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
/* Record in run_map[PROG1] that this program ran, then return SK_DROP.
 * The result of the map update is not checked.
 */
SEC("sk_lookup/multi_prog_drop1")
int multi_prog_drop1(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_DROP;
}
/* Record in run_map[PROG2] that this program ran, then return SK_DROP.
 * The result of the map update is not checked.
 */
SEC("sk_lookup/multi_prog_drop2")
int multi_prog_drop2(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_DROP;
}
/* Helper: select the socket stored at redir_map[SERVER_A].
 *
 * Returns SK_PASS when the assignment succeeds, SK_DROP when the map slot
 * is empty or bpf_sk_assign() fails.
 */
static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *server;
	int rc;

	server = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!server)
		return SK_DROP;

	rc = bpf_sk_assign(ctx, server, 0);
	/* Release the lookup reference regardless of the outcome. */
	bpf_sk_release(server);

	return rc ? SK_DROP : SK_PASS;
}
/* Try to select SERVER_A, record in run_map[PROG1] that this program ran,
 * and return SK_PASS.
 *
 * The verdict of select_server_a() is discarded: this program returns
 * SK_PASS whether or not the selection succeeded.
 * NOTE(review): presumably deliberate, so that a refused assignment does
 * not turn into a drop when several programs are attached - confirm
 * against the user-space test before propagating the verdict.
 */
SEC("sk_lookup/multi_prog_redir1")
int multi_prog_redir1(struct bpf_sk_lookup *ctx)
{
	(void)select_server_a(ctx);
	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
	return SK_PASS;
}
/* Try to select SERVER_A, record in run_map[PROG2] that this program ran,
 * and return SK_PASS.
 *
 * The verdict of select_server_a() is discarded: this program returns
 * SK_PASS whether or not the selection succeeded.
 * NOTE(review): presumably deliberate, so that a refused assignment does
 * not turn into a drop when several programs are attached - confirm
 * against the user-space test before propagating the verdict.
 */
SEC("sk_lookup/multi_prog_redir2")
int multi_prog_redir2(struct bpf_sk_lookup *ctx)
{
	(void)select_server_a(ctx);
	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
	return SK_PASS;
}
/* License string placed in the "license" section of the object. */
char _license[] SEC("license") = "Dual BSD/GPL";
/* Version word placed in the "version" section of the object. */
__u32 _version SEC("version") = 1;