Merge changes from topic "netbpfload" into main
* changes:
  netbpfload: remove netbpfload.rc
  netbpfload: remove support for limiting selinux contexts
  netbpfload: do not create /sys/fs/bpf/loader subdir
  type safety for 'bool downstream'
  type safety for 'bool updatetime'
@@ -91,14 +91,14 @@ struct egress_bool { bool egress; };
 #define INGRESS ((struct egress_bool){ .egress = false })
 #define EGRESS ((struct egress_bool){ .egress = true })
 
-// constants for passing in to 'bool downstream'
-static const bool UPSTREAM = false;
-static const bool DOWNSTREAM = true;
+struct stream_bool { bool down; };
+#define UPSTREAM ((struct stream_bool){ .down = false })
+#define DOWNSTREAM ((struct stream_bool){ .down = true })
 
 struct rawip_bool { bool rawip; };
 #define ETHER ((struct rawip_bool){ .rawip = false })
 #define RAWIP ((struct rawip_bool){ .rawip = true })
 
-// constants for passing in to 'bool updatetime'
-static const bool NO_UPDATETIME = false;
-static const bool UPDATETIME = true;
+struct updatetime_bool { bool updatetime; };
+#define NO_UPDATETIME ((struct updatetime_bool){ .updatetime = false })
+#define UPDATETIME ((struct updatetime_bool){ .updatetime = true })
 
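The "type safety" changes in the hunk above replace bare bool constants with one-field wrapper structs, so UPSTREAM/DOWNSTREAM and (NO_)UPDATETIME each get a distinct type, matching the already-converted egress_bool and rawip_bool. The payoff is that boolean-ish arguments can no longer be silently swapped at a call site. A minimal standalone sketch of the compile-time effect (not part of the commit; toy names and return values):

#include <stdbool.h>

struct stream_bool { bool down; };
struct rawip_bool { bool rawip; };

#define DOWNSTREAM ((struct stream_bool){ .down = true })
#define RAWIP      ((struct rawip_bool){ .rawip = true })

// With distinct wrapper types, the compiler enforces the parameter order.
static int forward(const struct rawip_bool rawip, const struct stream_bool stream) {
    return (rawip.rawip ? 2 : 0) | (stream.down ? 1 : 0);
}

int main(void) {
    int ok = forward(RAWIP, DOWNSTREAM);   // compiles: argument types match
    // forward(DOWNSTREAM, RAWIP);         // would not compile: incompatible struct types
    return (ok == 3) ? 0 : 1;
}

With plain 'const bool' constants, the commented-out call would compile and silently flip both flags.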
@@ -126,7 +126,7 @@ DEFINE_BPF_MAP_GRW(tether_upstream6_map, HASH, TetherUpstream6Key, Tether6Value,
 
 static inline __always_inline int do_forward6(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
+                                              const struct stream_bool stream,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
 
@@ -188,7 +188,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         TC_PUNT(NON_GLOBAL_DST);
 
     // In the upstream direction do not forward traffic within the same /64 subnet.
-    if (!downstream && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
+    if (!stream.down && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
         TC_PUNT(LOCAL_SRC_DST);
 
     TetherDownstream6Key kd = {
@@ -200,15 +200,15 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         .iif = skb->ifindex,
         .src64 = 0,
     };
-    if (is_ethernet) __builtin_memcpy(downstream ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
+    if (is_ethernet) __builtin_memcpy(stream.down ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether6Value* v = downstream ? bpf_tether_downstream6_map_lookup_elem(&kd)
+    Tether6Value* v = stream.down ? bpf_tether_downstream6_map_lookup_elem(&kd)
                                  : bpf_tether_upstream6_map_lookup_elem(&ku);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
 
@@ -253,7 +253,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
 
@@ -265,7 +265,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
 
         // I do not believe this can ever happen, but keep the verifier happy...
        if (data + sizeof(struct ethhdr) + sizeof(*ip6) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -285,8 +285,8 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
     // (-ENOTSUPP) if it isn't.
     bpf_csum_update(skb, 0xFFFF - ntohs(old_hl) + ntohs(new_hl));
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Overwrite any mac header with the new one
     // For a rawip tx interface it will simply be a bunch of zeroes and later stripped.
@@ -361,8 +361,8 @@ DEFINE_BPF_MAP_GRW(tether_upstream4_map, HASH, Tether4Key, Tether4Value, 1024, T
 static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         const int l2_header_size, void* data, const void* data_end,
         struct ethhdr* eth, struct iphdr* ip, const struct rawip_bool rawip,
-        const bool downstream, const bool updatetime, const bool is_tcp,
-        const struct kver_uint kver) {
+        const struct stream_bool stream, const struct updatetime_bool updatetime,
+        const bool is_tcp, const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
     struct tcphdr* tcph = is_tcp ? (void*)(ip + 1) : NULL;
     struct udphdr* udph = is_tcp ? NULL : (void*)(ip + 1);
@@ -421,13 +421,13 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
     };
     if (is_ethernet) __builtin_memcpy(k.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether4Value* v = downstream ? bpf_tether_downstream4_map_lookup_elem(&k)
+    Tether4Value* v = stream.down ? bpf_tether_downstream4_map_lookup_elem(&k)
                                  : bpf_tether_upstream4_map_lookup_elem(&k);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
 
@@ -472,7 +472,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
 
@@ -486,7 +486,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
 
         // I do not believe this can ever happen, but keep the verifier happy...
        if (data + sizeof(struct ethhdr) + sizeof(*ip) + (is_tcp ? sizeof(*tcph) : sizeof(*udph)) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -538,10 +538,10 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
 
     // This requires the bpf_ktime_get_boot_ns() helper which was added in 5.8,
     // and backported to all Android Common Kernel 4.14+ trees.
-    if (updatetime) v->last_used = bpf_ktime_get_boot_ns();
+    if (updatetime.updatetime) v->last_used = bpf_ktime_get_boot_ns();
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Redirect to forwarded interface.
     //
@@ -554,8 +554,8 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
 
 static inline __always_inline int do_forward4(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
-                                              const bool updatetime,
+                                              const struct stream_bool stream,
+                                              const struct updatetime_bool updatetime,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
 
@@ -616,16 +616,16 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // in such a situation we can only support TCP. This also has the added nice benefit of
     // using a separate error counter, and thus making it obvious which version of the program
     // is loaded.
-    if (!updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
+    if (!updatetime.updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
 
     // We do not support offloading anything besides IPv4 TCP and UDP, due to need for NAT,
     // but no need to check this if !updatetime due to check immediately above.
-    if (updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
+    if (updatetime.updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
         TC_PUNT(NON_TCP_UDP);
 
     // We want to make sure that the compiler will, in the !updatetime case, entirely optimize
     // out all the non-tcp logic. Also note that at this point is_udp === !is_tcp.
-    const bool is_tcp = !updatetime || (ip->protocol == IPPROTO_TCP);
+    const bool is_tcp = !updatetime.updatetime || (ip->protocol == IPPROTO_TCP);
 
     // This is a bit of a hack to make things easier on the bpf verifier.
     // (In particular I believe the Linux 4.14 kernel's verifier can get confused later on about
@@ -646,10 +646,10 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // if the underlying requisite kernel support (bpf_ktime_get_boot_ns) was backported.
     if (is_tcp) {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ true, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ true, kver);
     } else {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ false, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ false, kver);
     }
 }
 
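As the comments in the hunks above note, do_forward4() stays a thin wrapper that calls do_forward4_bottom() once with a literal true and once with a literal false for is_tcp, so the compiler specializes each inlined copy and drops the dead protocol branch. A standalone sketch of that technique (toy logic, not the commit's code):

#include <stdbool.h>

// An always_inline helper called with a compile-time-constant flag gets
// cloned at each call site, and the untaken branch is optimized away.
static inline __attribute__((always_inline)) int bottom(int x, const bool is_tcp) {
    if (is_tcp) return x * 2;  // 'TCP-only' logic: survives only in the true clone
    return x + 1;              // 'UDP-only' logic: survives only in the false clone
}

int top(int x, bool proto_is_tcp) {
    // Mirrors the do_forward4() dispatch: each call passes a literal flag,
    // so each inlined copy is specialized and branch-free.
    if (proto_is_tcp) {
        return bottom(x, /* is_tcp */ true);
    } else {
        return bottom(x, /* is_tcp */ false);
    }
}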
@@ -808,16 +808,17 @@ DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$stub", TETHERING_UID
 DEFINE_BPF_MAP_GRW(tether_dev_map, DEVMAP_HASH, uint32_t, uint32_t, 64, TETHERING_GID)
 
 static inline __always_inline int do_xdp_forward6(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
 static inline __always_inline int do_xdp_forward4(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
     const struct ethhdr* eth = data;
@@ -826,15 +827,16 @@ static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const
     if ((void*)(eth + 1) > data_end) return XDP_PASS;
 
     if (eth->h_proto == htons(ETH_P_IPV6))
-        return do_xdp_forward6(ctx, ETHER, downstream);
+        return do_xdp_forward6(ctx, ETHER, stream);
     if (eth->h_proto == htons(ETH_P_IP))
-        return do_xdp_forward4(ctx, ETHER, downstream);
+        return do_xdp_forward4(ctx, ETHER, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
 
@@ -842,8 +844,8 @@ static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const
     if (data_end - data < 1) return XDP_PASS;
     const uint8_t v = (*(uint8_t*)data) >> 4;
 
-    if (v == 6) return do_xdp_forward6(ctx, RAWIP, downstream);
-    if (v == 4) return do_xdp_forward4(ctx, RAWIP, downstream);
+    if (v == 6) return do_xdp_forward6(ctx, RAWIP, stream);
+    if (v == 4) return do_xdp_forward4(ctx, RAWIP, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
@@ -36,6 +36,4 @@ cc_binary {
         "loader.cpp",
         "NetBpfLoad.cpp",
     ],
-
-    init_rc: ["netbpfload.rc"],
 }
@@ -65,46 +65,34 @@ bool exists(const char* const path) {
     abort();  // can only hit this if permissions (likely selinux) are screwed up
 }
 
-constexpr unsigned long long kTetheringApexDomainBitmask =
-        domainToBitmask(domain::tethering) |
-        domainToBitmask(domain::net_private) |
-        domainToBitmask(domain::net_shared) |
-        domainToBitmask(domain::netd_readonly) |
-        domainToBitmask(domain::netd_shared);
-
 
 const android::bpf::Location locations[] = {
         // S+ Tethering mainline module (network_stack): tether offload
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/",
                 .prefix = "tethering/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
        // netutils_wrapper (for iptables xt_bpf) has access to programs
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_shared/",
                 .prefix = "netd_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
         // netutils_wrapper has no access, netd has read only access
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_readonly/",
                 .prefix = "netd_readonly/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with system server)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_shared/",
                 .prefix = "net_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (not shared, just network_stack)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_private/",
                 .prefix = "net_private/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
 };
 
@@ -247,13 +235,6 @@ int main(int argc, char** argv) {
         if (createSysFsBpfSubDir(location.prefix)) return 1;
     }
 
-    // Note: there's no actual src dir for fs_bpf_loader .o's,
-    // so it is not listed in 'locations[].prefix'.
-    // This is because this is primarily meant for triggering genfscon rules,
-    // and as such this will likely always be the case.
-    // Thus we need to manually create the /sys/fs/bpf/loader subdirectory.
-    if (createSysFsBpfSubDir("loader")) return 1;
-
     // Load all ELF objects, create programs and maps, and pin them
     for (const auto& location : locations) {
         if (loadAllElfObjects(location) != 0) {
@@ -621,8 +621,7 @@ static bool mapMatchesExpectations(const unique_fd& fd, const string& mapName,
 }
 
 static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>& mapFds,
-                      const char* prefix, const unsigned long long allowedDomainBitmask,
-                      const size_t sizeOfBpfMapDef) {
+                      const char* prefix, const size_t sizeOfBpfMapDef) {
     int ret;
     vector<char> mdData;
     vector<struct bpf_map_def> md;
@@ -733,11 +732,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>&
 
         domain selinux_context = getDomainFromSelinuxContext(md[i].selinux_context);
         if (specified(selinux_context)) {
-            if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) {
-                ALOGE("map %s has invalid selinux_context of %d (allowed bitmask 0x%llx)",
-                      mapNames[i].c_str(), selinux_context, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("map %s selinux_context [%-32s] -> %d -> '%s' (%s)", mapNames[i].c_str(),
                   md[i].selinux_context, selinux_context, lookupSelinuxContext(selinux_context),
                   lookupPinSubdir(selinux_context));
@@ -746,11 +740,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>&
         domain pin_subdir = getDomainFromPinSubdir(md[i].pin_subdir);
         if (unrecognized(pin_subdir)) return -ENOTDIR;
         if (specified(pin_subdir)) {
-            if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) {
-                ALOGE("map %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)",
-                      mapNames[i].c_str(), pin_subdir, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("map %s pin_subdir [%-32s] -> %d -> '%s'", mapNames[i].c_str(), md[i].pin_subdir,
                   pin_subdir, lookupPinSubdir(pin_subdir));
         }
@@ -921,7 +910,7 @@ static void applyMapRelo(ifstream& elfFile, vector<unique_fd> &mapFds, vector<co
 }
 
 static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const string& license,
-                            const char* prefix, const unsigned long long allowedDomainBitmask) {
+                            const char* prefix) {
     unsigned kvers = kernelVersion();
 
     if (!kvers) {
@@ -980,22 +969,12 @@ static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const
         if (unrecognized(pin_subdir)) return -ENOTDIR;
 
         if (specified(selinux_context)) {
-            if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) {
-                ALOGE("prog %s has invalid selinux_context of %d (allowed bitmask 0x%llx)",
-                      name.c_str(), selinux_context, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("prog %s selinux_context [%-32s] -> %d -> '%s' (%s)", name.c_str(),
                   cs[i].prog_def->selinux_context, selinux_context,
                   lookupSelinuxContext(selinux_context), lookupPinSubdir(selinux_context));
         }
 
         if (specified(pin_subdir)) {
-            if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) {
-                ALOGE("prog %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)", name.c_str(),
-                      pin_subdir, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("prog %s pin_subdir [%-32s] -> %d -> '%s'", name.c_str(),
                   cs[i].prog_def->pin_subdir, pin_subdir, lookupPinSubdir(pin_subdir));
         }
@@ -1185,8 +1164,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) {
     /* Just for future debugging */
     if (0) dumpAllCs(cs);
 
-    ret = createMaps(elfPath, elfFile, mapFds, location.prefix, location.allowedDomainBitmask,
-                     sizeOfBpfMapDef);
+    ret = createMaps(elfPath, elfFile, mapFds, location.prefix, sizeOfBpfMapDef);
     if (ret) {
         ALOGE("Failed to create maps: (ret=%d) in %s", ret, elfPath);
         return ret;
@@ -1197,8 +1175,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) {
 
     applyMapRelo(elfFile, mapFds, cs);
 
-    ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix,
-                           location.allowedDomainBitmask);
+    ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix);
     if (ret) ALOGE("Failed to load programs, loadCodeSections ret=%d", ret);
 
     return ret;
@@ -64,18 +64,9 @@ static constexpr bool specified(domain d) {
     return d != domain::unspecified;
 }
 
-static constexpr unsigned long long domainToBitmask(domain d) {
-    return specified(d) ? 1uLL << (static_cast<int>(d) - 1) : 0;
-}
-
-static constexpr bool inDomainBitmask(domain d, unsigned long long v) {
-    return domainToBitmask(d) & v;
-}
-
 struct Location {
     const char* const dir = "";
     const char* const prefix = "";
-    unsigned long long allowedDomainBitmask = 0;
 };
 
 // BPF loader implementation. Loads an eBPF ELF object
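For reference, the deleted helpers above implemented a simple enum-to-bitmask allowlist: every specified domain maps to its own bit (bit d - 1), and a location's allowedDomainBitmask was the OR of the permitted domains' bits. A toy re-expression of the deleted logic in plain C (hypothetical domain values, illustration only):

enum domain { DOMAIN_UNSPECIFIED = 0, DOMAIN_TETHERING = 1, DOMAIN_NET_SHARED = 2 };

// Mirrors the deleted domainToBitmask(): specified domains get bit (d - 1).
static unsigned long long domainToBitmask(enum domain d) {
    return (d != DOMAIN_UNSPECIFIED) ? 1uLL << ((int)d - 1) : 0uLL;
}

// Mirrors the deleted inDomainBitmask(): membership test against an allowlist.
static int inDomainBitmask(enum domain d, unsigned long long v) {
    return (domainToBitmask(d) & v) != 0;
}

int main(void) {
    // An allowlist is just the OR of the allowed domains' bits.
    const unsigned long long allowed =
            domainToBitmask(DOMAIN_TETHERING) | domainToBitmask(DOMAIN_NET_SHARED);
    return inDomainBitmask(DOMAIN_TETHERING, allowed) ? 0 : 1;
}

With the restriction removed, the loader now trusts the selinux_context and pin_subdir declared in the ELF objects themselves and only logs them.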
@@ -1,85 +0,0 @@
-# zygote-start is what officially starts netd (see //system/core/rootdir/init.rc)
-# However, on some hardware it's started from post-fs-data as well, which is just
-# a tad earlier. There's no benefit to that though, since on 4.9+ P+ devices netd
-# will just block until bpfloader finishes and sets the bpf.progs_loaded property.
-#
-# It is important that we start netbpfload after:
-# - /sys/fs/bpf is already mounted,
-# - apex (incl. rollback) is initialized (so that in the future we can load bpf
-#   programs shipped as part of apex mainline modules)
-# - logd is ready for us to log stuff
-#
-# At the same time we want to be as early as possible to reduce races and thus
-# failures (before memory is fragmented, and cpu is busy running tons of other
-# stuff) and we absolutely want to be before netd and the system boot slot is
-# considered to have booted successfully.
-#
-on load_bpf_programs
-    exec_start netbpfload
-
-service netbpfload /system/bin/netbpfload
-    capabilities CHOWN SYS_ADMIN NET_ADMIN
-    # The following group memberships are a workaround for lack of DAC_OVERRIDE
-    # and allow us to open (among other things) files that we created and are
-    # no longer root owned (due to CHOWN) but still have group read access to
-    # one of the following groups. This is not perfect, but a more correct
-    # solution requires significantly more effort to implement.
-    group root graphics network_stack net_admin net_bw_acct net_bw_stats net_raw system
-    user root
-    #
-    # Set RLIMIT_MEMLOCK to 1GiB for netbpfload
-    #
-    # Actually only 8MiB would be needed if netbpfload ran as its own uid.
-    #
-    # However, while the rlimit is per-thread, the accounting is system wide.
-    # So, for example, if the graphics stack has already allocated 10MiB of
-    # memlock data before netbpfload even gets a chance to run, it would fail
-    # if its memlock rlimit is only 8MiB - since there would be none left for it.
-    #
-    # netbpfload succeeding is critical to system health, since a failure will
-    # cause netd crashloop and thus system server crashloop... and the only
-    # recovery is a full kernel reboot.
-    #
-    # We've had issues where devices would sometimes (rarely) boot into
-    # a crashloop because netbpfload would occasionally lose a boot time
-    # race against the graphics stack's boot time locked memory allocation.
-    #
-    # Thus netbpfload's memlock has to be 8MB higher then the locked memory
-    # consumption of the root uid anywhere else in the system...
-    # But we don't know what that is for all possible devices...
-    #
-    # Ideally, we'd simply grant netbpfload the IPC_LOCK capability and it
-    # would simply ignore it's memlock rlimit... but it turns that this
-    # capability is not even checked by the kernel's bpf system call.
-    #
-    # As such we simply use 1GiB as a reasonable approximation of infinity.
-    #
-    rlimit memlock 1073741824 1073741824
-    oneshot
-    #
-    # How to debug bootloops caused by 'netbpfload-failed'.
-    #
-    # 1. On some lower RAM devices (like wembley) you may need to first enable developer mode
-    #    (from the Settings app UI), and change the developer option "Logger buffer sizes"
-    #    from the default (wembley: 64kB) to the maximum (1M) per log buffer.
-    #    Otherwise buffer will overflow before you manage to dump it and you'll get useless logs.
-    #
-    # 2. comment out 'reboot_on_failure reboot,netbpfload-failed' below
-    # 3. rebuild/reflash/reboot
-    # 4. as the device is booting up capture netbpfload logs via:
-    #    adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    #
-    # something like:
-    # $ adb reboot; sleep 1; adb wait-for-device; adb root; sleep 1; adb wait-for-device; adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    # will take care of capturing logs as early as possible
-    #
-    # 5. look through the logs from the kernel's bpf verifier that netbpfload dumps out,
-    #    it usually makes sense to search back from the end and find the particular
-    #    bpf verifier failure that caused netbpfload to terminate early with an error code.
-    #    This will probably be something along the lines of 'too many jumps' or
-    #    'cannot prove return value is 0 or 1' or 'unsupported / unknown operation / helper',
-    #    'invalid bpf_context access', etc.
-    #
-    reboot_on_failure reboot,netbpfload-failed
-    # we're not really updatable, but want to be able to load bpf programs shipped in apexes
-    updatable
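As an aside on the deleted rc file's 'rlimit memlock 1073741824 1073741824' line: the same 1 GiB soft/hard memlock limit can be expressed programmatically via setrlimit(2), which is presumably how a loader without an rc file would obtain it. Purely as an illustration (not part of this commit):

#include <sys/resource.h>

// Equivalent of the rc file's 'rlimit memlock 1073741824 1073741824' line.
int set_memlock_1gib(void) {
    const struct rlimit lim = {
        .rlim_cur = 1073741824,  // soft limit: 1 GiB
        .rlim_max = 1073741824,  // hard limit: 1 GiB
    };
    return setrlimit(RLIMIT_MEMLOCK, &lim);
}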