Merge changes from topic "netbpfload" into main

* changes:
  netbpfload: remove netbpfload.rc
  netbpfload: remove support for limiting selinux contexts
  netbpfload: do not create /sys/fs/bpf/loader subdir
  type safety for 'bool downstream'
  type safety for 'bool updatetime'
Maciej Żenczykowski, 2023-10-10 17:23:50 +00:00, committed by Gerrit Code Review
7 changed files with 47 additions and 183 deletions

View File

@@ -91,14 +91,14 @@ struct egress_bool { bool egress; };
 #define INGRESS ((struct egress_bool){ .egress = false })
 #define EGRESS ((struct egress_bool){ .egress = true })
 
-// constants for passing in to 'bool downstream'
-static const bool UPSTREAM = false;
-static const bool DOWNSTREAM = true;
+struct stream_bool { bool down; };
+#define UPSTREAM ((struct stream_bool){ .down = false })
+#define DOWNSTREAM ((struct stream_bool){ .down = true })
 
 struct rawip_bool { bool rawip; };
 #define ETHER ((struct rawip_bool){ .rawip = false })
 #define RAWIP ((struct rawip_bool){ .rawip = true })
 
-// constants for passing in to 'bool updatetime'
-static const bool NO_UPDATETIME = false;
-static const bool UPDATETIME = true;
+struct updatetime_bool { bool updatetime; };
+#define NO_UPDATETIME ((struct updatetime_bool){ .updatetime = false })
+#define UPDATETIME ((struct updatetime_bool){ .updatetime = true })
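
The point of these wrappers is type safety alone: a one-field struct is a distinct type, so two boolean parameters can no longer be transposed at a call site without a compile-time error, while the compound-literal #defines keep call sites as terse as the old bool constants and the generated code is unchanged. A minimal standalone sketch of the pattern (the forward() function is hypothetical, not from the diff):

#include <stdbool.h>
#include <stdio.h>

struct stream_bool     { bool down; };
struct updatetime_bool { bool updatetime; };

#define UPSTREAM      ((struct stream_bool){ .down = false })
#define DOWNSTREAM    ((struct stream_bool){ .down = true })
#define NO_UPDATETIME ((struct updatetime_bool){ .updatetime = false })
#define UPDATETIME    ((struct updatetime_bool){ .updatetime = true })

// With two plain 'bool' parameters, swapped arguments compile silently;
// with one-field struct wrappers the swap is a type error.
static void forward(const struct stream_bool stream,
                    const struct updatetime_bool updatetime) {
    printf("down=%d updatetime=%d\n", stream.down, updatetime.updatetime);
}

int main(void) {
    forward(DOWNSTREAM, UPDATETIME);     // ok
    // forward(UPDATETIME, DOWNSTREAM);  // error: incompatible struct types
    return 0;
}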

View File

@@ -126,7 +126,7 @@ DEFINE_BPF_MAP_GRW(tether_upstream6_map, HASH, TetherUpstream6Key, Tether6Value,
 static inline __always_inline int do_forward6(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
+                                              const struct stream_bool stream,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
@@ -188,7 +188,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         TC_PUNT(NON_GLOBAL_DST);
 
     // In the upstream direction do not forward traffic within the same /64 subnet.
-    if (!downstream && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
+    if (!stream.down && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
         TC_PUNT(LOCAL_SRC_DST);
 
     TetherDownstream6Key kd = {
@@ -200,15 +200,15 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         .iif = skb->ifindex,
         .src64 = 0,
     };
-    if (is_ethernet) __builtin_memcpy(downstream ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
+    if (is_ethernet) __builtin_memcpy(stream.down ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether6Value* v = downstream ? bpf_tether_downstream6_map_lookup_elem(&kd)
+    Tether6Value* v = stream.down ? bpf_tether_downstream6_map_lookup_elem(&kd)
                                   : bpf_tether_upstream6_map_lookup_elem(&ku);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
@@ -253,7 +253,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
@@ -265,7 +265,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         // I do not believe this can ever happen, but keep the verifier happy...
         if (data + sizeof(struct ethhdr) + sizeof(*ip6) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -285,8 +285,8 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
     // (-ENOTSUPP) if it isn't.
     bpf_csum_update(skb, 0xFFFF - ntohs(old_hl) + ntohs(new_hl));
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Overwrite any mac header with the new one
     // For a rawip tx interface it will simply be a bunch of zeroes and later stripped.
@@ -361,8 +361,8 @@ DEFINE_BPF_MAP_GRW(tether_upstream4_map, HASH, Tether4Key, Tether4Value, 1024, T
 static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         const int l2_header_size, void* data, const void* data_end,
         struct ethhdr* eth, struct iphdr* ip, const struct rawip_bool rawip,
-        const bool downstream, const bool updatetime, const bool is_tcp,
-        const struct kver_uint kver) {
+        const struct stream_bool stream, const struct updatetime_bool updatetime,
+        const bool is_tcp, const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
     struct tcphdr* tcph = is_tcp ? (void*)(ip + 1) : NULL;
     struct udphdr* udph = is_tcp ? NULL : (void*)(ip + 1);
@@ -421,13 +421,13 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
     };
     if (is_ethernet) __builtin_memcpy(k.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether4Value* v = downstream ? bpf_tether_downstream4_map_lookup_elem(&k)
+    Tether4Value* v = stream.down ? bpf_tether_downstream4_map_lookup_elem(&k)
                                   : bpf_tether_upstream4_map_lookup_elem(&k);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
@@ -472,7 +472,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
@@ -486,7 +486,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         // I do not believe this can ever happen, but keep the verifier happy...
         if (data + sizeof(struct ethhdr) + sizeof(*ip) + (is_tcp ? sizeof(*tcph) : sizeof(*udph)) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -538,10 +538,10 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
     // This requires the bpf_ktime_get_boot_ns() helper which was added in 5.8,
     // and backported to all Android Common Kernel 4.14+ trees.
-    if (updatetime) v->last_used = bpf_ktime_get_boot_ns();
+    if (updatetime.updatetime) v->last_used = bpf_ktime_get_boot_ns();
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Redirect to forwarded interface.
     //
@@ -554,8 +554,8 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
 static inline __always_inline int do_forward4(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
-                                              const bool updatetime,
+                                              const struct stream_bool stream,
+                                              const struct updatetime_bool updatetime,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
@@ -616,16 +616,16 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // in such a situation we can only support TCP. This also has the added nice benefit of
     // using a separate error counter, and thus making it obvious which version of the program
     // is loaded.
-    if (!updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
+    if (!updatetime.updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
 
     // We do not support offloading anything besides IPv4 TCP and UDP, due to need for NAT,
     // but no need to check this if !updatetime due to check immediately above.
-    if (updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
+    if (updatetime.updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
         TC_PUNT(NON_TCP_UDP);
 
     // We want to make sure that the compiler will, in the !updatetime case, entirely optimize
     // out all the non-tcp logic. Also note that at this point is_udp === !is_tcp.
-    const bool is_tcp = !updatetime || (ip->protocol == IPPROTO_TCP);
+    const bool is_tcp = !updatetime.updatetime || (ip->protocol == IPPROTO_TCP);
@@ -646,10 +646,10 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // if the underlying requisite kernel support (bpf_ktime_get_boot_ns) was backported.
     if (is_tcp) {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ true, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ true, kver);
     } else {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ false, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ false, kver);
     }
 }
@@ -808,16 +808,17 @@ DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$stub", TETHERING_UID
 DEFINE_BPF_MAP_GRW(tether_dev_map, DEVMAP_HASH, uint32_t, uint32_t, 64, TETHERING_GID)
 
 static inline __always_inline int do_xdp_forward6(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
 static inline __always_inline int do_xdp_forward4(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
     const struct ethhdr* eth = data;
@@ -826,15 +827,16 @@ static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const
     if ((void*)(eth + 1) > data_end) return XDP_PASS;
 
     if (eth->h_proto == htons(ETH_P_IPV6))
-        return do_xdp_forward6(ctx, ETHER, downstream);
+        return do_xdp_forward6(ctx, ETHER, stream);
     if (eth->h_proto == htons(ETH_P_IP))
-        return do_xdp_forward4(ctx, ETHER, downstream);
+        return do_xdp_forward4(ctx, ETHER, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
@@ -842,8 +844,8 @@ static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const
     if (data_end - data < 1) return XDP_PASS;
 
     const uint8_t v = (*(uint8_t*)data) >> 4;
-    if (v == 6) return do_xdp_forward6(ctx, RAWIP, downstream);
-    if (v == 4) return do_xdp_forward4(ctx, RAWIP, downstream);
+    if (v == 6) return do_xdp_forward6(ctx, RAWIP, stream);
+    if (v == 4) return do_xdp_forward4(ctx, RAWIP, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
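
A recurring pattern in the hunks above: the stats record is keyed on the upstream interface (stat_and_limit_k is skb->ifindex for downstream traffic and v->oif for upstream), so downstream-bound packets are accounted as rx and upstream-bound packets as tx. A hedged sketch of that accounting, with the stats struct abridged from the TetherStatsValue fields used in the diff:

#include <stdbool.h>
#include <stdint.h>

struct stream_bool { bool down; };

// Abridged stand-in for the TetherStatsValue referenced above.
struct tether_stats {
    uint64_t rxPackets, rxBytes, rxErrors;
    uint64_t txPackets, txBytes, txErrors;
};

// Same ternary-on-stream.down pattern as do_forward6/do_forward4: pick the
// rx or tx half of the record, then bump packets and bytes atomically.
static void count_forwarded(struct tether_stats* stat_v,
                            const struct stream_bool stream,
                            uint64_t packets, uint64_t l3_bytes) {
    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, l3_bytes);
}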

View File

@@ -36,6 +36,4 @@ cc_binary {
"loader.cpp", "loader.cpp",
"NetBpfLoad.cpp", "NetBpfLoad.cpp",
], ],
init_rc: ["netbpfload.rc"],
} }

View File

@@ -65,46 +65,34 @@ bool exists(const char* const path) {
     abort();  // can only hit this if permissions (likely selinux) are screwed up
 }
 
-constexpr unsigned long long kTetheringApexDomainBitmask =
-        domainToBitmask(domain::tethering) |
-        domainToBitmask(domain::net_private) |
-        domainToBitmask(domain::net_shared) |
-        domainToBitmask(domain::netd_readonly) |
-        domainToBitmask(domain::netd_shared);
-
 const android::bpf::Location locations[] = {
         // S+ Tethering mainline module (network_stack): tether offload
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/",
                 .prefix = "tethering/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
         // netutils_wrapper (for iptables xt_bpf) has access to programs
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_shared/",
                 .prefix = "netd_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
         // netutils_wrapper has no access, netd has read only access
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_readonly/",
                 .prefix = "netd_readonly/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with system server)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_shared/",
                 .prefix = "net_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (not shared, just network_stack)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_private/",
                 .prefix = "net_private/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
 };
@@ -247,13 +235,6 @@ int main(int argc, char** argv) {
         if (createSysFsBpfSubDir(location.prefix)) return 1;
     }
 
-    // Note: there's no actual src dir for fs_bpf_loader .o's,
-    // so it is not listed in 'locations[].prefix'.
-    // This is because this is primarily meant for triggering genfscon rules,
-    // and as such this will likely always be the case.
-    // Thus we need to manually create the /sys/fs/bpf/loader subdirectory.
-    if (createSysFsBpfSubDir("loader")) return 1;
-
     // Load all ELF objects, create programs and maps, and pin them
     for (const auto& location : locations) {
         if (loadAllElfObjects(location) != 0) {
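
With the manually created "loader" subdirectory gone, the only pin directories created are the per-location prefixes in the loop above. createSysFsBpfSubDir itself is not part of this diff; the following is a hypothetical sketch of what such a helper plausibly does, assuming a bpffs mounted at /sys/fs/bpf:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>

// Hypothetical sketch (the real helper is not shown in this diff):
// create /sys/fs/bpf/<prefix> so maps and programs can be pinned beneath it.
static int createSysFsBpfSubDir(const char* const prefix) {
    if (!*prefix) return 0;  // empty prefix: pin directly under /sys/fs/bpf
    char path[256];
    snprintf(path, sizeof(path), "/sys/fs/bpf/%s", prefix);
    size_t len = strlen(path);
    if (path[len - 1] == '/') path[len - 1] = '\0';  // "netd_shared/" -> ".../netd_shared"
    if (mkdir(path, S_IRWXU) && errno != EEXIST) {
        perror(path);
        return -1;
    }
    return 0;
}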

View File

@@ -621,8 +621,7 @@ static bool mapMatchesExpectations(const unique_fd& fd, const string& mapName,
 }
 
 static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>& mapFds,
-                      const char* prefix, const unsigned long long allowedDomainBitmask,
-                      const size_t sizeOfBpfMapDef) {
+                      const char* prefix, const size_t sizeOfBpfMapDef) {
     int ret;
     vector<char> mdData;
     vector<struct bpf_map_def> md;
@@ -733,11 +732,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>&
         domain selinux_context = getDomainFromSelinuxContext(md[i].selinux_context);
         if (specified(selinux_context)) {
-            if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) {
-                ALOGE("map %s has invalid selinux_context of %d (allowed bitmask 0x%llx)",
-                      mapNames[i].c_str(), selinux_context, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("map %s selinux_context [%-32s] -> %d -> '%s' (%s)", mapNames[i].c_str(),
                   md[i].selinux_context, selinux_context, lookupSelinuxContext(selinux_context),
                   lookupPinSubdir(selinux_context));
@@ -746,11 +740,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>&
         domain pin_subdir = getDomainFromPinSubdir(md[i].pin_subdir);
         if (unrecognized(pin_subdir)) return -ENOTDIR;
         if (specified(pin_subdir)) {
-            if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) {
-                ALOGE("map %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)",
-                      mapNames[i].c_str(), pin_subdir, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("map %s pin_subdir [%-32s] -> %d -> '%s'", mapNames[i].c_str(), md[i].pin_subdir,
                   pin_subdir, lookupPinSubdir(pin_subdir));
         }
@@ -921,7 +910,7 @@ static void applyMapRelo(ifstream& elfFile, vector<unique_fd> &mapFds, vector<co
 }
 
 static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const string& license,
-                            const char* prefix, const unsigned long long allowedDomainBitmask) {
+                            const char* prefix) {
     unsigned kvers = kernelVersion();
     if (!kvers) {
@@ -980,22 +969,12 @@ static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const
         if (unrecognized(pin_subdir)) return -ENOTDIR;
 
         if (specified(selinux_context)) {
-            if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) {
-                ALOGE("prog %s has invalid selinux_context of %d (allowed bitmask 0x%llx)",
-                      name.c_str(), selinux_context, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("prog %s selinux_context [%-32s] -> %d -> '%s' (%s)", name.c_str(),
                   cs[i].prog_def->selinux_context, selinux_context,
                   lookupSelinuxContext(selinux_context), lookupPinSubdir(selinux_context));
         }
 
         if (specified(pin_subdir)) {
-            if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) {
-                ALOGE("prog %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)", name.c_str(),
-                      pin_subdir, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("prog %s pin_subdir [%-32s] -> %d -> '%s'", name.c_str(),
                   cs[i].prog_def->pin_subdir, pin_subdir, lookupPinSubdir(pin_subdir));
         }
@@ -1185,8 +1164,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) {
     /* Just for future debugging */
     if (0) dumpAllCs(cs);
 
-    ret = createMaps(elfPath, elfFile, mapFds, location.prefix, location.allowedDomainBitmask,
-                     sizeOfBpfMapDef);
+    ret = createMaps(elfPath, elfFile, mapFds, location.prefix, sizeOfBpfMapDef);
     if (ret) {
         ALOGE("Failed to create maps: (ret=%d) in %s", ret, elfPath);
         return ret;
@@ -1197,8 +1175,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) {
     applyMapRelo(elfFile, mapFds, cs);
 
-    ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix,
-                           location.allowedDomainBitmask);
+    ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix);
     if (ret) ALOGE("Failed to load programs, loadCodeSections ret=%d", ret);
 
     return ret;
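
After this change the loader no longer cross-checks a map's or program's requested domain against a per-location allowlist; it only rejects names it cannot parse at all (the unrecognized() check above, which returns -ENOTDIR). The parse step is a plain name-to-domain table lookup; a hedged sketch of its shape (table contents illustrative, not the real domain list):

#include <string.h>

// Illustrative only: the real domain enum and getDomainFromPinSubdir live in
// the loader and are not part of this diff.
enum domain { DOMAIN_UNSPECIFIED, DOMAIN_TETHERING, DOMAIN_NET_SHARED, DOMAIN_UNRECOGNIZED };

struct name_to_domain { const char* name; enum domain d; };

static const struct name_to_domain kPinSubdirs[] = {
    { "",           DOMAIN_UNSPECIFIED },  // empty string: nothing requested
    { "tethering",  DOMAIN_TETHERING },
    { "net_shared", DOMAIN_NET_SHARED },
};

// Unrecognized names are flagged so the caller can bail out with -ENOTDIR.
static enum domain getDomainFromPinSubdir(const char* s) {
    for (size_t i = 0; i < sizeof(kPinSubdirs) / sizeof(kPinSubdirs[0]); ++i)
        if (!strcmp(s, kPinSubdirs[i].name)) return kPinSubdirs[i].d;
    return DOMAIN_UNRECOGNIZED;
}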

View File

@@ -64,18 +64,9 @@ static constexpr bool specified(domain d) {
     return d != domain::unspecified;
 }
 
-static constexpr unsigned long long domainToBitmask(domain d) {
-    return specified(d) ? 1uLL << (static_cast<int>(d) - 1) : 0;
-}
-
-static constexpr bool inDomainBitmask(domain d, unsigned long long v) {
-    return domainToBitmask(d) & v;
-}
-
 struct Location {
     const char* const dir = "";
     const char* const prefix = "";
-    unsigned long long allowedDomainBitmask = 0;
 };
 
 // BPF loader implementation. Loads an eBPF ELF object
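
For reference, the deleted helpers treated domain as a small enum whose specified values, counting unspecified as 0, map to one-hot bits, and the allow-check was a mask test. Restated compactly in plain C, with a worked example:

// The removed logic, restated: domain 0 ('unspecified') maps to no bit,
// and domain d >= 1 maps to bit (d - 1).
static unsigned long long domainToBitmask(int d) {
    return d > 0 ? 1uLL << (d - 1) : 0uLL;
}

static int inDomainBitmask(int d, unsigned long long v) {
    return (domainToBitmask(d) & v) != 0;
}

// Worked example: a mask permitting domains 1 and 3 is (1<<0)|(1<<2) = 0x5,
// so inDomainBitmask(2, 0x5) == 0 and domain 2 would have been rejected.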

View File

@@ -1,85 +0,0 @@
-# zygote-start is what officially starts netd (see //system/core/rootdir/init.rc)
-# However, on some hardware it's started from post-fs-data as well, which is just
-# a tad earlier. There's no benefit to that though, since on 4.9+ P+ devices netd
-# will just block until bpfloader finishes and sets the bpf.progs_loaded property.
-#
-# It is important that we start netbpfload after:
-#   - /sys/fs/bpf is already mounted,
-#   - apex (incl. rollback) is initialized (so that in the future we can load bpf
-#     programs shipped as part of apex mainline modules)
-#   - logd is ready for us to log stuff
-#
-# At the same time we want to be as early as possible to reduce races and thus
-# failures (before memory is fragmented, and cpu is busy running tons of other
-# stuff), and we absolutely want to be before netd and before the system boot slot
-# is considered to have booted successfully.
-#
-on load_bpf_programs
-    exec_start netbpfload
-
-service netbpfload /system/bin/netbpfload
-    capabilities CHOWN SYS_ADMIN NET_ADMIN
-    # The following group memberships are a workaround for lack of DAC_OVERRIDE
-    # and allow us to open (among other things) files that we created and are
-    # no longer root owned (due to CHOWN) but still have group read access to
-    # one of the following groups. This is not perfect, but a more correct
-    # solution requires significantly more effort to implement.
-    group root graphics network_stack net_admin net_bw_acct net_bw_stats net_raw system
-    user root
-    #
-    # Set RLIMIT_MEMLOCK to 1GiB for netbpfload
-    #
-    # Actually only 8MiB would be needed if netbpfload ran as its own uid.
-    #
-    # However, while the rlimit is per-thread, the accounting is system wide.
-    # So, for example, if the graphics stack has already allocated 10MiB of
-    # memlock data before netbpfload even gets a chance to run, it would fail
-    # if its memlock rlimit is only 8MiB - since there would be none left for it.
-    #
-    # netbpfload succeeding is critical to system health, since a failure will
-    # cause netd crashloop and thus system server crashloop... and the only
-    # recovery is a full kernel reboot.
-    #
-    # We've had issues where devices would sometimes (rarely) boot into
-    # a crashloop because netbpfload would occasionally lose a boot time
-    # race against the graphics stack's boot time locked memory allocation.
-    #
-    # Thus netbpfload's memlock has to be 8MB higher than the locked memory
-    # consumption of the root uid anywhere else in the system...
-    # But we don't know what that is for all possible devices...
-    #
-    # Ideally, we'd simply grant netbpfload the IPC_LOCK capability and it
-    # would simply ignore its memlock rlimit... but it turns out that this
-    # capability is not even checked by the kernel's bpf system call.
-    #
-    # As such we simply use 1GiB as a reasonable approximation of infinity.
-    #
-    rlimit memlock 1073741824 1073741824
-    oneshot
-    #
-    # How to debug bootloops caused by 'netbpfload-failed'.
-    #
-    # 1. On some lower RAM devices (like wembley) you may need to first enable developer mode
-    #    (from the Settings app UI), and change the developer option "Logger buffer sizes"
-    #    from the default (wembley: 64kB) to the maximum (1M) per log buffer.
-    #    Otherwise the buffer will overflow before you manage to dump it and you'll get useless logs.
-    #
-    # 2. comment out 'reboot_on_failure reboot,netbpfload-failed' below
-    # 3. rebuild/reflash/reboot
-    # 4. as the device is booting up capture netbpfload logs via:
-    #    adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    #
-    # something like:
-    #   $ adb reboot; sleep 1; adb wait-for-device; adb root; sleep 1; adb wait-for-device; adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    # will take care of capturing logs as early as possible
-    #
-    # 5. look through the logs from the kernel's bpf verifier that netbpfload dumps out,
-    #    it usually makes sense to search back from the end and find the particular
-    #    bpf verifier failure that caused netbpfload to terminate early with an error code.
-    #    This will probably be something along the lines of 'too many jumps' or
-    #    'cannot prove return value is 0 or 1' or 'unsupported / unknown operation / helper',
-    #    'invalid bpf_context access', etc.
-    #
-    reboot_on_failure reboot,netbpfload-failed
-    # we're not really updatable, but want to be able to load bpf programs shipped in apexes
-    updatable
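
The deleted rlimit line was applied by init before the service started; the process-side equivalent looks like the following (a standalone sketch, not netbpfload code; raising the hard limit requires CAP_SYS_RESOURCE, which init has when launching services):

#include <stdio.h>
#include <sys/resource.h>

// Mirror of 'rlimit memlock 1073741824 1073741824': raise our own
// RLIMIT_MEMLOCK to 1 GiB soft/hard before issuing bpf() syscalls.
int main(void) {
    const struct rlimit lim = { 1073741824, 1073741824 };
    if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
        perror("setrlimit(RLIMIT_MEMLOCK)");
        return 1;
    }
    return 0;
}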