Merge changes from topic "netbpfload" into main

* changes:
  netbpfload: remove netbpfload.rc
  netbpfload: remove support for limiting selinux contexts
  netbpfload: do not create /sys/fs/bpf/loader subdir
  type safety for 'bool downstream'
  type safety for 'bool updatetime'
Maciej Żenczykowski, 2023-10-10 17:23:50 +00:00, committed by Gerrit Code Review
7 changed files with 47 additions and 183 deletions

View File

@@ -91,14 +91,14 @@ struct egress_bool { bool egress; };
 #define INGRESS ((struct egress_bool){ .egress = false })
 #define EGRESS ((struct egress_bool){ .egress = true })
 
-// constants for passing in to 'bool downstream'
-static const bool UPSTREAM = false;
-static const bool DOWNSTREAM = true;
+struct stream_bool { bool down; };
+#define UPSTREAM ((struct stream_bool){ .down = false })
+#define DOWNSTREAM ((struct stream_bool){ .down = true })
 
 struct rawip_bool { bool rawip; };
 #define ETHER ((struct rawip_bool){ .rawip = false })
 #define RAWIP ((struct rawip_bool){ .rawip = true })
 
-// constants for passing in to 'bool updatetime'
-static const bool NO_UPDATETIME = false;
-static const bool UPDATETIME = true;
+struct updatetime_bool { bool updatetime; };
+#define NO_UPDATETIME ((struct updatetime_bool){ .updatetime = false })
+#define UPDATETIME ((struct updatetime_bool){ .updatetime = true })
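
The point of these wrappers is type safety alone: a one-field struct is a distinct type, so two boolean parameters can no longer be transposed at a call site without a compile-time error, while the compound-literal #defines keep call sites as terse as the old bool constants and the generated code is unchanged. A minimal standalone sketch of the pattern (the forward() function is hypothetical, not from the diff):

#include <stdbool.h>
#include <stdio.h>

struct stream_bool     { bool down; };
struct updatetime_bool { bool updatetime; };

#define UPSTREAM      ((struct stream_bool){ .down = false })
#define DOWNSTREAM    ((struct stream_bool){ .down = true })
#define NO_UPDATETIME ((struct updatetime_bool){ .updatetime = false })
#define UPDATETIME    ((struct updatetime_bool){ .updatetime = true })

// With two plain 'bool' parameters, swapped arguments compile silently;
// with one-field struct wrappers the swap is a type error.
static void forward(const struct stream_bool stream,
                    const struct updatetime_bool updatetime) {
    printf("down=%d updatetime=%d\n", stream.down, updatetime.updatetime);
}

int main(void) {
    forward(DOWNSTREAM, UPDATETIME);     // ok
    // forward(UPDATETIME, DOWNSTREAM);  // error: incompatible struct types
    return 0;
}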

View File

@@ -126,7 +126,7 @@ DEFINE_BPF_MAP_GRW(tether_upstream6_map, HASH, TetherUpstream6Key, Tether6Value,
 static inline __always_inline int do_forward6(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
+                                              const struct stream_bool stream,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
@@ -188,7 +188,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         TC_PUNT(NON_GLOBAL_DST);
 
     // In the upstream direction do not forward traffic within the same /64 subnet.
-    if (!downstream && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
+    if (!stream.down && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
         TC_PUNT(LOCAL_SRC_DST);
 
     TetherDownstream6Key kd = {
@@ -200,15 +200,15 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         .iif = skb->ifindex,
         .src64 = 0,
     };
-    if (is_ethernet) __builtin_memcpy(downstream ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
+    if (is_ethernet) __builtin_memcpy(stream.down ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether6Value* v = downstream ? bpf_tether_downstream6_map_lookup_elem(&kd)
+    Tether6Value* v = stream.down ? bpf_tether_downstream6_map_lookup_elem(&kd)
                                   : bpf_tether_upstream6_map_lookup_elem(&ku);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
@@ -253,7 +253,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
@@ -265,7 +265,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         // I do not believe this can ever happen, but keep the verifier happy...
         if (data + sizeof(struct ethhdr) + sizeof(*ip6) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -285,8 +285,8 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
     // (-ENOTSUPP) if it isn't.
     bpf_csum_update(skb, 0xFFFF - ntohs(old_hl) + ntohs(new_hl));
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Overwrite any mac header with the new one
     // For a rawip tx interface it will simply be a bunch of zeroes and later stripped.
@@ -361,8 +361,8 @@ DEFINE_BPF_MAP_GRW(tether_upstream4_map, HASH, Tether4Key, Tether4Value, 1024, T
 static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         const int l2_header_size, void* data, const void* data_end,
         struct ethhdr* eth, struct iphdr* ip, const struct rawip_bool rawip,
-        const bool downstream, const bool updatetime, const bool is_tcp,
-        const struct kver_uint kver) {
+        const struct stream_bool stream, const struct updatetime_bool updatetime,
+        const bool is_tcp, const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
     struct tcphdr* tcph = is_tcp ? (void*)(ip + 1) : NULL;
     struct udphdr* udph = is_tcp ? NULL : (void*)(ip + 1);
@@ -421,13 +421,13 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
     };
     if (is_ethernet) __builtin_memcpy(k.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether4Value* v = downstream ? bpf_tether_downstream4_map_lookup_elem(&k)
+    Tether4Value* v = stream.down ? bpf_tether_downstream4_map_lookup_elem(&k)
                                   : bpf_tether_upstream4_map_lookup_elem(&k);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
@@ -472,7 +472,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
@@ -486,7 +486,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         // I do not believe this can ever happen, but keep the verifier happy...
         if (data + sizeof(struct ethhdr) + sizeof(*ip) + (is_tcp ? sizeof(*tcph) : sizeof(*udph)) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -538,10 +538,10 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
     // This requires the bpf_ktime_get_boot_ns() helper which was added in 5.8,
     // and backported to all Android Common Kernel 4.14+ trees.
-    if (updatetime) v->last_used = bpf_ktime_get_boot_ns();
+    if (updatetime.updatetime) v->last_used = bpf_ktime_get_boot_ns();
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Redirect to forwarded interface.
     //
@@ -554,8 +554,8 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
 static inline __always_inline int do_forward4(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
-                                              const bool updatetime,
+                                              const struct stream_bool stream,
+                                              const struct updatetime_bool updatetime,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
@@ -616,16 +616,16 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // in such a situation we can only support TCP. This also has the added nice benefit of
     // using a separate error counter, and thus making it obvious which version of the program
     // is loaded.
-    if (!updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
+    if (!updatetime.updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
 
     // We do not support offloading anything besides IPv4 TCP and UDP, due to need for NAT,
     // but no need to check this if !updatetime due to check immediately above.
-    if (updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
+    if (updatetime.updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
         TC_PUNT(NON_TCP_UDP);
 
     // We want to make sure that the compiler will, in the !updatetime case, entirely optimize
     // out all the non-tcp logic. Also note that at this point is_udp === !is_tcp.
-    const bool is_tcp = !updatetime || (ip->protocol == IPPROTO_TCP);
+    const bool is_tcp = !updatetime.updatetime || (ip->protocol == IPPROTO_TCP);
@@ -646,10 +646,10 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // if the underlying requisite kernel support (bpf_ktime_get_boot_ns) was backported.
     if (is_tcp) {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ true, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ true, kver);
     } else {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ false, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ false, kver);
     }
 }
@@ -808,16 +808,17 @@ DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$stub", TETHERING_UID
 DEFINE_BPF_MAP_GRW(tether_dev_map, DEVMAP_HASH, uint32_t, uint32_t, 64, TETHERING_GID)
 
 static inline __always_inline int do_xdp_forward6(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
 static inline __always_inline int do_xdp_forward4(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
     const struct ethhdr* eth = data;
@@ -826,15 +827,16 @@ static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const
     if ((void*)(eth + 1) > data_end) return XDP_PASS;
 
     if (eth->h_proto == htons(ETH_P_IPV6))
-        return do_xdp_forward6(ctx, ETHER, downstream);
+        return do_xdp_forward6(ctx, ETHER, stream);
     if (eth->h_proto == htons(ETH_P_IP))
-        return do_xdp_forward4(ctx, ETHER, downstream);
+        return do_xdp_forward4(ctx, ETHER, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
@@ -842,8 +844,8 @@ static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const
     if (data_end - data < 1) return XDP_PASS;
 
     const uint8_t v = (*(uint8_t*)data) >> 4;
-    if (v == 6) return do_xdp_forward6(ctx, RAWIP, downstream);
-    if (v == 4) return do_xdp_forward4(ctx, RAWIP, downstream);
+    if (v == 6) return do_xdp_forward6(ctx, RAWIP, stream);
+    if (v == 4) return do_xdp_forward4(ctx, RAWIP, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
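
A recurring pattern in the hunks above: the stats record is keyed on the upstream interface (stat_and_limit_k is skb->ifindex for downstream traffic and v->oif for upstream), so downstream-bound packets are accounted as rx and upstream-bound packets as tx. A hedged sketch of that accounting, with the stats struct abridged from the TetherStatsValue fields used in the diff:

#include <stdbool.h>
#include <stdint.h>

struct stream_bool { bool down; };

// Abridged stand-in for the TetherStatsValue referenced above.
struct tether_stats {
    uint64_t rxPackets, rxBytes, rxErrors;
    uint64_t txPackets, txBytes, txErrors;
};

// Same ternary-on-stream.down pattern as do_forward6/do_forward4: pick the
// rx or tx half of the record, then bump packets and bytes atomically.
static void count_forwarded(struct tether_stats* stat_v,
                            const struct stream_bool stream,
                            uint64_t packets, uint64_t l3_bytes) {
    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, l3_bytes);
}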

View File

@@ -36,6 +36,4 @@ cc_binary {
"loader.cpp", "loader.cpp",
"NetBpfLoad.cpp", "NetBpfLoad.cpp",
], ],
init_rc: ["netbpfload.rc"],
} }

View File

@@ -65,46 +65,34 @@ bool exists(const char* const path) {
     abort();  // can only hit this if permissions (likely selinux) are screwed up
 }
 
-constexpr unsigned long long kTetheringApexDomainBitmask =
-        domainToBitmask(domain::tethering) |
-        domainToBitmask(domain::net_private) |
-        domainToBitmask(domain::net_shared) |
-        domainToBitmask(domain::netd_readonly) |
-        domainToBitmask(domain::netd_shared);
-
 const android::bpf::Location locations[] = {
         // S+ Tethering mainline module (network_stack): tether offload
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/",
                 .prefix = "tethering/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
         // netutils_wrapper (for iptables xt_bpf) has access to programs
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_shared/",
                 .prefix = "netd_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
         // netutils_wrapper has no access, netd has read only access
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_readonly/",
                 .prefix = "netd_readonly/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with system server)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_shared/",
                 .prefix = "net_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (not shared, just network_stack)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_private/",
                 .prefix = "net_private/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
 };
@@ -247,13 +235,6 @@ int main(int argc, char** argv) {
         if (createSysFsBpfSubDir(location.prefix)) return 1;
     }
 
-    // Note: there's no actual src dir for fs_bpf_loader .o's,
-    // so it is not listed in 'locations[].prefix'.
-    // This is because this is primarily meant for triggering genfscon rules,
-    // and as such this will likely always be the case.
-    // Thus we need to manually create the /sys/fs/bpf/loader subdirectory.
-    if (createSysFsBpfSubDir("loader")) return 1;
-
     // Load all ELF objects, create programs and maps, and pin them
     for (const auto& location : locations) {
         if (loadAllElfObjects(location) != 0) {
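
With the manually created "loader" subdirectory gone, the only pin directories created are the per-location prefixes in the loop above. createSysFsBpfSubDir itself is not part of this diff; the following is a hypothetical sketch of what such a helper plausibly does, assuming a bpffs mounted at /sys/fs/bpf:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>

// Hypothetical sketch (the real helper is not shown in this diff):
// create /sys/fs/bpf/<prefix> so maps and programs can be pinned beneath it.
static int createSysFsBpfSubDir(const char* const prefix) {
    if (!*prefix) return 0;  // empty prefix: pin directly under /sys/fs/bpf
    char path[256];
    snprintf(path, sizeof(path), "/sys/fs/bpf/%s", prefix);
    size_t len = strlen(path);
    if (path[len - 1] == '/') path[len - 1] = '\0';  // "netd_shared/" -> ".../netd_shared"
    if (mkdir(path, S_IRWXU) && errno != EEXIST) {
        perror(path);
        return -1;
    }
    return 0;
}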

View File

@@ -621,8 +621,7 @@ static bool mapMatchesExpectations(const unique_fd& fd, const string& mapName,
 }
 
 static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>& mapFds,
-                      const char* prefix, const unsigned long long allowedDomainBitmask,
-                      const size_t sizeOfBpfMapDef) {
+                      const char* prefix, const size_t sizeOfBpfMapDef) {
     int ret;
     vector<char> mdData;
     vector<struct bpf_map_def> md;
@@ -733,11 +732,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>&
         domain selinux_context = getDomainFromSelinuxContext(md[i].selinux_context);
         if (specified(selinux_context)) {
-            if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) {
-                ALOGE("map %s has invalid selinux_context of %d (allowed bitmask 0x%llx)",
-                      mapNames[i].c_str(), selinux_context, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("map %s selinux_context [%-32s] -> %d -> '%s' (%s)", mapNames[i].c_str(),
                   md[i].selinux_context, selinux_context, lookupSelinuxContext(selinux_context),
                   lookupPinSubdir(selinux_context));
@@ -746,11 +740,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>&
         domain pin_subdir = getDomainFromPinSubdir(md[i].pin_subdir);
         if (unrecognized(pin_subdir)) return -ENOTDIR;
         if (specified(pin_subdir)) {
-            if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) {
-                ALOGE("map %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)",
-                      mapNames[i].c_str(), pin_subdir, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("map %s pin_subdir [%-32s] -> %d -> '%s'", mapNames[i].c_str(), md[i].pin_subdir,
                   pin_subdir, lookupPinSubdir(pin_subdir));
         }
@@ -921,7 +910,7 @@ static void applyMapRelo(ifstream& elfFile, vector<unique_fd> &mapFds, vector<co
 }
 
 static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const string& license,
-                            const char* prefix, const unsigned long long allowedDomainBitmask) {
+                            const char* prefix) {
     unsigned kvers = kernelVersion();
     if (!kvers) {
@@ -980,22 +969,12 @@ static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const
         if (unrecognized(pin_subdir)) return -ENOTDIR;
 
         if (specified(selinux_context)) {
-            if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) {
-                ALOGE("prog %s has invalid selinux_context of %d (allowed bitmask 0x%llx)",
-                      name.c_str(), selinux_context, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("prog %s selinux_context [%-32s] -> %d -> '%s' (%s)", name.c_str(),
                   cs[i].prog_def->selinux_context, selinux_context,
                   lookupSelinuxContext(selinux_context), lookupPinSubdir(selinux_context));
         }
 
         if (specified(pin_subdir)) {
-            if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) {
-                ALOGE("prog %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)", name.c_str(),
-                      pin_subdir, allowedDomainBitmask);
-                return -EINVAL;
-            }
             ALOGI("prog %s pin_subdir [%-32s] -> %d -> '%s'", name.c_str(),
                   cs[i].prog_def->pin_subdir, pin_subdir, lookupPinSubdir(pin_subdir));
         }
@@ -1185,8 +1164,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) {
     /* Just for future debugging */
     if (0) dumpAllCs(cs);
 
-    ret = createMaps(elfPath, elfFile, mapFds, location.prefix, location.allowedDomainBitmask,
-                     sizeOfBpfMapDef);
+    ret = createMaps(elfPath, elfFile, mapFds, location.prefix, sizeOfBpfMapDef);
     if (ret) {
         ALOGE("Failed to create maps: (ret=%d) in %s", ret, elfPath);
         return ret;
@@ -1197,8 +1175,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) {
     applyMapRelo(elfFile, mapFds, cs);
 
-    ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix,
-                           location.allowedDomainBitmask);
+    ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix);
     if (ret) ALOGE("Failed to load programs, loadCodeSections ret=%d", ret);
 
     return ret;
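
After this change the loader no longer cross-checks a map's or program's requested domain against a per-location allowlist; it only rejects names it cannot parse at all (the unrecognized() check above, which returns -ENOTDIR). The parse step is a plain name-to-domain table lookup; a hedged sketch of its shape (table contents illustrative, not the real domain list):

#include <string.h>

// Illustrative only: the real domain enum and getDomainFromPinSubdir live in
// the loader and are not part of this diff.
enum domain { DOMAIN_UNSPECIFIED, DOMAIN_TETHERING, DOMAIN_NET_SHARED, DOMAIN_UNRECOGNIZED };

struct name_to_domain { const char* name; enum domain d; };

static const struct name_to_domain kPinSubdirs[] = {
    { "",           DOMAIN_UNSPECIFIED },  // empty string: nothing requested
    { "tethering",  DOMAIN_TETHERING },
    { "net_shared", DOMAIN_NET_SHARED },
};

// Unrecognized names are flagged so the caller can bail out with -ENOTDIR.
static enum domain getDomainFromPinSubdir(const char* s) {
    for (size_t i = 0; i < sizeof(kPinSubdirs) / sizeof(kPinSubdirs[0]); ++i)
        if (!strcmp(s, kPinSubdirs[i].name)) return kPinSubdirs[i].d;
    return DOMAIN_UNRECOGNIZED;
}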

View File

@@ -64,18 +64,9 @@ static constexpr bool specified(domain d) {
     return d != domain::unspecified;
 }
 
-static constexpr unsigned long long domainToBitmask(domain d) {
-    return specified(d) ? 1uLL << (static_cast<int>(d) - 1) : 0;
-}
-
-static constexpr bool inDomainBitmask(domain d, unsigned long long v) {
-    return domainToBitmask(d) & v;
-}
-
 struct Location {
     const char* const dir = "";
     const char* const prefix = "";
-    unsigned long long allowedDomainBitmask = 0;
 };
 
 // BPF loader implementation. Loads an eBPF ELF object
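
For reference, the deleted helpers treated domain as a small enum whose specified values, counting unspecified as 0, map to one-hot bits, and the allow-check was a mask test. Restated compactly in plain C, with a worked example:

// The removed logic, restated: domain 0 ('unspecified') maps to no bit,
// and domain d >= 1 maps to bit (d - 1).
static unsigned long long domainToBitmask(int d) {
    return d > 0 ? 1uLL << (d - 1) : 0uLL;
}

static int inDomainBitmask(int d, unsigned long long v) {
    return (domainToBitmask(d) & v) != 0;
}

// Worked example: a mask permitting domains 1 and 3 is (1<<0)|(1<<2) = 0x5,
// so inDomainBitmask(2, 0x5) == 0 and domain 2 would have been rejected.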

View File

@@ -1,85 +0,0 @@
-# zygote-start is what officially starts netd (see //system/core/rootdir/init.rc)
-# However, on some hardware it's started from post-fs-data as well, which is just
-# a tad earlier. There's no benefit to that though, since on 4.9+ P+ devices netd
-# will just block until bpfloader finishes and sets the bpf.progs_loaded property.
-#
-# It is important that we start netbpfload after:
-#   - /sys/fs/bpf is already mounted,
-#   - apex (incl. rollback) is initialized (so that in the future we can load bpf
-#     programs shipped as part of apex mainline modules)
-#   - logd is ready for us to log stuff
-#
-# At the same time we want to be as early as possible to reduce races and thus
-# failures (before memory is fragmented, and cpu is busy running tons of other
-# stuff), and we absolutely want to be before netd and before the system boot slot
-# is considered to have booted successfully.
-#
-on load_bpf_programs
-    exec_start netbpfload
-
-service netbpfload /system/bin/netbpfload
-    capabilities CHOWN SYS_ADMIN NET_ADMIN
-    # The following group memberships are a workaround for lack of DAC_OVERRIDE
-    # and allow us to open (among other things) files that we created and are
-    # no longer root owned (due to CHOWN) but still have group read access to
-    # one of the following groups. This is not perfect, but a more correct
-    # solution requires significantly more effort to implement.
-    group root graphics network_stack net_admin net_bw_acct net_bw_stats net_raw system
-    user root
-    #
-    # Set RLIMIT_MEMLOCK to 1GiB for netbpfload
-    #
-    # Actually only 8MiB would be needed if netbpfload ran as its own uid.
-    #
-    # However, while the rlimit is per-thread, the accounting is system wide.
-    # So, for example, if the graphics stack has already allocated 10MiB of
-    # memlock data before netbpfload even gets a chance to run, it would fail
-    # if its memlock rlimit is only 8MiB - since there would be none left for it.
-    #
-    # netbpfload succeeding is critical to system health, since a failure will
-    # cause netd crashloop and thus system server crashloop... and the only
-    # recovery is a full kernel reboot.
-    #
-    # We've had issues where devices would sometimes (rarely) boot into
-    # a crashloop because netbpfload would occasionally lose a boot time
-    # race against the graphics stack's boot time locked memory allocation.
-    #
-    # Thus netbpfload's memlock has to be 8MB higher than the locked memory
-    # consumption of the root uid anywhere else in the system...
-    # But we don't know what that is for all possible devices...
-    #
-    # Ideally, we'd simply grant netbpfload the IPC_LOCK capability and it
-    # would simply ignore its memlock rlimit... but it turns out that this
-    # capability is not even checked by the kernel's bpf system call.
-    #
-    # As such we simply use 1GiB as a reasonable approximation of infinity.
-    #
-    rlimit memlock 1073741824 1073741824
-    oneshot
-    #
-    # How to debug bootloops caused by 'netbpfload-failed'.
-    #
-    # 1. On some lower RAM devices (like wembley) you may need to first enable developer mode
-    #    (from the Settings app UI), and change the developer option "Logger buffer sizes"
-    #    from the default (wembley: 64kB) to the maximum (1M) per log buffer.
-    #    Otherwise the buffer will overflow before you manage to dump it and you'll get useless logs.
-    #
-    # 2. comment out 'reboot_on_failure reboot,netbpfload-failed' below
-    # 3. rebuild/reflash/reboot
-    # 4. as the device is booting up capture netbpfload logs via:
-    #    adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    #
-    # something like:
-    #   $ adb reboot; sleep 1; adb wait-for-device; adb root; sleep 1; adb wait-for-device; adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    # will take care of capturing logs as early as possible
-    #
-    # 5. look through the logs from the kernel's bpf verifier that netbpfload dumps out,
-    #    it usually makes sense to search back from the end and find the particular
-    #    bpf verifier failure that caused netbpfload to terminate early with an error code.
-    #    This will probably be something along the lines of 'too many jumps' or
-    #    'cannot prove return value is 0 or 1' or 'unsupported / unknown operation / helper',
-    #    'invalid bpf_context access', etc.
-    #
-    reboot_on_failure reboot,netbpfload-failed
-    # we're not really updatable, but want to be able to load bpf programs shipped in apexes
-    updatable
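
The deleted rlimit line was applied by init before the service started; the process-side equivalent looks like the following (a standalone sketch, not netbpfload code; raising the hard limit requires CAP_SYS_RESOURCE, which init has when launching services):

#include <stdio.h>
#include <sys/resource.h>

// Mirror of 'rlimit memlock 1073741824 1073741824': raise our own
// RLIMIT_MEMLOCK to 1 GiB soft/hard before issuing bpf() syscalls.
int main(void) {
    const struct rlimit lim = { 1073741824, 1073741824 };
    if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
        perror("setrlimit(RLIMIT_MEMLOCK)");
        return 1;
    }
    return 0;
}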