offload - finish ipv4 tethering

Test: atest, TreeHugger
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: I88f14938c7c4f66190a3afa2acd9104284e27927
This commit is contained in:
Maciej Żenczykowski
2021-01-25 02:32:01 -08:00
parent 0f7f9b6e40
commit ec5f67d97e

View File

@@ -31,6 +31,44 @@
// From kernel:include/net/ip.h // From kernel:include/net/ip.h
#define IP_DF 0x4000 // Flag: "Don't Fragment" #define IP_DF 0x4000 // Flag: "Don't Fragment"
// ----- Helper functions for offsets to fields -----
//
// Compile-time sizes and byte offsets of packet header fields. Every
// *_OFFSET(field) macro takes a struct member name and expands to a constant
// built from sizeof()/offsetof(); none of them inspects an actual packet, so
// the results are only meaningful for packets that match the assumptions below.
//
// They all assume simple IP packets:
// - no VLAN ethernet tags
// - no IPv4 options (see IP4_HLEN/IP4_TCP_OFFSET/IP4_UDP_OFFSET)
// - no IPv6 extension headers
// - no TCP options (see TCP_HLEN)

// Fixed header lengths, i.e. the size of the corresponding header struct.
// NOTE(review): ETH_HLEN is used by the ETH_* macros below, but its definition
// here is commented out - presumably it is supplied by an included header;
// confirm before relying on it.
//#define ETH_HLEN sizeof(struct ethhdr)
#define IP4_HLEN sizeof(struct iphdr)
#define IP6_HLEN sizeof(struct ipv6hdr)
#define TCP_HLEN sizeof(struct tcphdr)
#define UDP_HLEN sizeof(struct udphdr)

// Offsets from beginning of L4 (TCP/UDP) header
#define TCP_OFFSET(field) offsetof(struct tcphdr, field)
#define UDP_OFFSET(field) offsetof(struct udphdr, field)

// Offsets from beginning of L3 (IPv4) header
// The TCP/UDP variants place the L4 header exactly IP4_HLEN bytes in
// (hence the "no IPv4 options" assumption).
#define IP4_OFFSET(field) offsetof(struct iphdr, field)
#define IP4_TCP_OFFSET(field) (IP4_HLEN + TCP_OFFSET(field))
#define IP4_UDP_OFFSET(field) (IP4_HLEN + UDP_OFFSET(field))

// Offsets from beginning of L3 (IPv6) header
// The TCP/UDP variants place the L4 header exactly IP6_HLEN bytes in
// (hence the "no IPv6 extension headers" assumption).
#define IP6_OFFSET(field) offsetof(struct ipv6hdr, field)
#define IP6_TCP_OFFSET(field) (IP6_HLEN + TCP_OFFSET(field))
#define IP6_UDP_OFFSET(field) (IP6_HLEN + UDP_OFFSET(field))

// Offsets from beginning of L2 (ie. Ethernet) header (which must be present)
// Each is the matching L3-relative offset shifted by ETH_HLEN.
#define ETH_IP4_OFFSET(field) (ETH_HLEN + IP4_OFFSET(field))
#define ETH_IP4_TCP_OFFSET(field) (ETH_HLEN + IP4_TCP_OFFSET(field))
#define ETH_IP4_UDP_OFFSET(field) (ETH_HLEN + IP4_UDP_OFFSET(field))
#define ETH_IP6_OFFSET(field) (ETH_HLEN + IP6_OFFSET(field))
#define ETH_IP6_TCP_OFFSET(field) (ETH_HLEN + IP6_TCP_OFFSET(field))
#define ETH_IP6_UDP_OFFSET(field) (ETH_HLEN + IP6_UDP_OFFSET(field))
// ----- Tethering stats and data limits -----
// Tethering stats, indexed by upstream interface. // Tethering stats, indexed by upstream interface.
DEFINE_BPF_MAP_GRW(tether_stats_map, HASH, TetherStatsKey, TetherStatsValue, 16, AID_NETWORK_STACK) DEFINE_BPF_MAP_GRW(tether_stats_map, HASH, TetherStatsKey, TetherStatsValue, 16, AID_NETWORK_STACK)
@@ -410,12 +448,76 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb, const bool
// since we don't offload all traffic in both directions) // since we don't offload all traffic in both directions)
if (stat_v->rxBytes + stat_v->txBytes + bytes > *limit_v) return TC_ACT_OK; if (stat_v->rxBytes + stat_v->txBytes + bytes > *limit_v) return TC_ACT_OK;
// TODO: replace Errors with Packets once implemented
__sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, packets); if (!is_tcp) return TC_ACT_OK; // HACK
if (!is_ethernet) {
// Try to inject an ethernet header, and simply return if we fail.
// We do this even if TX interface is RAWIP and thus does not need an ethernet header,
// because this is easier and the kernel will strip extraneous ethernet header.
if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
__sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
return TC_ACT_OK;
}
// bpf_skb_change_head() invalidates all pointers - reload them
data = (void*)(long)skb->data;
data_end = (void*)(long)skb->data_end;
eth = data;
ip = (void*)(eth + 1);
tcph = is_tcp ? (void*)(ip + 1) : NULL;
udph = is_tcp ? NULL : (void*)(ip + 1);
// I do not believe this can ever happen, but keep the verifier happy...
if (data + sizeof(struct ethhdr) + sizeof(*ip) + (is_tcp ? sizeof(*tcph) : sizeof(*udph)) > data_end) {
__sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
return TC_ACT_SHOT;
}
};
// At this point we always have an ethernet header - which will get stripped by the
// kernel during transmit through a rawip interface. ie. 'eth' pointer is valid.
// Additionally note that 'is_ethernet' and 'l2_header_size' are no longer correct.
// Overwrite any mac header with the new one
// For a rawip tx interface it will simply be a bunch of zeroes and later stripped.
*eth = v->macHeader;
const int sz4 = sizeof(__be32);
const __be32 old_daddr = k.dst4.s_addr;
const __be32 old_saddr = k.src4.s_addr;
const __be32 new_daddr = v->dst46.s6_addr32[3];
const __be32 new_saddr = v->src46.s6_addr32[3];
bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), old_daddr, new_daddr, sz4 | BPF_F_PSEUDO_HDR);
bpf_l3_csum_replace(skb, ETH_IP4_OFFSET(check), old_daddr, new_daddr, sz4);
bpf_skb_store_bytes(skb, ETH_IP4_OFFSET(daddr), &new_daddr, sz4, 0);
bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), old_saddr, new_saddr, sz4 | BPF_F_PSEUDO_HDR);
bpf_l3_csum_replace(skb, ETH_IP4_OFFSET(check), old_saddr, new_saddr, sz4);
bpf_skb_store_bytes(skb, ETH_IP4_OFFSET(saddr), &new_saddr, sz4, 0);
const int sz2 = sizeof(__be16);
bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), k.srcPort, v->srcPort, sz2);
bpf_skb_store_bytes(skb, ETH_IP4_TCP_OFFSET(source), &v->srcPort, sz2, 0);
bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), k.dstPort, v->dstPort, sz2);
bpf_skb_store_bytes(skb, ETH_IP4_TCP_OFFSET(dest), &v->dstPort, sz2, 0);
// TTL dec
// v->last_used = bpf_ktime_get_boot_ns();
__sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
__sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, bytes); __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, bytes);
// TODO: not actually implemented yet // Redirect to forwarded interface.
return TC_ACT_OK; //
// Note that bpf_redirect() cannot fail unless you pass invalid flags.
// The redirect actually happens after the ebpf program has already terminated,
// and can fail for example for mtu reasons at that point in time, but there's nothing
// we can do about it here.
return bpf_redirect(v->oif, 0 /* this is effectively BPF_F_EGRESS */);
} }
// Real implementations for 5.9+ kernels // Real implementations for 5.9+ kernels