Merge changes from topic "configure_bpf_for_clat"

* changes:
  [CLATJ#20] ClatdCoordinator: stop bpf for clat
  [CLATJ#19] ClatdCoordinator: configure bpf for clat
  [CLATJ#18] libclat: move clat tc and bpf utils from netd
  [CLATJ#17] bpf_connectivity_headers visible to libclat
This commit is contained in:
Maciej Żenczykowski
2022-01-24 19:14:35 +00:00
committed by Gerrit Code Review
10 changed files with 1164 additions and 3 deletions

View File

@@ -42,6 +42,7 @@ cc_library_headers {
// TODO: remove it when NetworkStatsService is moved into the mainline module and no more
// calls to JNI in libservices.core.
"//frameworks/base/services/core/jni",
"//packages/modules/Connectivity/service/native/libs/libclat",
"//packages/modules/Connectivity/Tethering",
"//packages/modules/Connectivity/service/native",
"//packages/modules/Connectivity/tests/unit/jni",

View File

@@ -65,6 +65,7 @@ cc_library_shared {
"libbase_headers",
],
static_libs: [
"libbase",
"libclat",
"libip_checksum",
"libnetjniutils",

View File

@@ -19,17 +19,22 @@
#include <fcntl.h>
#include <linux/if_tun.h>
#include <linux/ioctl.h>
#include <log/log.h>
#include <nativehelper/JNIHelp.h>
#include <net/if.h>
#include <string>
#include <netjniutils/netjniutils.h>
#include "libclat/bpfhelper.h"
#include "libclat/clatutils.h"
#include "nativehelper/scoped_utf_chars.h"
// Sync from system/netd/include/netid_client.h
#define MARK_UNSET 0u
#define DEVICEPREFIX "v4-"
namespace android {
static void throwIOException(JNIEnv* env, const char* msg, int error) {
jniThrowExceptionFmt(env, "java/io/IOException", "%s: %s", msg, strerror(error));
@@ -237,6 +242,86 @@ static void com_android_server_connectivity_ClatCoordinator_configurePacketSocke
}
}
// Fills |output| with the interface names, interface indexes and parsed addresses that
// describe one clat instance.
//
//   iface  - name of the base interface; it must currently exist.
//   pfx96  - textual IPv6 address of the plat prefix.
//   v4     - textual IPv4 address.
//   v6     - textual IPv6 source address.
//   output - tracker to populate; it may be left partially written on failure.
//
// The name of the v4- interface is derived by prepending DEVICEPREFIX to |iface| and that
// interface must exist as well.
//
// Returns 0 on success, or -1 (after logging the reason) on the first failure.
int initTracker(const std::string& iface, const std::string& pfx96, const std::string& v4,
                const std::string& v6, net::clat::ClatdTracker* output) {
    strlcpy(output->iface, iface.c_str(), sizeof(output->iface));
    output->ifIndex = if_nametoindex(iface.c_str());
    if (output->ifIndex == 0) {
        ALOGE("interface %s not found", output->iface);
        return -1;
    }

    // snprintf() reports the length it wanted to write (or a negative value on error), so
    // both an encoding error and a truncated name are rejected here.
    const int len = snprintf(output->v4iface, sizeof(output->v4iface), "%s%s", DEVICEPREFIX,
                             iface.c_str());
    if (len < 0 || static_cast<size_t>(len) >= sizeof(output->v4iface)) {
        ALOGE("interface name too long '%s'", output->v4iface);
        return -1;
    }

    output->v4ifIndex = if_nametoindex(output->v4iface);
    if (output->v4ifIndex == 0) {
        ALOGE("v4-interface %s not found", output->v4iface);
        return -1;
    }

    // inet_pton() returns 1 only when the text was parsed; 0 means the text is not a valid
    // address and -1 means the address family is unsupported. Treat everything except 1 as
    // a failure so that the -1 case is not mistaken for success.
    if (inet_pton(AF_INET6, pfx96.c_str(), &output->pfx96) != 1) {
        ALOGE("invalid IPv6 address specified for plat prefix: %s", pfx96.c_str());
        return -1;
    }

    if (inet_pton(AF_INET, v4.c_str(), &output->v4) != 1) {
        ALOGE("Invalid IPv4 address %s", v4.c_str());
        return -1;
    }

    if (inet_pton(AF_INET6, v6.c_str(), &output->v6) != 1) {
        ALOGE("Invalid source address %s", v6.c_str());
        return -1;
    }

    return 0;
}
// TODO: fork clatd and rename to .._startClatd.
//
// JNI entry point behind native_maybeStartBpf. Converts the Java strings, (re)initializes
// the clat BPF maps and, if the tracker can be built from the arguments, asks libclat to
// start BPF offload for it. Failures of initMaps()/initTracker() are only logged by those
// functions; the return value is always 0 for now (see TODO on the return statement).
//
// The three FileDescriptor arguments and |clazz| are currently unused.
static jint com_android_server_connectivity_ClatCoordinator_maybeStartBpf(
        JNIEnv* env, jobject clazz, jobject tunJavaFd, jobject readSockJavaFd,
        jobject writeSockJavaFd, jstring iface, jstring pfx96, jstring v4, jstring v6) {
    // Each string is converted and checked before the next one is touched. A null c_str()
    // must never reach initTracker(), because building a std::string from a null pointer
    // is undefined behaviour.
    // NOTE(review): ScopedUtfChars is expected to leave c_str() null with a Java exception
    // pending when the jstring is null or cannot be converted - confirm against
    // libnativehelper.
    ScopedUtfChars ifaceStr(env, iface);
    if (ifaceStr.c_str() == nullptr) return 0;
    ScopedUtfChars pfx96Str(env, pfx96);
    if (pfx96Str.c_str() == nullptr) return 0;
    ScopedUtfChars v4Str(env, v4);
    if (v4Str.c_str() == nullptr) return 0;
    ScopedUtfChars v6Str(env, v6);
    if (v6Str.c_str() == nullptr) return 0;

    // Start BPF if any
    // Both initMaps() and initTracker() return 0 on success, hence the negations.
    if (!net::clat::initMaps()) {
        net::clat::ClatdTracker tracker = {};
        if (!initTracker(ifaceStr.c_str(), pfx96Str.c_str(), v4Str.c_str(), v6Str.c_str(),
                         &tracker)) {
            net::clat::maybeStartBpf(tracker);
        }
    }

    return 0;  // TODO: return forked clatd pid.
}
// TODO: stop clatd and rename to .._stopClatd.
//
// JNI entry point behind native_maybeStopBpf. Converts the Java strings, (re)initializes
// the clat BPF maps and, if the tracker can be built from the arguments, asks libclat to
// stop BPF offload for it. Failures of initMaps()/initTracker() are only logged by those
// functions. |clazz| and |pid| are currently unused.
static void com_android_server_connectivity_ClatCoordinator_maybeStopBpf(JNIEnv* env, jobject clazz,
                                                                      jstring iface, jstring pfx96,
                                                                      jstring v4, jstring v6,
                                                                      jint pid /* unused */) {
    // Each string is converted and checked before the next one is touched. A null c_str()
    // must never reach initTracker(), because building a std::string from a null pointer
    // is undefined behaviour.
    // NOTE(review): ScopedUtfChars is expected to leave c_str() null with a Java exception
    // pending when the jstring is null or cannot be converted - confirm against
    // libnativehelper.
    ScopedUtfChars ifaceStr(env, iface);
    if (ifaceStr.c_str() == nullptr) return;
    ScopedUtfChars pfx96Str(env, pfx96);
    if (pfx96Str.c_str() == nullptr) return;
    ScopedUtfChars v4Str(env, v4);
    if (v4Str.c_str() == nullptr) return;
    ScopedUtfChars v6Str(env, v6);
    if (v6Str.c_str() == nullptr) return;

    // Both initMaps() and initTracker() return 0 on success, hence the negations.
    if (!net::clat::initMaps()) {
        net::clat::ClatdTracker tracker = {};
        if (!initTracker(ifaceStr.c_str(), pfx96Str.c_str(), v4Str.c_str(), v6Str.c_str(),
                         &tracker)) {
            net::clat::maybeStopBpf(tracker);
        }
    }
}
/*
* JNI registration.
*/
@@ -259,6 +344,13 @@ static const JNINativeMethod gMethods[] = {
(void*)com_android_server_connectivity_ClatCoordinator_addAnycastSetsockopt},
{"native_configurePacketSocket", "(Ljava/io/FileDescriptor;Ljava/lang/String;I)V",
(void*)com_android_server_connectivity_ClatCoordinator_configurePacketSocket},
{"native_maybeStartBpf",
"(Ljava/io/FileDescriptor;Ljava/io/FileDescriptor;Ljava/io/FileDescriptor;Ljava/lang/"
"String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
(void*)com_android_server_connectivity_ClatCoordinator_maybeStartBpf},
{"native_maybeStopBpf",
"(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;I)V",
(void*)com_android_server_connectivity_ClatCoordinator_maybeStopBpf},
};
int register_android_server_connectivity_ClatCoordinator(JNIEnv* env) {

View File

@@ -19,9 +19,22 @@ package {
cc_library_static {
name: "libclat",
defaults: ["netd_defaults"],
srcs: ["clatutils.cpp"],
header_libs: [
"bpf_connectivity_headers",
"bpf_headers",
"bpf_syscall_wrappers",
],
srcs: [
"TcUtils.cpp", // TODO: move to frameworks/libs/net
"bpfhelper.cpp",
"clatutils.cpp",
],
stl: "libc++_static",
static_libs: ["libip_checksum"],
static_libs: [
"libbase",
"libip_checksum",
"libnetdutils", // for netdutils/UidConstants.h in bpf_shared.h
],
shared_libs: ["liblog"],
export_include_dirs: ["include"],
min_sdk_version: "30",
@@ -32,7 +45,13 @@ cc_test {
name: "libclat_test",
defaults: ["netd_defaults"],
test_suites: ["device-tests"],
header_libs: [
"bpf_connectivity_headers",
"bpf_headers",
"bpf_syscall_wrappers",
],
srcs: [
"TcUtilsTest.cpp",
"clatutils_test.cpp",
],
static_libs: [
@@ -40,10 +59,12 @@ cc_test {
"libclat",
"libip_checksum",
"libnetd_test_tun_interface",
"libnetdutils", // for netdutils/UidConstants.h in bpf_shared.h
"libtcutils",
],
shared_libs: [
"liblog",
"libnetutils",
],
require_root: true,
}
}

View File

@@ -0,0 +1,390 @@
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "TcUtils"
#include "libclat/TcUtils.h"
#include <arpa/inet.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <log/log.h>
#include "android-base/unique_fd.h"
namespace android {
namespace net {
using std::max;
// Sync from system/netd/server/NetlinkCommands.h
const sockaddr_nl KERNEL_NLADDR = {AF_NETLINK, 0, 0, 0};
const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
static int doSIOCGIF(const std::string& interface, int opt) {
base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
if (ufd < 0) {
const int err = errno;
ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
return -err;
};
struct ifreq ifr = {};
// We use strncpy() instead of strlcpy() since kernel has to be able
// to handle non-zero terminated junk passed in by userspace anyway,
// and this way too long interface names (more than IFNAMSIZ-1 = 15
// characters plus terminating NULL) will not get truncated to 15
// characters and zero-terminated and thus potentially erroneously
// match a truncated interface if one were to exist.
strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
if (ioctl(ufd, opt, &ifr, sizeof(ifr))) return -errno;
if (opt == SIOCGIFHWADDR) return ifr.ifr_hwaddr.sa_family;
if (opt == SIOCGIFMTU) return ifr.ifr_mtu;
return -EINVAL;
}
// Returns the hardware address type reported by SIOCGIFHWADDR for |interface|
// (an ARPHRD_* constant), or a negative errno value on failure.
int hardwareAddressType(const std::string& interface) {
    const int hwAddrType = doSIOCGIF(interface, SIOCGIFHWADDR);
    return hwAddrType;
}
// Returns the MTU reported by SIOCGIFMTU for |interface|, or a negative errno value
// on failure.
int deviceMTU(const std::string& interface) {
    const int mtu = doSIOCGIF(interface, SIOCGIFMTU);
    return mtu;
}
// Classifies |interface| by its hardware address type.
//
// Returns true for ARPHRD_ETHER and false for the header-less types (ARPHRD_NONE,
// ARPHRD_RAWIP and the legacy value 530). If the type cannot be read, the error carries
// the errno from hardwareAddressType(); any other type yields an EAFNOSUPPORT error.
base::Result<bool> isEthernet(const std::string& interface) {
    const int hwType = hardwareAddressType(interface);
    if (hwType < 0) {
        // ErrnoErrorf picks the error code up from errno.
        errno = -hwType;
        return ErrnoErrorf("Get hardware address type of interface {} failed", interface);
    }

    if (hwType == ARPHRD_ETHER) return true;

    // ARPHRD_RAWIP is 519 since rmnet support was upstreamed in Linux 4.14+;
    // 530 is ARPHRD_RAWIP on some Android 4.9 kernels with rmnet.
    const bool hasNoL2Header =
            hwType == ARPHRD_NONE || hwType == ARPHRD_RAWIP || hwType == 530;
    if (hasNoL2Header) return false;

    errno = EAFNOSUPPORT;  // Address family not supported
    return ErrnoErrorf("Unknown hardware address type {} on interface {}", hwType, interface);
}
// TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
// and //system/netd/server/SockDiag.cpp:checkError(fd)
static int sendAndProcessNetlinkResponse(const void* req, int len) {
base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
if (fd == -1) {
const int err = errno;
ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
return -err;
}
static constexpr int on = 1;
int rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
// this is needed to get sane strace netlink parsing, it allocates the pid
rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
if (rv) {
const int err = errno;
ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
return -err;
}
// we do not want to receive messages from anyone besides the kernel
rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
if (rv) {
const int err = errno;
ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
return -err;
}
rv = send(fd, req, len, 0);
if (rv == -1) return -errno;
if (rv != len) return -EMSGSIZE;
struct {
nlmsghdr h;
nlmsgerr e;
char buf[256];
} resp = {};
rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
if (rv == -1) {
const int err = errno;
ALOGE("recv() failed");
return -err;
}
if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
ALOGE("recv() returned short packet: %d", rv);
return -EMSGSIZE;
}
if (resp.h.nlmsg_len != (unsigned)rv) {
ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
return -EBADMSG;
}
if (resp.h.nlmsg_type != NLMSG_ERROR) {
ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
return -EBADMSG;
}
return resp.e.error; // returns 0 on success
}
// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
//
// Builds one netlink request that carries a tcmsg for interface |ifIndex| with a
// TCA_KIND attribute of "clsact", sends it with sendAndProcessNetlinkResponse() and
// returns that function's result (0 on success, otherwise a non-zero error value).
// |nlMsgFlags| is OR-ed with NETLINK_REQUEST_FLAGS.
int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
// This is the name of the qdisc we are attaching.
// Some hoop jumping to make this compile time constant with known size,
// so that the structure declaration is well defined at compile time.
#define CLSACT "clsact"
// sizeof() includes the terminating NULL
static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
// Request layout: netlink header, tc message, then one string attribute (TCA_KIND).
// The string storage is padded up to netlink alignment.
const struct {
nlmsghdr n;
tcmsg t;
struct {
nlattr attr;
char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
} kind;
} req = {
.n =
{
.nlmsg_len = sizeof(req),
.nlmsg_type = nlMsgType,
.nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
},
.t =
{
.tcm_family = AF_UNSPEC,
.tcm_ifindex = ifIndex,
.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
.tcm_parent = TC_H_CLSACT,
},
.kind =
{
.attr =
{
// Attribute length covers the header plus the unpadded string
// (including its terminator), not the alignment padding.
.nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
.nla_type = TCA_KIND,
},
.str = CLSACT,
},
};
#undef CLSACT
return sendAndProcessNetlinkResponse(&req, sizeof(req));
}
// tc filter add dev .. in/egress prio 4 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
// direct-action
//
// Builds one RTM_NEWTFILTER netlink request that attaches the BPF program referred to by
// |bpfFd| as a "bpf" classifier on interface |ifIndex|, and sends it with
// sendAndProcessNetlinkResponse(), whose result is returned (0 on success).
//
//   ifIndex  - index of the interface to attach to.
//   ingress  - true to attach on the ingress side of clsact, false for egress.
//   proto    - ethertype in host byte order; it is converted with htons() below.
//   bpfFd    - file descriptor of the program, passed in the TCA_BPF_FD attribute.
//   ethernet - together with |ingress| selects which of the four program names is
//              reported in the TCA_BPF_NAME attribute.
//
// The request uses NLM_F_EXCL | NLM_F_CREATE and priority PRIO_CLAT.
int tcFilterAddDevBpf(int ifIndex, bool ingress, uint16_t proto, int bpfFd, bool ethernet) {
// This is the name of the filter we're attaching (ie. this is the 'bpf'
// packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
//
// We go through some hoops in order to make this compile time constants
// so that we can define the struct further down the function with the
// field for this sized correctly already during the build.
#define BPF "bpf"
// sizeof() includes the terminating NULL
static constexpr size_t ASCIIZ_LEN_BPF = sizeof(BPF);
// This is to replicate program name suffix used by 'tc' Linux cli
// when it attaches programs.
#define FSOBJ_SUFFIX ":[*fsobj]"
// This macro expands (from header files) to:
// prog_clatd_schedcls_ingress6_clat_rawip:[*fsobj]
// and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
// (also compatible with anything that has 0 size L2 header)
static constexpr char name_clat_rx_rawip[] = CLAT_INGRESS6_PROG_RAWIP_NAME FSOBJ_SUFFIX;
// This macro expands (from header files) to:
// prog_clatd_schedcls_ingress6_clat_ether:[*fsobj]
// and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
// (also compatible with anything that has standard ethernet header)
static constexpr char name_clat_rx_ether[] = CLAT_INGRESS6_PROG_ETHER_NAME FSOBJ_SUFFIX;
// This macro expands (from header files) to:
// prog_clatd_schedcls_egress4_clat_rawip:[*fsobj]
// and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
// (also compatible with anything that has 0 size L2 header)
static constexpr char name_clat_tx_rawip[] = CLAT_EGRESS4_PROG_RAWIP_NAME FSOBJ_SUFFIX;
// This macro expands (from header files) to:
// prog_clatd_schedcls_egress4_clat_ether:[*fsobj]
// and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
// (also compatible with anything that has standard ethernet header)
static constexpr char name_clat_tx_ether[] = CLAT_EGRESS4_PROG_ETHER_NAME FSOBJ_SUFFIX;
#undef FSOBJ_SUFFIX
// The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
// booleans. We need to compile time allocate enough space in the struct
// hence this macro magic to make sure we have enough space for either
// possibility. In practice some of these are actually the same size.
static constexpr size_t ASCIIZ_MAXLEN_NAME = max({
sizeof(name_clat_rx_rawip),
sizeof(name_clat_rx_ether),
sizeof(name_clat_tx_rawip),
sizeof(name_clat_tx_ether),
});
// These are not compile time constants: 'name' is used in strncpy below
const char* const name_clat_rx = ethernet ? name_clat_rx_ether : name_clat_rx_rawip;
const char* const name_clat_tx = ethernet ? name_clat_tx_ether : name_clat_tx_rawip;
const char* const name = ingress ? name_clat_rx : name_clat_tx;
// Request layout: netlink header, tc message, a TCA_KIND string attribute, and a
// nested TCA_OPTIONS attribute that holds the fd, name and flags attributes.
struct {
nlmsghdr n;
tcmsg t;
struct {
nlattr attr;
char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
} kind;
struct {
nlattr attr;
struct {
nlattr attr;
__u32 u32;
} fd;
struct {
nlattr attr;
char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
} name;
struct {
nlattr attr;
__u32 u32;
} flags;
} options;
} req = {
.n =
{
.nlmsg_len = sizeof(req),
.nlmsg_type = RTM_NEWTFILTER,
.nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
},
.t =
{
.tcm_family = AF_UNSPEC,
.tcm_ifindex = ifIndex,
.tcm_handle = TC_H_UNSPEC,
.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
// Priority goes in the upper 16 bits, the network-order protocol in the lower 16.
.tcm_info = static_cast<__u32>((PRIO_CLAT << 16) | htons(proto)),
},
.kind =
{
.attr =
{
.nla_len = sizeof(req.kind),
.nla_type = TCA_KIND,
},
.str = BPF,
},
.options =
{
.attr =
{
.nla_len = sizeof(req.options),
.nla_type = NLA_F_NESTED | TCA_OPTIONS,
},
.fd =
{
.attr =
{
.nla_len = sizeof(req.options.fd),
.nla_type = TCA_BPF_FD,
},
.u32 = static_cast<__u32>(bpfFd),
},
.name =
{
.attr =
{
.nla_len = sizeof(req.options.name),
.nla_type = TCA_BPF_NAME,
},
// Visible via 'tc filter show', but
// is overwritten by strncpy below
.str = "placeholder",
},
.flags =
{
.attr =
{
.nla_len = sizeof(req.options.flags),
.nla_type = TCA_BPF_FLAGS,
},
.u32 = TCA_BPF_FLAG_ACT_DIRECT,
},
},
};
#undef BPF
// Replace the placeholder with the run-time selected program name. The destination is
// sized from the longest of the four candidates, and strncpy() zero-fills the remainder.
strncpy(req.options.name.str, name, sizeof(req.options.name.str));
return sendAndProcessNetlinkResponse(&req, sizeof(req));
}
// tc filter del dev .. in/egress prio 4 protocol ..
int tcFilterDelDev(int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
const struct {
nlmsghdr n;
tcmsg t;
} req = {
.n =
{
.nlmsg_len = sizeof(req),
.nlmsg_type = RTM_DELTFILTER,
.nlmsg_flags = NETLINK_REQUEST_FLAGS,
},
.t =
{
.tcm_family = AF_UNSPEC,
.tcm_ifindex = ifIndex,
.tcm_handle = TC_H_UNSPEC,
.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
.tcm_info = (static_cast<uint32_t>(prio) << 16) |
static_cast<uint32_t>(htons(proto)),
},
};
return sendAndProcessNetlinkResponse(&req, sizeof(req));
}
} // namespace net
} // namespace android

View File

@@ -0,0 +1,212 @@
/*
* Copyright 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* TcUtilsTest.cpp - unit tests for TcUtils.cpp
*/
#include <gtest/gtest.h>
#include "libclat/TcUtils.h"
#include <linux/if_arp.h>
#include <stdlib.h>
#include <sys/wait.h>
#include "bpf/BpfUtils.h"
#include "bpf_shared.h"
namespace android {
namespace net {
// Fixture for the TcUtils tests. It holds no state; SetUp() is intentionally empty.
class TcUtilsTest : public ::testing::Test {
public:
void SetUp() {}
};
// A non-existent interface name must be reported as -ENODEV.
TEST_F(TcUtilsTest, HardwareAddressTypeOfNonExistingIf) {
ASSERT_EQ(-ENODEV, hardwareAddressType("not_existing_if"));
}
// The loopback interface must report ARPHRD_LOOPBACK.
TEST_F(TcUtilsTest, HardwareAddressTypeOfLoopback) {
ASSERT_EQ(ARPHRD_LOOPBACK, hardwareAddressType("lo"));
}
// If wireless 'wlan0' interface exists it should be Ethernet.
TEST_F(TcUtilsTest, HardwareAddressTypeOfWireless) {
int type = hardwareAddressType("wlan0");
// No such interface on this device: nothing to check.
if (type == -ENODEV) return;
ASSERT_EQ(ARPHRD_ETHER, type);
}
// If cellular 'rmnet_data0' interface exists it should
// *probably* not be Ethernet and instead be RawIp.
TEST_F(TcUtilsTest, HardwareAddressTypeOfCellular) {
int type = hardwareAddressType("rmnet_data0");
// No such interface on this device: nothing to check.
if (type == -ENODEV) return;
ASSERT_NE(ARPHRD_ETHER, type);
// ARPHRD_RAWIP is 530 on some pre-4.14 Qualcomm devices.
if (type == 530) return;
ASSERT_EQ(ARPHRD_RAWIP, type);
}
// isEthernet() must fail with ENODEV for a non-existent interface.
TEST_F(TcUtilsTest, IsEthernetOfNonExistingIf) {
auto res = isEthernet("not_existing_if");
ASSERT_FALSE(res.ok());
ASSERT_EQ(ENODEV, res.error().code());
}
// Loopback is neither Ethernet nor one of the accepted header-less types, so
// isEthernet() must fail with EAFNOSUPPORT.
TEST_F(TcUtilsTest, IsEthernetOfLoopback) {
auto res = isEthernet("lo");
ASSERT_FALSE(res.ok());
ASSERT_EQ(EAFNOSUPPORT, res.error().code());
}
// If wireless 'wlan0' interface exists it should be Ethernet.
// See also HardwareAddressTypeOfWireless.
TEST_F(TcUtilsTest, IsEthernetOfWireless) {
auto res = isEthernet("wlan0");
// No such interface on this device: nothing to check.
if (!res.ok() && res.error().code() == ENODEV) return;
ASSERT_RESULT_OK(res);
ASSERT_TRUE(res.value());
}
// If cellular 'rmnet_data0' interface exists it should
// *probably* not be Ethernet and instead be RawIp.
// See also HardwareAddressTypeOfCellular.
TEST_F(TcUtilsTest, IsEthernetOfCellular) {
auto res = isEthernet("rmnet_data0");
// No such interface on this device: nothing to check.
if (!res.ok() && res.error().code() == ENODEV) return;
ASSERT_RESULT_OK(res);
ASSERT_FALSE(res.value());
}
// A non-existent interface name must be reported as -ENODEV.
TEST_F(TcUtilsTest, DeviceMTUOfNonExistingIf) {
ASSERT_EQ(-ENODEV, deviceMTU("not_existing_if"));
}
// The test expects the loopback MTU to be 65536.
TEST_F(TcUtilsTest, DeviceMTUofLoopback) {
ASSERT_EQ(65536, deviceMTU("lo"));
}
// The following six tests each retrieve one pinned clat map or program and check that
// the returned descriptor is a fresh one (>= 3) whose only fd flag is FD_CLOEXEC.
TEST_F(TcUtilsTest, GetClatEgress4MapFd) {
int fd = getClatEgress4MapFd();
ASSERT_GE(fd, 3); // 0,1,2 - stdin/out/err, thus fd >= 3
EXPECT_EQ(FD_CLOEXEC, fcntl(fd, F_GETFD));
close(fd);
}
TEST_F(TcUtilsTest, GetClatEgress4RawIpProgFd) {
int fd = getClatEgress4ProgFd(RAWIP);
ASSERT_GE(fd, 3);
EXPECT_EQ(FD_CLOEXEC, fcntl(fd, F_GETFD));
close(fd);
}
TEST_F(TcUtilsTest, GetClatEgress4EtherProgFd) {
int fd = getClatEgress4ProgFd(ETHER);
ASSERT_GE(fd, 3);
EXPECT_EQ(FD_CLOEXEC, fcntl(fd, F_GETFD));
close(fd);
}
TEST_F(TcUtilsTest, GetClatIngress6MapFd) {
int fd = getClatIngress6MapFd();
ASSERT_GE(fd, 3); // 0,1,2 - stdin/out/err, thus fd >= 3
EXPECT_EQ(FD_CLOEXEC, fcntl(fd, F_GETFD));
close(fd);
}
TEST_F(TcUtilsTest, GetClatIngress6RawIpProgFd) {
int fd = getClatIngress6ProgFd(RAWIP);
ASSERT_GE(fd, 3);
EXPECT_EQ(FD_CLOEXEC, fcntl(fd, F_GETFD));
close(fd);
}
TEST_F(TcUtilsTest, GetClatIngress6EtherProgFd) {
int fd = getClatIngress6ProgFd(ETHER);
ASSERT_GE(fd, 3);
EXPECT_EQ(FD_CLOEXEC, fcntl(fd, F_GETFD));
close(fd);
}
// See Linux kernel source in include/net/flow.h
#define LOOPBACK_IFINDEX 1
TEST_F(TcUtilsTest, AttachReplaceDetachClsactLo) {
// This attaches and detaches a configuration-less and thus no-op clsact
// qdisc to loopback interface (and it takes fractions of a second)
EXPECT_EQ(0, tcQdiscAddDevClsact(LOOPBACK_IFINDEX));
EXPECT_EQ(0, tcQdiscReplaceDevClsact(LOOPBACK_IFINDEX));
EXPECT_EQ(0, tcQdiscDelDevClsact(LOOPBACK_IFINDEX));
// A second delete is expected to fail with -EINVAL because the qdisc is already gone.
EXPECT_EQ(-EINVAL, tcQdiscDelDevClsact(LOOPBACK_IFINDEX));
}
// Shared body of the four CheckAttachBpfFilter* tests below. |ingress| selects the
// ingress6 or egress4 program and filter; |ethernet| selects the ETHER or RAWIP variant.
// The statement order matters: each expectation depends on the qdisc/filter state left
// behind by the previous calls.
static void checkAttachDetachBpfFilterClsactLo(const bool ingress, const bool ethernet) {
// Older kernels return EINVAL instead of ENOENT due to lacking proper error propagation...
const int errNOENT = android::bpf::isAtLeastKernelVersion(4, 19, 0) ? ENOENT : EINVAL;
int clatBpfFd = ingress ? getClatIngress6ProgFd(ethernet) : getClatEgress4ProgFd(ethernet);
ASSERT_GE(clatBpfFd, 3);
// This attaches and detaches a clsact plus ebpf program to loopback
// interface, but it should not affect traffic by virtue of us not
// actually populating the ebpf control map.
// Furthermore: it only takes fractions of a second.
// Before the clsact qdisc exists, filter deletion is expected to fail with -EINVAL.
EXPECT_EQ(-EINVAL, tcFilterDelDevIngressClatIpv6(LOOPBACK_IFINDEX));
EXPECT_EQ(-EINVAL, tcFilterDelDevEgressClatIpv4(LOOPBACK_IFINDEX));
EXPECT_EQ(0, tcQdiscAddDevClsact(LOOPBACK_IFINDEX));
// With the qdisc present but no filter attached, deletion reports "no entry".
EXPECT_EQ(-errNOENT, tcFilterDelDevIngressClatIpv6(LOOPBACK_IFINDEX));
EXPECT_EQ(-errNOENT, tcFilterDelDevEgressClatIpv4(LOOPBACK_IFINDEX));
if (ingress) {
EXPECT_EQ(0, tcFilterAddDevIngressClatIpv6(LOOPBACK_IFINDEX, clatBpfFd, ethernet));
EXPECT_EQ(0, tcFilterDelDevIngressClatIpv6(LOOPBACK_IFINDEX));
} else {
EXPECT_EQ(0, tcFilterAddDevEgressClatIpv4(LOOPBACK_IFINDEX, clatBpfFd, ethernet));
EXPECT_EQ(0, tcFilterDelDevEgressClatIpv4(LOOPBACK_IFINDEX));
}
// After the add/del pair both filters are absent again.
EXPECT_EQ(-errNOENT, tcFilterDelDevIngressClatIpv6(LOOPBACK_IFINDEX));
EXPECT_EQ(-errNOENT, tcFilterDelDevEgressClatIpv4(LOOPBACK_IFINDEX));
EXPECT_EQ(0, tcQdiscDelDevClsact(LOOPBACK_IFINDEX));
// Once the qdisc is removed, filter deletion fails with -EINVAL again.
EXPECT_EQ(-EINVAL, tcFilterDelDevIngressClatIpv6(LOOPBACK_IFINDEX));
EXPECT_EQ(-EINVAL, tcFilterDelDevEgressClatIpv4(LOOPBACK_IFINDEX));
close(clatBpfFd);
}
TEST_F(TcUtilsTest, CheckAttachBpfFilterRawIpClsactEgressLo) {
checkAttachDetachBpfFilterClsactLo(EGRESS, RAWIP);
}
TEST_F(TcUtilsTest, CheckAttachBpfFilterEthernetClsactEgressLo) {
checkAttachDetachBpfFilterClsactLo(EGRESS, ETHER);
}
TEST_F(TcUtilsTest, CheckAttachBpfFilterRawIpClsactIngressLo) {
checkAttachDetachBpfFilterClsactLo(INGRESS, RAWIP);
}
TEST_F(TcUtilsTest, CheckAttachBpfFilterEthernetClsactIngressLo) {
checkAttachDetachBpfFilterClsactLo(INGRESS, ETHER);
}
} // namespace net
} // namespace android

View File

@@ -0,0 +1,231 @@
/*
* Copyright 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* bpfhelper.cpp - helpers that set up and tear down the clat BPF maps and tc filters
*/
#define LOG_TAG "bpfhelper"
#include "libclat/bpfhelper.h"
#include <android-base/unique_fd.h>
#include <log/log.h>
#include "bpf/BpfMap.h"
#include "libclat/TcUtils.h"
#define DEVICEPREFIX "v4-"
using android::base::unique_fd;
using android::net::RAWIP;
using android::net::getClatEgress4MapFd;
using android::net::getClatIngress6MapFd;
using android::net::getClatEgress4ProgFd;
using android::net::getClatIngress6ProgFd;
using android::net::tcQdiscAddDevClsact;
using android::net::tcFilterAddDevEgressClatIpv4;
using android::net::tcFilterAddDevIngressClatIpv6;
using android::net::tcFilterDelDevEgressClatIpv4;
using android::net::tcFilterDelDevIngressClatIpv6;
using android::bpf::BpfMap;
// File-scope handles for the clat egress4 and ingress6 BPF maps. They are declared
// without a backing fd here; initMaps() points them at freshly retrieved map fds, and
// maybeStartBpf()/maybeStopBpf() write and delete entries through them.
BpfMap<ClatEgress4Key, ClatEgress4Value> mClatEgress4Map;
BpfMap<ClatIngress6Key, ClatIngress6Value> mClatIngress6Map;
namespace android {
namespace net {
namespace clat {
// TODO: have a clearMap function to remove all stubs while system server crash.
// For long term, move bpf access into java and map initialization should live
// ClatCoordinator constructor.
//
// Retrieves the egress4 and ingress6 clat map fds and hands them to the file-scope
// BpfMap handles. Returns 0 on success. On failure the (positive) negation of the
// value returned by the failing getClat*MapFd() call is returned, and the egress4
// handle is reset to -1 if it had already been set up during this call.
int initMaps(void) {
    const int egress4Fd = getClatEgress4MapFd();
    if (egress4Fd < 0) {
        ALOGE("getClatEgress4MapFd() failure: %s", strerror(-egress4Fd));
        return -egress4Fd;
    }
    mClatEgress4Map.reset(egress4Fd);

    const int ingress6Fd = getClatIngress6MapFd();
    if (ingress6Fd < 0) {
        ALOGE("getClatIngress6MapFd() failure: %s", strerror(-ingress6Fd));
        // Do not leave a half-initialized pair behind.
        mClatEgress4Map.reset(-1);
        return -ingress6Fd;
    }
    mClatIngress6Map.reset(ingress6Fd);

    return 0;
}
void maybeStartBpf(const ClatdTracker& tracker) {
auto isEthernet = android::net::isEthernet(tracker.iface);
if (!isEthernet.ok()) {
ALOGE("isEthernet(%s[%d]) failure: %s", tracker.iface, tracker.ifIndex,
isEthernet.error().message().c_str());
return;
}
// This program will be attached to the v4-* interface which is a TUN and thus always rawip.
int rv = getClatEgress4ProgFd(RAWIP);
if (rv < 0) {
ALOGE("getClatEgress4ProgFd(RAWIP) failure: %s", strerror(-rv));
return;
}
unique_fd txRawIpProgFd(rv);
rv = getClatIngress6ProgFd(isEthernet.value());
if (rv < 0) {
ALOGE("getClatIngress6ProgFd(%d) failure: %s", isEthernet.value(), strerror(-rv));
return;
}
unique_fd rxProgFd(rv);
ClatEgress4Key txKey = {
.iif = tracker.v4ifIndex,
.local4 = tracker.v4,
};
ClatEgress4Value txValue = {
.oif = tracker.ifIndex,
.local6 = tracker.v6,
.pfx96 = tracker.pfx96,
.oifIsEthernet = isEthernet.value(),
};
auto ret = mClatEgress4Map.writeValue(txKey, txValue, BPF_ANY);
if (!ret.ok()) {
ALOGE("mClatEgress4Map.writeValue failure: %s", strerror(ret.error().code()));
return;
}
ClatIngress6Key rxKey = {
.iif = tracker.ifIndex,
.pfx96 = tracker.pfx96,
.local6 = tracker.v6,
};
ClatIngress6Value rxValue = {
// TODO: move all the clat code to eBPF and remove the tun interface entirely.
.oif = tracker.v4ifIndex,
.local4 = tracker.v4,
};
ret = mClatIngress6Map.writeValue(rxKey, rxValue, BPF_ANY);
if (!ret.ok()) {
ALOGE("mClatIngress6Map.writeValue failure: %s", strerror(ret.error().code()));
ret = mClatEgress4Map.deleteValue(txKey);
if (!ret.ok())
ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
return;
}
// We do tc setup *after* populating the maps, so scanning through them
// can always be used to tell us what needs cleanup.
// Usually the clsact will be added in RouteController::addInterfaceToPhysicalNetwork.
// But clat is started before the v4- interface is added to the network. The clat startup have
// to add clsact of v4- tun interface first for adding bpf filter in maybeStartBpf.
// TODO: move "qdisc add clsact" of v4- tun interface out from ClatdController.
rv = tcQdiscAddDevClsact(tracker.v4ifIndex);
if (rv) {
ALOGE("tcQdiscAddDevClsact(%d[%s]) failure: %s", tracker.v4ifIndex, tracker.v4iface,
strerror(-rv));
ret = mClatEgress4Map.deleteValue(txKey);
if (!ret.ok())
ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
ret = mClatIngress6Map.deleteValue(rxKey);
if (!ret.ok())
ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
return;
}
rv = tcFilterAddDevEgressClatIpv4(tracker.v4ifIndex, txRawIpProgFd, RAWIP);
if (rv) {
ALOGE("tcFilterAddDevEgressClatIpv4(%d[%s], RAWIP) failure: %s", tracker.v4ifIndex,
tracker.v4iface, strerror(-rv));
// The v4- interface clsact is not deleted for unwinding error because once it is created
// with interface addition, the lifetime is till interface deletion. Moreover, the clsact
// has no clat filter now. It should not break anything.
ret = mClatEgress4Map.deleteValue(txKey);
if (!ret.ok())
ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
ret = mClatIngress6Map.deleteValue(rxKey);
if (!ret.ok())
ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
return;
}
rv = tcFilterAddDevIngressClatIpv6(tracker.ifIndex, rxProgFd, isEthernet.value());
if (rv) {
ALOGE("tcFilterAddDevIngressClatIpv6(%d[%s], %d) failure: %s", tracker.ifIndex,
tracker.iface, isEthernet.value(), strerror(-rv));
rv = tcFilterDelDevEgressClatIpv4(tracker.v4ifIndex);
if (rv) {
ALOGE("tcFilterDelDevEgressClatIpv4(%d[%s]) failure: %s", tracker.v4ifIndex,
tracker.v4iface, strerror(-rv));
}
// The v4- interface clsact is not deleted. See the reason in the error unwinding code of
// the egress filter attaching of v4- tun interface.
ret = mClatEgress4Map.deleteValue(txKey);
if (!ret.ok())
ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
ret = mClatIngress6Map.deleteValue(rxKey);
if (!ret.ok())
ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
return;
}
// success
}
void maybeStopBpf(const ClatdTracker& tracker) {
int rv = tcFilterDelDevIngressClatIpv6(tracker.ifIndex);
if (rv < 0) {
ALOGE("tcFilterDelDevIngressClatIpv6(%d[%s]) failure: %s", tracker.ifIndex, tracker.iface,
strerror(-rv));
}
rv = tcFilterDelDevEgressClatIpv4(tracker.v4ifIndex);
if (rv < 0) {
ALOGE("tcFilterDelDevEgressClatIpv4(%d[%s]) failure: %s", tracker.v4ifIndex,
tracker.v4iface, strerror(-rv));
}
// We cleanup the maps last, so scanning through them can be used to
// determine what still needs cleanup.
ClatEgress4Key txKey = {
.iif = tracker.v4ifIndex,
.local4 = tracker.v4,
};
auto ret = mClatEgress4Map.deleteValue(txKey);
if (!ret.ok()) ALOGE("mClatEgress4Map.deleteValue failure: %s", strerror(ret.error().code()));
ClatIngress6Key rxKey = {
.iif = tracker.ifIndex,
.pfx96 = tracker.pfx96,
.local6 = tracker.v6,
};
ret = mClatIngress6Map.deleteValue(rxKey);
if (!ret.ok()) ALOGE("mClatIngress6Map.deleteValue failure: %s", strerror(ret.error().code()));
}
} // namespace clat
} // namespace net
} // namespace android

View File

@@ -0,0 +1,117 @@
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <android-base/result.h>
#include <errno.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/rtnetlink.h>
#include <string>
#include "bpf/BpfUtils.h"
#include "bpf_shared.h"
namespace android {
namespace net {
// For better code clarity - do not change values - used for booleans like
// with_ethernet_header or isEthernet.
constexpr bool RAWIP = false;
constexpr bool ETHER = true;
// For better code clarity when used for 'bool ingress' parameter.
constexpr bool EGRESS = false;
constexpr bool INGRESS = true;
// The priority of clat hook - must be after tethering.
constexpr uint16_t PRIO_CLAT = 4;
// this returns an ARPHRD_* constant or a -errno
int hardwareAddressType(const std::string& interface);
// return MTU or -errno
int deviceMTU(const std::string& interface);
base::Result<bool> isEthernet(const std::string& interface);
// Retrieves a read/write fd for the map at CLAT_EGRESS4_MAP_PATH.
// Returns the fd, or -errno when the retrieval call reports -1.
inline int getClatEgress4MapFd(void) {
    const int mapFd = bpf::mapRetrieveRW(CLAT_EGRESS4_MAP_PATH);
    if (mapFd == -1) return -errno;
    return mapFd;
}
// Retrieves the clat egress4 program fd, using the ETHER program path when
// with_ethernet_header is true and the RAWIP program path otherwise.
// Returns the fd, or -errno when the retrieval returns -1.
inline int getClatEgress4ProgFd(bool with_ethernet_header) {
    const int progFd = with_ethernet_header
                               ? bpf::retrieveProgram(CLAT_EGRESS4_PROG_ETHER_PATH)
                               : bpf::retrieveProgram(CLAT_EGRESS4_PROG_RAWIP_PATH);
    if (progFd == -1) return -errno;
    return progFd;
}
// Retrieves the clat ingress6 map fd via bpf::mapRetrieveRW(CLAT_INGRESS6_MAP_PATH).
// Returns the fd, or -errno when the retrieval returns -1.
inline int getClatIngress6MapFd(void) {
    const int mapFd = bpf::mapRetrieveRW(CLAT_INGRESS6_MAP_PATH);
    if (mapFd == -1) return -errno;
    return mapFd;
}
// Retrieves the clat ingress6 program fd, using the ETHER program path when
// with_ethernet_header is true and the RAWIP program path otherwise.
// Returns the fd, or -errno when the retrieval returns -1.
inline int getClatIngress6ProgFd(bool with_ethernet_header) {
    const int progFd = with_ethernet_header
                               ? bpf::retrieveProgram(CLAT_INGRESS6_PROG_ETHER_PATH)
                               : bpf::retrieveProgram(CLAT_INGRESS6_PROG_RAWIP_PATH);
    if (progFd == -1) return -errno;
    return progFd;
}
// Shared helper behind the tcQdisc{Add,Replace,Del}DevClsact wrappers: they
// pass the netlink message type (RTM_NEWQDISC / RTM_DELQDISC) and the NLM_F_*
// flags for the request on interface |ifIndex|.
// NOTE(review): the return convention is not visible in this header -
// presumably 0 on success and -errno on failure; confirm in the implementation.
int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags);
// Issues an RTM_NEWQDISC clsact request on |ifIndex| with
// NLM_F_EXCL | NLM_F_CREATE; returns whatever doTcQdiscClsact returns.
inline int tcQdiscAddDevClsact(int ifIndex) {
    const uint16_t createExclusive = NLM_F_EXCL | NLM_F_CREATE;
    return doTcQdiscClsact(ifIndex, RTM_NEWQDISC, createExclusive);
}
// Issues an RTM_NEWQDISC clsact request on |ifIndex| with
// NLM_F_CREATE | NLM_F_REPLACE; returns whatever doTcQdiscClsact returns.
inline int tcQdiscReplaceDevClsact(int ifIndex) {
    const uint16_t createOrReplace = NLM_F_CREATE | NLM_F_REPLACE;
    return doTcQdiscClsact(ifIndex, RTM_NEWQDISC, createOrReplace);
}
// Issues an RTM_DELQDISC clsact request on |ifIndex| with no extra netlink
// flags; returns whatever doTcQdiscClsact returns.
inline int tcQdiscDelDevClsact(int ifIndex) {
    const uint16_t noFlags = 0;
    return doTcQdiscClsact(ifIndex, RTM_DELQDISC, noFlags);
}
// tc filter add dev .. in/egress prio 4 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
// direct-action
//
// ifIndex:  index of the interface the filter is attached to.
// ingress:  INGRESS or EGRESS.
// proto:    ethertype to match; the wrappers in this header pass ETH_P_IPV6 or ETH_P_IP.
// bpfFd:    fd of the bpf program to attach.
// ethernet: ETHER or RAWIP.
// NOTE(review): the return convention is not visible in this header -
// presumably 0 on success and -errno on failure; confirm in the implementation.
int tcFilterAddDevBpf(int ifIndex, bool ingress, uint16_t proto, int bpfFd, bool ethernet);
// Equivalent of:
//   tc filter add dev .. ingress prio 4 protocol ipv6 bpf object-pinned /sys/fs/bpf/...
//   direct-action
// Forwards to tcFilterAddDevBpf with INGRESS and ETH_P_IPV6 and returns its result.
inline int tcFilterAddDevIngressClatIpv6(int ifIndex, int bpfFd, bool ethernet) {
    const uint16_t proto = ETH_P_IPV6;
    return tcFilterAddDevBpf(ifIndex, INGRESS, proto, bpfFd, ethernet);
}
// Equivalent of:
//   tc filter add dev .. egress prio 4 protocol ip bpf object-pinned /sys/fs/bpf/...
//   direct-action
// Forwards to tcFilterAddDevBpf with EGRESS and ETH_P_IP and returns its result.
inline int tcFilterAddDevEgressClatIpv4(int ifIndex, int bpfFd, bool ethernet) {
    const uint16_t proto = ETH_P_IP;
    return tcFilterAddDevBpf(ifIndex, EGRESS, proto, bpfFd, ethernet);
}
// tc filter del dev .. in/egress prio .. protocol ..
//
// ifIndex: index of the interface the filter is removed from.
// ingress: INGRESS or EGRESS.
// prio:    filter priority; the clat wrappers in this header pass PRIO_CLAT.
// proto:   ethertype of the filter; the wrappers pass ETH_P_IPV6 or ETH_P_IP.
// NOTE(review): the return convention is not visible in this header -
// presumably 0 on success and -errno on failure; confirm in the implementation.
int tcFilterDelDev(int ifIndex, bool ingress, uint16_t prio, uint16_t proto);
// Equivalent of: tc filter del dev .. ingress prio 4 protocol ipv6
// Forwards to tcFilterDelDev with INGRESS, PRIO_CLAT and ETH_P_IPV6 and
// returns its result.
inline int tcFilterDelDevIngressClatIpv6(int ifIndex) {
    const uint16_t proto = ETH_P_IPV6;
    return tcFilterDelDev(ifIndex, INGRESS, PRIO_CLAT, proto);
}
// Equivalent of: tc filter del dev .. egress prio 4 protocol ip
// Forwards to tcFilterDelDev with EGRESS, PRIO_CLAT and ETH_P_IP and returns
// its result.
inline int tcFilterDelDevEgressClatIpv4(int ifIndex) {
    const uint16_t proto = ETH_P_IP;
    return tcFilterDelDev(ifIndex, EGRESS, PRIO_CLAT, proto);
}
} // namespace net
} // namespace android

View File

@@ -0,0 +1,40 @@
// Copyright (C) 2021 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <arpa/inet.h>
#include <linux/if.h>
namespace android {
namespace net {
namespace clat {
// Parameters describing one clat instance; passed to maybeStartBpf() and
// maybeStopBpf() and used to build the clat bpf map keys.
struct ClatdTracker {
// Interface index of |iface|; used as the iif of the ingress6 map key.
unsigned ifIndex;
// Interface name, NUL-terminated, at most IFNAMSIZ bytes including the NUL.
char iface[IFNAMSIZ];
// Interface index of |v4iface|.
unsigned v4ifIndex;
// NOTE(review): presumably the "v4-" prefixed clat interface name for |iface| -
// confirm against the code that fills in this struct.
char v4iface[IFNAMSIZ];
// Local IPv4 address; used as local4 of the egress4 map key.
in_addr v4;
// Local IPv6 address; used as local6 of the ingress6 map key.
in6_addr v6;
// NAT64 prefix; used as pfx96 of the ingress6 map key.
in6_addr pfx96;
};
// NOTE(review): presumably opens the clat bpf maps used by the functions below;
// the return convention is not visible in this header - confirm in the
// implementation.
int initMaps(void);
// Starts bpf handling for the clat instance described by |tracker|. Returns
// nothing, so failures are not reported to the caller.
void maybeStartBpf(const ClatdTracker& tracker);
// Stops bpf handling for the clat instance described by |tracker|. Returns
// nothing, so failures are not reported to the caller.
// NOTE(review): presumably removes the tracker's egress4 and ingress6 map
// entries and only logs deletion failures - confirm in the implementation.
void maybeStopBpf(const ClatdTracker& tracker);
} // namespace clat
} // namespace net
} // namespace android

View File

@@ -74,6 +74,12 @@ public class ClatCoordinator {
private final Dependencies mDeps;
// Parameters of the current clat instance. All four are assigned together once
// maybeStartBpf succeeds, are handed back to maybeStopBpf by clatStop(), and are
// then reset to null there.
@Nullable
private String mIface = null;
@Nullable
private String mNat64Prefix = null;
@Nullable
private String mXlatLocalAddress4 = null;
@Nullable
private String mXlatLocalAddress6 = null;
private int mPid = INVALID_PID;
@VisibleForTesting
@@ -162,6 +168,23 @@ public class ClatCoordinator {
throws IOException {
native_configurePacketSocket(sock, v6, ifindex);
}
/**
 * Maybe start bpf.
 *
 * Forwards every argument unchanged to {@code native_maybeStartBpf}.
 *
 * @return the value returned by the native call.
 * @throws IOException if the native call throws it.
 */
public int maybeStartBpf(
        @NonNull FileDescriptor tunfd,
        @NonNull FileDescriptor readsock6,
        @NonNull FileDescriptor writesock6,
        @NonNull String iface,
        @NonNull String pfx96,
        @NonNull String v4,
        @NonNull String v6) throws IOException {
    final int nativeResult =
            native_maybeStartBpf(tunfd, readsock6, writesock6, iface, pfx96, v4, v6);
    return nativeResult;
}
/**
 * Maybe stop bpf.
 *
 * Forwards every argument unchanged to {@code native_maybeStopBpf}.
 *
 * @throws IOException if the native call throws it.
 */
public void maybeStopBpf(
        String iface,
        String pfx96,
        String v4,
        String v6,
        int pid) throws IOException {
    native_maybeStopBpf(iface, pfx96, v4, v6, pid);
}
}
@VisibleForTesting
@@ -304,10 +327,38 @@ public class ClatCoordinator {
throw new IOException("configure packet socket failed: " + e);
}
// [5] Maybe start bpf.
try {
mDeps.maybeStartBpf(tunFd.getFileDescriptor(), readSock6.getFileDescriptor(),
writeSock6.getFileDescriptor(), iface, pfx96, v4, v6);
mIface = iface;
mNat64Prefix = pfx96;
mXlatLocalAddress4 = v4;
mXlatLocalAddress6 = v6;
} catch (IOException e) {
throw new IOException("Error start bpf on " + iface + ": " + e);
}
// TODO: start clatd and returns local xlat464 v6 address.
return null;
}
/**
 * Stop clatd.
 *
 * Stops bpf for the interface, NAT64 prefix and xlat local addresses recorded when bpf was
 * started, then clears that recorded state.
 *
 * @throws IOException if clat has not been started (no interface is recorded), or if stopping
 *         bpf fails. On failure the recorded state is left untouched.
 */
public void clatStop() throws IOException {
    // The state fields are only non-null after a successful start. Without this guard, null
    // strings would be handed straight to the native stop call.
    if (mIface == null) {
        throw new IOException("Clatd has not started");
    }

    mDeps.maybeStopBpf(mIface, mNat64Prefix, mXlatLocalAddress4, mXlatLocalAddress6,
            mPid /* unused */);
    // TODO: remove setIptablesDropRule

    Log.i(TAG, "clatd on " + mIface + " stopped");

    // Forget the stopped instance so that a second clatStop() is rejected by the guard above.
    mIface = null;
    mNat64Prefix = null;
    mXlatLocalAddress4 = null;
    mXlatLocalAddress6 = null;
}
// JNI method implemented in native code.
// NOTE(review): presumably returns the selected IPv4 address as a string for the given
// address and prefix length - confirm against the native implementation.
private static native String native_selectIpv4Address(String v4addr, int prefixlen)
throws IOException;
private static native String native_generateIpv6Address(String iface, String v4,
@@ -321,4 +372,9 @@ public class ClatCoordinator {
int ifindex) throws IOException;
// JNI method implemented in native code; takes the packet socket, the v6 address string and
// the interface index.
private static native void native_configurePacketSocket(FileDescriptor sock, String v6,
int ifindex) throws IOException;
// JNI method implemented in native code; starts bpf for clat on iface.
// NOTE(review): the meaning of the returned int is not visible here and the Java caller
// ignores it - confirm against the native implementation.
private static native int native_maybeStartBpf(FileDescriptor tunfd, FileDescriptor readsock6,
FileDescriptor writesock6, String iface, String pfx96, String v4, String v6)
throws IOException;
// JNI method implemented in native code; stops bpf for clat on iface. clatStop() marks the
// pid argument it passes as unused.
private static native void native_maybeStopBpf(String iface, String pfx96, String v4,
String v6, int pid) throws IOException;
}