Test: TreeHugger Signed-off-by: Maciej Żenczykowski <maze@google.com> Change-Id: I18ccf19024f0617778799f548c8707a518eefadc git-subtree-dir: clatd git-subtree-mainline:655d0850acgit-subtree-split:2007830075
308 lines
11 KiB
C
308 lines
11 KiB
C
/*
|
|
* Copyright 2012 Daniel Drown
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
* clatd.c - tun interface setup and main event loop
|
|
*/
|
|
#include <arpa/inet.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <poll.h>
|
|
#include <signal.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/prctl.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
|
|
#include <linux/filter.h>
|
|
#include <linux/if.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/if_packet.h>
|
|
#include <linux/if_tun.h>
|
|
#include <linux/virtio_net.h>
|
|
#include <net/if.h>
|
|
#include <sys/uio.h>
|
|
|
|
#include "clatd.h"
|
|
#include "checksum.h"
|
|
#include "config.h"
|
|
#include "dump.h"
|
|
#include "logging.h"
|
|
#include "translate.h"
|
|
|
|
struct clat_config Global_Clatd_Config;
|
|
|
|
volatile sig_atomic_t running = 1;
|
|
|
|
// reads IPv6 packet from AF_PACKET socket, translates to IPv4, writes to tun
|
|
void process_packet_6_to_4(struct tun_data *tunnel) {
|
|
// ethernet header is 14 bytes, plus 4 for a normal VLAN tag or 8 for Q-in-Q
|
|
// we don't really support vlans (or especially Q-in-Q)...
|
|
// but a few bytes of extra buffer space doesn't hurt...
|
|
struct {
|
|
struct virtio_net_hdr vnet;
|
|
uint8_t payload[22 + MAXMTU];
|
|
char pad; // +1 to make packet truncation obvious
|
|
} buf;
|
|
struct iovec iov = {
|
|
.iov_base = &buf,
|
|
.iov_len = sizeof(buf),
|
|
};
|
|
char cmsg_buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
|
|
struct msghdr msgh = {
|
|
.msg_iov = &iov,
|
|
.msg_iovlen = 1,
|
|
.msg_control = cmsg_buf,
|
|
.msg_controllen = sizeof(cmsg_buf),
|
|
};
|
|
ssize_t readlen = recvmsg(tunnel->read_fd6, &msgh, /*flags*/ 0);
|
|
|
|
if (readlen < 0) {
|
|
if (errno != EAGAIN) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: read error: %s", __func__, strerror(errno));
|
|
}
|
|
return;
|
|
} else if (readlen == 0) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: packet socket removed?", __func__);
|
|
running = 0;
|
|
return;
|
|
} else if (readlen >= sizeof(buf)) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: read truncation - ignoring pkt", __func__);
|
|
return;
|
|
}
|
|
|
|
bool ok = false;
|
|
__u32 tp_status = 0;
|
|
__u16 tp_net = 0;
|
|
|
|
for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; cmsg = CMSG_NXTHDR(&msgh,cmsg)) {
|
|
if (cmsg->cmsg_level == SOL_PACKET && cmsg->cmsg_type == PACKET_AUXDATA) {
|
|
struct tpacket_auxdata *aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
|
|
ok = true;
|
|
tp_status = aux->tp_status;
|
|
tp_net = aux->tp_net;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!ok) {
|
|
// theoretically this should not happen...
|
|
static bool logged = false;
|
|
if (!logged) {
|
|
logmsg(ANDROID_LOG_ERROR, "%s: failed to fetch tpacket_auxdata cmsg", __func__);
|
|
logged = true;
|
|
}
|
|
}
|
|
|
|
const int payload_offset = offsetof(typeof(buf), payload);
|
|
if (readlen < payload_offset + tp_net) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: ignoring %zd byte pkt shorter than %d+%u L2 header",
|
|
__func__, readlen, payload_offset, tp_net);
|
|
return;
|
|
}
|
|
|
|
const int pkt_len = readlen - payload_offset;
|
|
|
|
// This will detect a skb->ip_summed == CHECKSUM_PARTIAL packet with non-final L4 checksum
|
|
if (tp_status & TP_STATUS_CSUMNOTREADY) {
|
|
static bool logged = false;
|
|
if (!logged) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: L4 checksum calculation required", __func__);
|
|
logged = true;
|
|
}
|
|
|
|
// These are non-negative by virtue of csum_start/offset being u16
|
|
const int cs_start = buf.vnet.csum_start;
|
|
const int cs_offset = cs_start + buf.vnet.csum_offset;
|
|
if (cs_start > pkt_len) {
|
|
logmsg(ANDROID_LOG_ERROR, "%s: out of range - checksum start %d > %d",
|
|
__func__, cs_start, pkt_len);
|
|
} else if (cs_offset + 1 >= pkt_len) {
|
|
logmsg(ANDROID_LOG_ERROR, "%s: out of range - checksum offset %d + 1 >= %d",
|
|
__func__, cs_offset, pkt_len);
|
|
} else {
|
|
uint16_t csum = ip_checksum(buf.payload + cs_start, pkt_len - cs_start);
|
|
if (!csum) csum = 0xFFFF; // required fixup for UDP, TCP must live with it
|
|
buf.payload[cs_offset] = csum & 0xFF;
|
|
buf.payload[cs_offset + 1] = csum >> 8;
|
|
}
|
|
}
|
|
|
|
translate_packet(tunnel->fd4, 0 /* to_ipv6 */, buf.payload + tp_net, pkt_len - tp_net);
|
|
}
|
|
|
|
// reads TUN_PI + L3 IPv4 packet from tun, translates to IPv6, writes to AF_INET6/RAW socket
|
|
void process_packet_4_to_6(struct tun_data *tunnel) {
|
|
struct {
|
|
struct tun_pi pi;
|
|
uint8_t payload[MAXMTU];
|
|
char pad; // +1 byte to make packet truncation obvious
|
|
} buf;
|
|
ssize_t readlen = read(tunnel->fd4, &buf, sizeof(buf));
|
|
|
|
if (readlen < 0) {
|
|
if (errno != EAGAIN) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: read error: %s", __func__, strerror(errno));
|
|
}
|
|
return;
|
|
} else if (readlen == 0) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: tun interface removed", __func__);
|
|
running = 0;
|
|
return;
|
|
} else if (readlen >= sizeof(buf)) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: read truncation - ignoring pkt", __func__);
|
|
return;
|
|
}
|
|
|
|
const int payload_offset = offsetof(typeof(buf), payload);
|
|
|
|
if (readlen < payload_offset) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: short read: got %ld bytes", __func__, readlen);
|
|
return;
|
|
}
|
|
|
|
const int pkt_len = readlen - payload_offset;
|
|
|
|
uint16_t proto = ntohs(buf.pi.proto);
|
|
if (proto != ETH_P_IP) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: unknown packet type = 0x%x", __func__, proto);
|
|
return;
|
|
}
|
|
|
|
if (buf.pi.flags != 0) {
|
|
logmsg(ANDROID_LOG_WARN, "%s: unexpected flags = %d", __func__, buf.pi.flags);
|
|
}
|
|
|
|
translate_packet(tunnel->write_fd6, 1 /* to_ipv6 */, buf.payload, pkt_len);
|
|
}
|
|
|
|
// IPv6 DAD packet format:
|
|
// Ethernet header (if needed) will be added by the kernel:
|
|
// u8[6] src_mac; u8[6] dst_mac '33:33:ff:XX:XX:XX'; be16 ethertype '0x86DD'
|
|
// IPv6 header:
|
|
// be32 0x60000000 - ipv6, tclass 0, flowlabel 0
|
|
// be16 payload_length '32'; u8 nxt_hdr ICMPv6 '58'; u8 hop limit '255'
|
|
// u128 src_ip6 '::'
|
|
// u128 dst_ip6 'ff02::1:ffXX:XXXX'
|
|
// ICMPv6 header:
|
|
// u8 type '135'; u8 code '0'; u16 icmp6 checksum; u32 reserved '0'
|
|
// ICMPv6 neighbour solicitation payload:
|
|
// u128 tgt_ip6
|
|
// ICMPv6 ND options:
|
|
// u8 opt nr '14'; u8 length '1'; u8[6] nonce '6 random bytes'
|
|
void send_dad(int fd, const struct in6_addr* tgt) {
|
|
struct {
|
|
struct ip6_hdr ip6h;
|
|
struct nd_neighbor_solicit ns;
|
|
uint8_t ns_opt_nr;
|
|
uint8_t ns_opt_len;
|
|
uint8_t ns_opt_nonce[6];
|
|
} dad_pkt = {
|
|
.ip6h = {
|
|
.ip6_flow = htonl(6 << 28), // v6, 0 tclass, 0 flowlabel
|
|
.ip6_plen = htons(sizeof(dad_pkt) - sizeof(struct ip6_hdr)), // payload length, ie. 32
|
|
.ip6_nxt = IPPROTO_ICMPV6, // 58
|
|
.ip6_hlim = 255,
|
|
.ip6_src = {}, // ::
|
|
.ip6_dst.s6_addr = {
|
|
0xFF, 0x02, 0, 0,
|
|
0, 0, 0, 0,
|
|
0, 0, 0, 1,
|
|
0xFF, tgt->s6_addr[13], tgt->s6_addr[14], tgt->s6_addr[15],
|
|
}, // ff02::1:ffXX:XXXX - multicast group address derived from bottom 24-bits of tgt
|
|
},
|
|
.ns = {
|
|
.nd_ns_type = ND_NEIGHBOR_SOLICIT, // 135
|
|
.nd_ns_code = 0,
|
|
.nd_ns_cksum = 0, // will be calculated later
|
|
.nd_ns_reserved = 0,
|
|
.nd_ns_target = *tgt,
|
|
},
|
|
.ns_opt_nr = 14, // icmp6 option 'nonce' from RFC3971
|
|
.ns_opt_len = 1, // in units of 8 bytes, including option nr and len
|
|
.ns_opt_nonce = {}, // opt_len *8 - sizeof u8(opt_nr) - sizeof u8(opt_len) = 6 ranodmized bytes
|
|
};
|
|
arc4random_buf(&dad_pkt.ns_opt_nonce, sizeof(dad_pkt.ns_opt_nonce));
|
|
|
|
// 40 byte IPv6 header + 8 byte ICMPv6 header + 16 byte ipv6 target address + 8 byte nonce option
|
|
_Static_assert(sizeof(dad_pkt) == 40 + 8 + 16 + 8, "sizeof dad packet != 72");
|
|
|
|
// IPv6 header checksum is standard negated 16-bit one's complement sum over the icmpv6 pseudo
|
|
// header (which includes payload length, nextheader, and src/dst ip) and the icmpv6 payload.
|
|
//
|
|
// Src/dst ip immediately prefix the icmpv6 header itself, so can be handled along
|
|
// with the payload. We thus only need to manually account for payload len & next header.
|
|
//
|
|
// The magic '8' is simply the offset of the ip6_src field in the ipv6 header,
|
|
// ie. we're skipping over the ipv6 version, tclass, flowlabel, payload length, next header
|
|
// and hop limit fields, because they're not quite where we want them to be.
|
|
//
|
|
// ip6_plen is already in network order, while ip6_nxt is a single byte and thus needs htons().
|
|
uint32_t csum = dad_pkt.ip6h.ip6_plen + htons(dad_pkt.ip6h.ip6_nxt);
|
|
csum = ip_checksum_add(csum, &dad_pkt.ip6h.ip6_src, sizeof(dad_pkt) - 8);
|
|
dad_pkt.ns.nd_ns_cksum = ip_checksum_finish(csum);
|
|
|
|
const struct sockaddr_in6 dst = {
|
|
.sin6_family = AF_INET6,
|
|
.sin6_addr = dad_pkt.ip6h.ip6_dst,
|
|
.sin6_scope_id = if_nametoindex(Global_Clatd_Config.native_ipv6_interface),
|
|
};
|
|
|
|
sendto(fd, &dad_pkt, sizeof(dad_pkt), 0 /*flags*/, (const struct sockaddr *)&dst, sizeof(dst));
|
|
}
|
|
|
|
/* function: event_loop
|
|
* reads packets from the tun network interface and passes them down the stack
|
|
* tunnel - tun device data
|
|
*/
|
|
void event_loop(struct tun_data *tunnel) {
|
|
// Apparently some network gear will refuse to perform NS for IPs that aren't DAD'ed,
|
|
// this would then result in an ipv6-only network with working native ipv6, working
|
|
// IPv4 via DNS64, but non-functioning IPv4 via CLAT (ie. IPv4 literals + IPv4 only apps).
|
|
// The kernel itself doesn't do DAD for anycast ips (but does handle IPV6 MLD and handle ND).
|
|
// So we'll spoof dad here, and yeah, we really should check for a response and in
|
|
// case of failure pick a different IP. Seeing as 48-bits of the IP are utterly random
|
|
// (with the other 16 chosen to guarantee checksum neutrality) this seems like a remote
|
|
// concern...
|
|
// TODO: actually perform true DAD
|
|
send_dad(tunnel->write_fd6, &Global_Clatd_Config.ipv6_local_subnet);
|
|
|
|
struct pollfd wait_fd[] = {
|
|
{ tunnel->read_fd6, POLLIN, 0 },
|
|
{ tunnel->fd4, POLLIN, 0 },
|
|
};
|
|
|
|
while (running) {
|
|
if (poll(wait_fd, ARRAY_SIZE(wait_fd), -1) == -1) {
|
|
if (errno != EINTR) {
|
|
logmsg(ANDROID_LOG_WARN, "event_loop/poll returned an error: %s", strerror(errno));
|
|
}
|
|
} else {
|
|
// Call process_packet if the socket has data to be read, but also if an
|
|
// error is waiting. If we don't call read() after getting POLLERR, a
|
|
// subsequent poll() will return immediately with POLLERR again,
|
|
// causing this code to spin in a loop. Calling read() will clear the
|
|
// socket error flag instead.
|
|
if (wait_fd[0].revents) process_packet_6_to_4(tunnel);
|
|
if (wait_fd[1].revents) process_packet_4_to_6(tunnel);
|
|
}
|
|
}
|
|
}
|