ClatUtils.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. /*
  2. * Copyright (C) 2019 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "ClatUtils.h"
  17. #include <arpa/inet.h>
  18. #include <errno.h>
  19. #include <linux/if.h>
  20. #include <linux/netlink.h>
  21. #include <linux/pkt_cls.h>
  22. #include <linux/pkt_sched.h>
  23. #include <linux/rtnetlink.h>
  24. #include <sys/ioctl.h>
  25. #include <sys/socket.h>
  26. #include <sys/types.h>
  27. #include <unistd.h>
  28. #define LOG_TAG "ClatUtils"
  29. #include <log/log.h>
  30. #include "NetlinkCommands.h"
  31. #include "android-base/unique_fd.h"
  32. #include "bpf/BpfUtils.h"
  33. #include "netdbpf/bpf_shared.h"
  34. namespace android {
  35. namespace net {
  36. int hardwareAddressType(const std::string& interface) {
  37. base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
  38. if (ufd < 0) {
  39. const int err = errno;
  40. ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
  41. return -err;
  42. };
  43. struct ifreq ifr = {};
  44. // We use strncpy() instead of strlcpy() since kernel has to be able
  45. // to handle non-zero terminated junk passed in by userspace anyway,
  46. // and this way too long interface names (more than IFNAMSIZ-1 = 15
  47. // characters plus terminating NULL) will not get truncated to 15
  48. // characters and zero-terminated and thus potentially erroneously
  49. // match a truncated interface if one were to exist.
  50. strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
  51. if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
  52. return ifr.ifr_hwaddr.sa_family;
  53. }
  54. int getClatIngressMapFd(void) {
  55. const int fd = bpf::bpfFdGet(CLAT_INGRESS_MAP_PATH, 0);
  56. return (fd == -1) ? -errno : fd;
  57. }
  58. int getClatIngressProgFd(bool with_ethernet_header) {
  59. const int fd = bpf::bpfFdGet(
  60. with_ethernet_header ? CLAT_INGRESS_PROG_ETHER_PATH : CLAT_INGRESS_PROG_RAWIP_PATH, 0);
  61. return (fd == -1) ? -errno : fd;
  62. }
  63. // TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
  64. int openNetlinkSocket(void) {
  65. base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
  66. if (fd == -1) {
  67. const int err = errno;
  68. ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
  69. return -err;
  70. }
  71. int rv;
  72. const int on = 1;
  73. rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
  74. if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
  75. // this is needed to get sane strace netlink parsing, it allocates the pid
  76. rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
  77. if (rv) {
  78. const int err = errno;
  79. ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
  80. return -err;
  81. }
  82. // we do not want to receive messages from anyone besides the kernel
  83. rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
  84. if (rv) {
  85. const int err = errno;
  86. ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
  87. return -err;
  88. }
  89. return fd.release();
  90. }
  91. // TODO: merge with //system/netd/server/SockDiag.cpp:checkError(fd)
  92. int processNetlinkResponse(int fd) {
  93. struct {
  94. nlmsghdr h;
  95. nlmsgerr e;
  96. char buf[256];
  97. } resp = {};
  98. const int rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
  99. if (rv == -1) {
  100. const int err = errno;
  101. ALOGE("recv() failed");
  102. return -err;
  103. }
  104. if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
  105. ALOGE("recv() returned short packet: %d", rv);
  106. return -EMSGSIZE;
  107. }
  108. if (resp.h.nlmsg_len != (unsigned)rv) {
  109. ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
  110. return -EBADMSG;
  111. }
  112. if (resp.h.nlmsg_type != NLMSG_ERROR) {
  113. ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
  114. return -EBADMSG;
  115. }
  116. return resp.e.error; // returns 0 on success
  117. }
  118. // ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
  119. // REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
  120. // DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
  121. int doTcQdiscClsact(int fd, int ifIndex, __u16 nlMsgType, __u16 nlMsgFlags) {
  122. // This is the name of the qdisc we are attaching.
  123. // Some hoop jumping to make this compile time constant with known size,
  124. // so that the structure declaration is well defined at compile time.
  125. #define CLSACT "clsact"
  126. static const char clsact[] = CLSACT;
  127. // sizeof() includes the terminating NULL
  128. #define ASCIIZ_LEN_CLSACT sizeof(clsact)
  129. const struct {
  130. nlmsghdr n;
  131. tcmsg t;
  132. struct {
  133. nlattr attr;
  134. char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
  135. } kind;
  136. } req = {
  137. .n =
  138. {
  139. .nlmsg_len = sizeof(req),
  140. .nlmsg_type = nlMsgType,
  141. .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
  142. },
  143. .t =
  144. {
  145. .tcm_family = AF_UNSPEC,
  146. .tcm_ifindex = ifIndex,
  147. .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
  148. .tcm_parent = TC_H_CLSACT,
  149. },
  150. .kind =
  151. {
  152. .attr =
  153. {
  154. .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
  155. .nla_type = TCA_KIND,
  156. },
  157. .str = CLSACT,
  158. },
  159. };
  160. #undef ASCIIZ_LEN_CLSACT
  161. #undef CLSACT
  162. const int rv = send(fd, &req, sizeof(req), 0);
  163. if (rv == -1) return -errno;
  164. if (rv != sizeof(req)) return -EMSGSIZE;
  165. return processNetlinkResponse(fd);
  166. }
  167. int tcQdiscAddDevClsact(int fd, int ifIndex) {
  168. return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_EXCL | NLM_F_CREATE);
  169. }
  170. int tcQdiscReplaceDevClsact(int fd, int ifIndex) {
  171. return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_REPLACE);
  172. }
  173. int tcQdiscDelDevClsact(int fd, int ifIndex) {
  174. return doTcQdiscClsact(fd, ifIndex, RTM_DELQDISC, 0);
  175. }
  176. // tc filter add dev .. ingress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
  177. int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
  178. // The priority doesn't matter until we actually start attaching multiple
  179. // things to the same interface's ingress point.
  180. const int prio = 1;
  181. // This is the name of the filter we're attaching (ie. this is the 'bpf'
  182. // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
  183. //
  184. // We go through some hoops in order to make this compile time constants
  185. // so that we can define the struct further down the function with the
  186. // field for this sized correctly already during the build.
  187. #define BPF "bpf"
  188. const char bpf[] = BPF;
  189. // sizeof() includes the terminating NULL
  190. #define ASCIIZ_LEN_BPF sizeof(bpf)
  191. // This is to replicate program name suffix used by 'tc' Linux cli
  192. // when it attaches programs.
  193. #define FSOBJ_SUFFIX ":[*fsobj]"
  194. // This macro expands (from header files) to:
  195. // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
  196. // and is the name of the pinned ebpf program for ARPHRD_RAWIP interfaces.
  197. // (also compatible with anything that has 0 size L2 header)
  198. #define NAME_RAWIP CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
  199. const char name_rawip[] = NAME_RAWIP;
  200. // This macro expands (from header files) to:
  201. // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
  202. // and is the name of the pinned ebpf program for ARPHRD_ETHER interfaces.
  203. // (also compatible with anything that has standard ethernet header)
  204. #define NAME_ETHER CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
  205. const char name_ether[] = NAME_ETHER;
  206. // The actual name we'll use is determined at run time via 'ethernet'
  207. // boolean. We need to compile time allocate enough space in the struct
  208. // hence this macro magic to make sure we have enough space for either
  209. // possibility. In practice both are actually the same size.
  210. #define ASCIIZ_MAXLEN_NAME \
  211. ((sizeof(name_rawip) > sizeof(name_ether)) ? sizeof(name_rawip) : sizeof(name_ether))
  212. // This is not a compile time constant and is used in strcpy below
  213. #define NAME (ethernet ? NAME_ETHER : NAME_RAWIP)
  214. struct {
  215. nlmsghdr n;
  216. tcmsg t;
  217. struct {
  218. nlattr attr;
  219. char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
  220. } kind;
  221. struct {
  222. nlattr attr;
  223. struct {
  224. nlattr attr;
  225. __u32 u32;
  226. } fd;
  227. struct {
  228. nlattr attr;
  229. char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
  230. } name;
  231. struct {
  232. nlattr attr;
  233. __u32 u32;
  234. } flags;
  235. } options;
  236. } req = {
  237. .n =
  238. {
  239. .nlmsg_len = sizeof(req),
  240. .nlmsg_type = RTM_NEWTFILTER,
  241. .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
  242. },
  243. .t =
  244. {
  245. .tcm_family = AF_UNSPEC,
  246. .tcm_ifindex = ifIndex,
  247. .tcm_handle = TC_H_UNSPEC,
  248. .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
  249. .tcm_info = (prio << 16) | htons(ETH_P_IPV6),
  250. },
  251. .kind =
  252. {
  253. .attr =
  254. {
  255. .nla_len = sizeof(req.kind),
  256. .nla_type = TCA_KIND,
  257. },
  258. .str = BPF,
  259. },
  260. .options =
  261. {
  262. .attr =
  263. {
  264. .nla_len = sizeof(req.options),
  265. .nla_type = TCA_OPTIONS,
  266. },
  267. .fd =
  268. {
  269. .attr =
  270. {
  271. .nla_len = sizeof(req.options.fd),
  272. .nla_type = TCA_BPF_FD,
  273. },
  274. .u32 = static_cast<__u32>(bpfFd),
  275. },
  276. .name =
  277. {
  278. .attr =
  279. {
  280. .nla_len = sizeof(req.options.name),
  281. .nla_type = TCA_BPF_NAME,
  282. },
  283. // Visible via 'tc filter show', but
  284. // is overwritten by strcpy below
  285. .str = "placeholder",
  286. },
  287. .flags =
  288. {
  289. .attr =
  290. {
  291. .nla_len = sizeof(req.options.flags),
  292. .nla_type = TCA_BPF_FLAGS,
  293. },
  294. .u32 = TCA_BPF_FLAG_ACT_DIRECT,
  295. },
  296. },
  297. };
  298. strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
  299. #undef NAME
  300. #undef ASCIIZ_MAXLEN_NAME
  301. #undef NAME_ETHER
  302. #undef NAME_RAWIP
  303. #undef NAME
  304. #undef ASCIIZ_LEN_BPF
  305. #undef BPF
  306. const int rv = send(fd, &req, sizeof(req), 0);
  307. if (rv == -1) return -errno;
  308. if (rv != sizeof(req)) return -EMSGSIZE;
  309. return processNetlinkResponse(fd);
  310. }
  311. } // namespace net
  312. } // namespace android