diff mbox series

[RFC,bpf-next,3/4] bpf: add bpf_trace_btf helper

Message ID 1596724945-22859-4-git-send-email-alan.maguire@oracle.com
State New
Headers show
Series None | expand

Commit Message

Alan Maguire Aug. 6, 2020, 2:42 p.m. UTC
A helper is added to support tracing kernel type information in BPF
using the BPF Type Format (BTF).  Its signature is

long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id,
                   u64 flags);

struct btf_ptr * specifies

- a pointer to the data to be traced;
- the BTF id of the type of data pointed to; or
- a string representation of the type of data pointed to
- a flags field is provided for future use; these flags
  are not to be confused with the BTF_TRACE_F_* flags
  below that control how the btf_ptr is displayed; the
  flags member of the struct btf_ptr may be used to
  disambiguate types in kernel versus module BTF, etc;
  the main distinction is the flags relate to the type
  and information needed in identifying it; not how it
  is displayed.

The helper also specifies a trace id which is set for the
bpf_trace_printk tracepoint; this allows BPF programs
to filter on specific trace ids, ensuring output does
not become mixed between different traced events and
hard to read.

For example a BPF program with a struct sk_buff *skb
could do the following:

        static const char *skb_type = "struct sk_buff";
        static struct btf_ptr b = { };

        b.ptr = skb;
        b.type = skb_type;
        bpf_trace_btf(&b, sizeof(b), 0, 0);

Default output in the trace_pipe looks like this:

          <idle>-0     [023] d.s.  1825.778400: bpf_trace_printk: (struct sk_buff){
          <idle>-0     [023] d.s.  1825.778409: bpf_trace_printk:  (union){
          <idle>-0     [023] d.s.  1825.778410: bpf_trace_printk:   (struct){
          <idle>-0     [023] d.s.  1825.778412: bpf_trace_printk:    .prev = (struct sk_buff *)0x00000000b2a3df7e,
          <idle>-0     [023] d.s.  1825.778413: bpf_trace_printk:    (union){
          <idle>-0     [023] d.s.  1825.778414: bpf_trace_printk:     .dev = (struct net_device *)0x000000001658808b,
          <idle>-0     [023] d.s.  1825.778416: bpf_trace_printk:     .dev_scratch = (long unsigned int)18446628460391432192,
          <idle>-0     [023] d.s.  1825.778417: bpf_trace_printk:    },
          <idle>-0     [023] d.s.  1825.778417: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778418: bpf_trace_printk:   .rbnode = (struct rb_node){
          <idle>-0     [023] d.s.  1825.778419: bpf_trace_printk:    .rb_right = (struct rb_node *)0x00000000b2a3df7e,
          <idle>-0     [023] d.s.  1825.778420: bpf_trace_printk:    .rb_left = (struct rb_node *)0x000000001658808b,
          <idle>-0     [023] d.s.  1825.778420: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778421: bpf_trace_printk:   .list = (struct list_head){
          <idle>-0     [023] d.s.  1825.778422: bpf_trace_printk:    .prev = (struct list_head *)0x00000000b2a3df7e,
          <idle>-0     [023] d.s.  1825.778422: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778422: bpf_trace_printk:  },
          <idle>-0     [023] d.s.  1825.778426: bpf_trace_printk:  .len = (unsigned int)168,
          <idle>-0     [023] d.s.  1825.778427: bpf_trace_printk:  .mac_len = (__u16)14,
          <idle>-0     [023] d.s.  1825.778428: bpf_trace_printk:  .queue_mapping = (__u16)17,
          <idle>-0     [023] d.s.  1825.778430: bpf_trace_printk:  .head_frag = (__u8)0x1,
          <idle>-0     [023] d.s.  1825.778431: bpf_trace_printk:  .ip_summed = (__u8)0x1,
          <idle>-0     [023] d.s.  1825.778432: bpf_trace_printk:  .l4_hash = (__u8)0x1,
          <idle>-0     [023] d.s.  1825.778433: bpf_trace_printk:  .hash = (__u32)1873247608,
          <idle>-0     [023] d.s.  1825.778434: bpf_trace_printk:  (union){
          <idle>-0     [023] d.s.  1825.778435: bpf_trace_printk:   .napi_id = (unsigned int)8209,
          <idle>-0     [023] d.s.  1825.778436: bpf_trace_printk:   .sender_cpu = (unsigned int)8209,
          <idle>-0     [023] d.s.  1825.778436: bpf_trace_printk:  },
          <idle>-0     [023] d.s.  1825.778437: bpf_trace_printk:  .protocol = (__be16)8,
          <idle>-0     [023] d.s.  1825.778438: bpf_trace_printk:  .transport_header = (__u16)226,
          <idle>-0     [023] d.s.  1825.778439: bpf_trace_printk:  .network_header = (__u16)206,
          <idle>-0     [023] d.s.  1825.778440: bpf_trace_printk:  .mac_header = (__u16)192,
          <idle>-0     [023] d.s.  1825.778440: bpf_trace_printk:  .tail = (sk_buff_data_t)374,
          <idle>-0     [023] d.s.  1825.778441: bpf_trace_printk:  .end = (sk_buff_data_t)1728,
          <idle>-0     [023] d.s.  1825.778442: bpf_trace_printk:  .head = (unsigned char *)0x000000009798cb6b,
          <idle>-0     [023] d.s.  1825.778443: bpf_trace_printk:  .data = (unsigned char *)0x0000000064823282,
          <idle>-0     [023] d.s.  1825.778444: bpf_trace_printk:  .truesize = (unsigned int)2304,
          <idle>-0     [023] d.s.  1825.778445: bpf_trace_printk:  .users = (refcount_t){
          <idle>-0     [023] d.s.  1825.778445: bpf_trace_printk:   .refs = (atomic_t){
          <idle>-0     [023] d.s.  1825.778447: bpf_trace_printk:    .counter = (int)1,
          <idle>-0     [023] d.s.  1825.778447: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778447: bpf_trace_printk:  },
          <idle>-0     [023] d.s.  1825.778448: bpf_trace_printk: }

Flags modifying display are as follows:

- BTF_TRACE_F_COMPACT:	no formatting around type information
- BTF_TRACE_F_NONAME:	no struct/union member names/types
- BTF_TRACE_F_PTR_RAW:	show raw (unobfuscated) pointer values;
			equivalent to %px.
- BTF_TRACE_F_ZERO:	show zero-valued struct/union members;
			they are not displayed by default

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 include/linux/bpf.h            |   1 +
 include/linux/btf.h            |   9 ++--
 include/uapi/linux/bpf.h       |  63 +++++++++++++++++++++++++
 kernel/bpf/core.c              |   5 ++
 kernel/bpf/helpers.c           |   4 ++
 kernel/trace/bpf_trace.c       | 102 ++++++++++++++++++++++++++++++++++++++++-
 scripts/bpf_helpers_doc.py     |   2 +
 tools/include/uapi/linux/bpf.h |  63 +++++++++++++++++++++++++
 8 files changed, 243 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6143b6e..f67819d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -934,6 +934,7 @@  struct bpf_event_entry {
 const char *kernel_type_name(u32 btf_type_id);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
+const struct bpf_func_proto *bpf_get_trace_btf_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 46bf9f4..3d31e28 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,7 @@ 
 
 #include <linux/types.h>
 #include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
 
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 
@@ -61,10 +62,10 @@  const struct btf_type *btf_type_id_size(const struct btf *btf,
  *	- BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read
  *	  data before displaying it.
  */
-#define BTF_SHOW_COMPACT	(1ULL << 0)
-#define BTF_SHOW_NONAME		(1ULL << 1)
-#define BTF_SHOW_PTR_RAW	(1ULL << 2)
-#define BTF_SHOW_ZERO		(1ULL << 3)
+#define BTF_SHOW_COMPACT	BTF_TRACE_F_COMPACT
+#define BTF_SHOW_NONAME		BTF_TRACE_F_NONAME
+#define BTF_SHOW_PTR_RAW	BTF_TRACE_F_PTR_RAW
+#define BTF_SHOW_ZERO		BTF_TRACE_F_ZERO
 #define BTF_SHOW_NONEWLINE	(1ULL << 32)
 #define BTF_SHOW_UNSAFE		(1ULL << 33)
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b134e67..726fee4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3394,6 +3394,36 @@  struct bpf_stack_build_id {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id, u64 flags)
+ *	Description
+ *		Utilize BTF to trace a representation of *ptr*->ptr, using
+ *		*ptr*->type name or *ptr*->type_id.  *ptr*->type_name
+ *		should specify the type *ptr*->ptr points to. Traversing that
+ *		data structure using BTF, the type information and values are
+ *		bpf_trace_printk()ed.  Safe copy of the pointer data is
+ *		carried out to avoid kernel crashes during data display.
+ *		Tracing specifies *trace_id* as the id associated with the
+ *		trace event; this can be used to filter trace events
+ *		to show a subset of all traced output, helping to avoid
+ *		the situation where BTF output is intermixed with other
+ *		output.
+ *
+ *		*flags* is a combination of
+ *
+ *		**BTF_TRACE_F_COMPACT**
+ *			no formatting around type information
+ *		**BTF_TRACE_F_NONAME**
+ *			no struct/union member names/types
+ *		**BTF_TRACE_F_PTR_RAW**
+ *			show raw (unobfuscated) pointer values;
+ *			equivalent to printk specifier %px.
+ *		**BTF_TRACE_F_ZERO**
+ *			show zero-valued struct/union members; they
+ *			are not displayed by default
+ *
+ *	Return
+ *		The number of bytes traced, or a negative error in cases of
+ *		failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3538,6 +3568,7 @@  struct bpf_stack_build_id {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(trace_btf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4446,4 +4477,36 @@  struct bpf_sk_lookup {
 	__u32 local_port;	/* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer display; the
+ * additional type string/BTF type id are used to render the pointer
+ * data as the appropriate type via the bpf_trace_btf() helper
+ * above.  A flags field - potentially to specify additional details
+ * about the BTF pointer (rather than its mode of display) - is
+ * present for future use.  Display flags - BTF_TRACE_F_* - are
+ * passed to display functions separately.
+ */
+struct btf_ptr {
+	void *ptr;
+	const char *type;
+	__u32 type_id;
+	__u32 flags;		/* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_trace_btf() behaviour.
+ *	- BTF_TRACE_F_COMPACT: no formatting around type information
+ *	- BTF_TRACE_F_NONAME: no struct/union member names/types
+ *	- BTF_TRACE_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *	  equivalent to %px.
+ *	- BTF_TRACE_F_ZERO: show zero-valued struct/union members; they
+ *	  are not displayed by default
+ */
+enum {
+	BTF_TRACE_F_COMPACT	=	(1ULL << 0),
+	BTF_TRACE_F_NONAME	=	(1ULL << 1),
+	BTF_TRACE_F_PTR_RAW	=	(1ULL << 2),
+	BTF_TRACE_F_ZERO	=	(1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bde9334..82b3a98 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2214,6 +2214,11 @@  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 	return NULL;
 }
 
+const struct bpf_func_proto * __weak bpf_get_trace_btf_proto(void)
+{
+	return NULL;
+}
+
 u64 __weak
 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index be43ab3..b9a842b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -661,6 +661,10 @@  static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
 		if (!perfmon_capable())
 			return NULL;
 		return bpf_get_trace_printk_proto();
+	case BPF_FUNC_trace_btf:
+		if (!perfmon_capable())
+			return NULL;
+		return bpf_get_trace_btf_proto();
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
 	default:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6453a75..92212a1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,8 +14,12 @@ 
 #include <linux/spinlock.h>
 #include <linux/syscalls.h>
 #include <linux/error-injection.h>
+#include <linux/btf.h>
 #include <linux/btf_ids.h>
 
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/btf.h>
+
 #include <asm/tlb.h>
 
 #include "trace_probe.h"
@@ -555,10 +559,91 @@  static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+#define BTF_TRACE_F_ALL	(BTF_TRACE_F_COMPACT | BTF_TRACE_F_NONAME | \
+			 BTF_TRACE_F_PTR_RAW | BTF_TRACE_F_ZERO)
+
+BPF_CALL_4(bpf_trace_btf, struct btf_ptr *, ptr, u32, btf_ptr_size,
+	   u32, trace_id, u64, flags)
+{
+	u8 btf_kind = BTF_KIND_TYPEDEF;
+	char type_name[KSYM_NAME_LEN];
+	const struct btf_type *t;
+	const struct btf *btf;
+	const char *btf_type;
+	s32 btf_id;
+	int ret;
+
+	if (unlikely(flags & ~(BTF_TRACE_F_ALL)))
+		return -EINVAL;
+
+	if (btf_ptr_size != sizeof(struct btf_ptr))
+		return -EINVAL;
+
+	btf = bpf_get_btf_vmlinux();
+
+	if (IS_ERR_OR_NULL(btf))
+		return PTR_ERR(btf);
+
+	if (ptr->type != NULL) {
+		ret = copy_from_kernel_nofault(type_name, ptr->type,
+					       sizeof(type_name));
+		if (ret)
+			return ret;
+
+		btf_type = type_name;
+
+		if (strncmp(btf_type, "struct ", strlen("struct ")) == 0) {
+			btf_kind = BTF_KIND_STRUCT;
+			btf_type += strlen("struct ");
+		} else if (strncmp(btf_type, "union ", strlen("union ")) == 0) {
+			btf_kind = BTF_KIND_UNION;
+			btf_type += strlen("union ");
+		} else if (strncmp(btf_type, "enum ", strlen("enum ")) == 0) {
+			btf_kind = BTF_KIND_ENUM;
+			btf_type += strlen("enum ");
+		}
+
+		if (strlen(btf_type) == 0)
+			return -EINVAL;
+
+		/*
+		 * Assume type specified is a typedef as there's not much
+		 * benefit in specifying int types other than wasting time
+		 * on BTF lookups; we optimize for the most useful path.
+		 *
+		 * Fall back to BTF_KIND_INT if this fails.
+		 */
+		btf_id = btf_find_by_name_kind(btf, btf_type, btf_kind);
+		if (btf_id < 0)
+			btf_id = btf_find_by_name_kind(btf, btf_type,
+						       BTF_KIND_INT);
+	} else if (ptr->type_id > 0)
+		btf_id = ptr->type_id;
+	else
+		return -EINVAL;
+
+	if (btf_id > 0)
+		t = btf_type_by_id(btf, btf_id);
+	if (btf_id <= 0 || !t)
+		return -ENOENT;
+
+	return btf_type_trace_show(btf, btf_id, ptr->ptr, trace_id, flags);
+}
+
+static const struct bpf_func_proto bpf_trace_btf_proto = {
+	.func		= bpf_trace_btf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+static void bpf_trace_printk_enable(void)
 {
 	/*
-	 * This program might be calling bpf_trace_printk,
+	 * This program might be calling bpf_trace_[printk|btf],
 	 * so enable the associated bpf_trace/bpf_trace_printk event.
 	 * Repeat this each time as it is possible a user has
 	 * disabled bpf_trace_printk events.  By loading a program
@@ -567,10 +652,21 @@  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	 */
 	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
 		pr_warn_ratelimited("could not enable bpf_trace_printk events");
+}
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+{
+	bpf_trace_printk_enable();
 
 	return &bpf_trace_printk_proto;
 }
 
+const struct bpf_func_proto *bpf_get_trace_btf_proto(void)
+{
+	bpf_trace_printk_enable();
+
+	return &bpf_trace_btf_proto;
+}
+
 #define MAX_SEQ_PRINTF_VARARGS		12
 #define MAX_SEQ_PRINTF_MAX_MEMCPY	6
 #define MAX_SEQ_PRINTF_STR_LEN		128
@@ -1139,6 +1235,8 @@  static int bpf_send_signal_common(u32 sig, enum pid_type type)
 		return &bpf_get_current_comm_proto;
 	case BPF_FUNC_trace_printk:
 		return bpf_get_trace_printk_proto();
+	case BPF_FUNC_trace_btf:
+		return bpf_get_trace_btf_proto();
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_get_numa_node_id:
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 5bfa448..7c7384b 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -432,6 +432,7 @@  class PrinterHelpers(Printer):
             'struct __sk_buff',
             'struct sk_msg_md',
             'struct xdp_md',
+            'struct btf_ptr',
     ]
     known_types = {
             '...',
@@ -472,6 +473,7 @@  class PrinterHelpers(Printer):
             'struct tcp_request_sock',
             'struct udp6_sock',
             'struct task_struct',
+            'struct btf_ptr',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e67..726fee4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3394,6 +3394,36 @@  struct bpf_stack_build_id {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id, u64 flags)
+ *	Description
+ *		Utilize BTF to trace a representation of *ptr*->ptr, using
+ *		*ptr*->type name or *ptr*->type_id.  *ptr*->type_name
+ *		should specify the type *ptr*->ptr points to. Traversing that
+ *		data structure using BTF, the type information and values are
+ *		bpf_trace_printk()ed.  Safe copy of the pointer data is
+ *		carried out to avoid kernel crashes during data display.
+ *		Tracing specifies *trace_id* as the id associated with the
+ *		trace event; this can be used to filter trace events
+ *		to show a subset of all traced output, helping to avoid
+ *		the situation where BTF output is intermixed with other
+ *		output.
+ *
+ *		*flags* is a combination of
+ *
+ *		**BTF_TRACE_F_COMPACT**
+ *			no formatting around type information
+ *		**BTF_TRACE_F_NONAME**
+ *			no struct/union member names/types
+ *		**BTF_TRACE_F_PTR_RAW**
+ *			show raw (unobfuscated) pointer values;
+ *			equivalent to printk specifier %px.
+ *		**BTF_TRACE_F_ZERO**
+ *			show zero-valued struct/union members; they
+ *			are not displayed by default
+ *
+ *	Return
+ *		The number of bytes traced, or a negative error in cases of
+ *		failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3538,6 +3568,7 @@  struct bpf_stack_build_id {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(trace_btf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4446,4 +4477,36 @@  struct bpf_sk_lookup {
 	__u32 local_port;	/* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer display; the
+ * additional type string/BTF type id are used to render the pointer
+ * data as the appropriate type via the bpf_trace_btf() helper
+ * above.  A flags field - potentially to specify additional details
+ * about the BTF pointer (rather than its mode of display) - is
+ * present for future use.  Display flags - BTF_TRACE_F_* - are
+ * passed to display functions separately.
+ */
+struct btf_ptr {
+	void *ptr;
+	const char *type;
+	__u32 type_id;
+	__u32 flags;		/* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_trace_btf() behaviour.
+ *	- BTF_TRACE_F_COMPACT: no formatting around type information
+ *	- BTF_TRACE_F_NONAME: no struct/union member names/types
+ *	- BTF_TRACE_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *	  equivalent to %px.
+ *	- BTF_TRACE_F_ZERO: show zero-valued struct/union members; they
+ *	  are not displayed by default
+ */
+enum {
+	BTF_TRACE_F_COMPACT	=	(1ULL << 0),
+	BTF_TRACE_F_NONAME	=	(1ULL << 1),
+	BTF_TRACE_F_PTR_RAW	=	(1ULL << 2),
+	BTF_TRACE_F_ZERO	=	(1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */