155 lines
5.5 KiB
Diff
155 lines
5.5 KiB
Diff
From 610707057ac60311fde94b3a049de9d4826a3bf2 Mon Sep 17 00:00:00 2001
|
|
From: Andrii Nakryiko <andrii@kernel.org>
|
|
Date: Fri, 15 Jul 2022 16:09:51 -0700
|
|
Subject: [PATCH] libbpf: make RINGBUF map size adjustments more eagerly
|
|
|
|
Make libbpf adjust RINGBUF map size (rounding it up to closest power-of-2
|
|
of page_size) more eagerly: during open phase when initializing the map
|
|
and on explicit calls to bpf_map__set_max_entries().
|
|
|
|
Such an approach allows users to check the actual size of BPF ringbuf even
|
|
before it's created in the kernel, but also it prevents various edge
|
|
case scenarios where BPF ringbuf size can get out of sync with what it
|
|
would be in kernel. One of them (reported in [0]) is during an attempt
|
|
to pin/reuse BPF ringbuf.
|
|
|
|
Move adjust_ringbuf_sz() helper closer to its first actual use. The
|
|
implementation of the helper is unchanged.
|
|
|
|
Also make detection of whether bpf_object is already loaded more robust
|
|
by checking obj->loaded explicitly, given that map->fd can be < 0 even
|
|
if bpf_object is already loaded due to the ability to disable map creation
|
|
with bpf_map__set_autocreate(map, false).
|
|
|
|
[0] Closes: https://github.com/libbpf/libbpf/pull/530
|
|
|
|
Fixes: 0087a681fa8c ("libbpf: Automatically fix up BPF_MAP_TYPE_RINGBUF size, if necessary")
|
|
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
|
|
Acked-by: Yonghong Song <yhs@fb.com>
|
|
Link: https://lore.kernel.org/r/20220715230952.2219271-1-andrii@kernel.org
|
|
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
|
|
---
|
|
src/libbpf.c | 77 ++++++++++++++++++++++++++++------------------------
|
|
1 file changed, 42 insertions(+), 35 deletions(-)
|
|
|
|
diff --git a/src/libbpf.c b/src/libbpf.c
|
|
index 9b5500659..b01fe01b0 100644
|
|
--- a/src/libbpf.c
|
|
+++ b/src/libbpf.c
|
|
@@ -2331,6 +2331,37 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
|
|
return 0;
|
|
}
|
|
|
|
+static size_t adjust_ringbuf_sz(size_t sz)
|
|
+{
|
|
+ __u32 page_sz = sysconf(_SC_PAGE_SIZE);
|
|
+ __u32 mul;
|
|
+
|
|
+ /* if user forgot to set any size, make sure they see error */
|
|
+ if (sz == 0)
|
|
+ return 0;
|
|
+ /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
|
|
+ * a power-of-2 multiple of kernel's page size. If user diligently
|
|
+ * satisfied these conditions, pass the size through.
|
|
+ */
|
|
+ if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
|
|
+ return sz;
|
|
+
|
|
+ /* Otherwise find closest (page_sz * power_of_2) product bigger than
|
|
+ * user-set size to satisfy both user size request and kernel
|
|
+ * requirements and substitute correct max_entries for map creation.
|
|
+ */
|
|
+ for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
|
|
+ if (mul * page_sz > sz)
|
|
+ return mul * page_sz;
|
|
+ }
|
|
+
|
|
+ /* if it's impossible to satisfy the conditions (i.e., user size is
|
|
+ * very close to UINT_MAX but is not a power-of-2 multiple of
|
|
+ * page_size) then just return original size and let kernel reject it
|
|
+ */
|
|
+ return sz;
|
|
+}
|
|
+
|
|
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
|
|
{
|
|
map->def.type = def->map_type;
|
|
@@ -2344,6 +2375,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
|
|
map->btf_key_type_id = def->key_type_id;
|
|
map->btf_value_type_id = def->value_type_id;
|
|
|
|
+ /* auto-adjust BPF ringbuf map max_entries to be a power-of-2 multiple of page size */
|
|
+ if (map->def.type == BPF_MAP_TYPE_RINGBUF)
|
|
+ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
|
|
+
|
|
if (def->parts & MAP_DEF_MAP_TYPE)
|
|
pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
|
|
|
|
@@ -4317,9 +4352,15 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
|
|
|
|
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
|
|
{
|
|
- if (map->fd >= 0)
|
|
+ if (map->obj->loaded)
|
|
return libbpf_err(-EBUSY);
|
|
+
|
|
map->def.max_entries = max_entries;
|
|
+
|
|
+ /* auto-adjust BPF ringbuf map max_entries to be a power-of-2 multiple of page size */
|
|
+ if (map->def.type == BPF_MAP_TYPE_RINGBUF)
|
|
+ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -4875,37 +4916,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
|
|
|
|
static void bpf_map__destroy(struct bpf_map *map);
|
|
|
|
-static size_t adjust_ringbuf_sz(size_t sz)
|
|
-{
|
|
- __u32 page_sz = sysconf(_SC_PAGE_SIZE);
|
|
- __u32 mul;
|
|
-
|
|
- /* if user forgot to set any size, make sure they see error */
|
|
- if (sz == 0)
|
|
- return 0;
|
|
- /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
|
|
- * a power-of-2 multiple of kernel's page size. If user diligently
|
|
- * satisified these conditions, pass the size through.
|
|
- */
|
|
- if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
|
|
- return sz;
|
|
-
|
|
- /* Otherwise find closest (page_sz * power_of_2) product bigger than
|
|
- * user-set size to satisfy both user size request and kernel
|
|
- * requirements and substitute correct max_entries for map creation.
|
|
- */
|
|
- for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
|
|
- if (mul * page_sz > sz)
|
|
- return mul * page_sz;
|
|
- }
|
|
-
|
|
- /* if it's impossible to satisfy the conditions (i.e., user size is
|
|
- * very close to UINT_MAX but is not a power-of-2 multiple of
|
|
- * page_size) then just return original size and let kernel reject it
|
|
- */
|
|
- return sz;
|
|
-}
|
|
-
|
|
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
|
|
{
|
|
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
|
|
@@ -4944,9 +4954,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
|
|
}
|
|
|
|
switch (def->type) {
|
|
- case BPF_MAP_TYPE_RINGBUF:
|
|
- map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
|
|
- /* fallthrough */
|
|
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
|
|
case BPF_MAP_TYPE_CGROUP_ARRAY:
|
|
case BPF_MAP_TYPE_STACK_TRACE:
|
|
--
|
|
2.33.0
|