Skip to content

Commit a482612

Browse files
committed
udp_buffer: add UDP buffer tuning for udp_mem, rmem_max,default
Trace UDP receive failures and, based on the error code (-ENOBUFS for overall UDP memory exhaustion, -ENOMEM for socket receive buffer exhaustion), modify UDP-related buffer parameters. Similar to TCP, look for memory pressure/memory exhaustion approach in UDP and modify udp_mem and rmem appropriately to avoid exhaustion where possible or mitigate it if unavoidable. For net.core.rmem_max, if a socket experiences losses with a rcvbuf ~= rmem_max, increase rmem_max. If a socket experiences losses with rcvbuf ~= rmem_default _and_ it is not locked - i.e. does not have an explicit rcvbuf value set by setsockopt - increase rmem_default also. Note that a given socket - because UDP has a fixed rcvbuf size - will only contribute to such updates where its rcvbuf is within range of rmem_max or rmem_default. So if a particular socket cannot cope with load consistently - rather than the kind of bursty issues increasing rmem can help with - it will not keep contributing to rmem updates. Add tests for rmem updates, rmem locked (where we expect an explicit setsockopt from iperf3 will lock rcvbuf so no updates to rmem_default are expected) and udp mem exhaustion. Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
1 parent c20ddf2 commit a482612

19 files changed

+1160
-20
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ The key components are
166166
which helps power namespace awareness for bpftune as a whole.
167167
Namespace awareness is important as we want to be able to auto-tune
168168
containers also. See [bpftune-netns (8)](./docs/bpftune-netns.rst)
169+
- UDP buffer tuner: auto-tune buffers relating to UDP. See
170+
[bpftune-udp-buffer (8)](./docs/bpftune-udp-buffer.rst)
169171

170172
## Code organization
171173

docs/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ man8dir = $(mandir)/man8
3636

3737
MAN8_RST = bpftune.rst bpftune-sysctl.rst bpftune-tcp-conn.rst \
3838
bpftune-neigh.rst bpftune-tcp-buffer.rst bpftune-netns.rst \
39-
bpftune-net-buffer.rst bpftune-ip-frag.rst
39+
bpftune-net-buffer.rst bpftune-ip-frag.rst bpftune-udp-buffer.rst
4040

4141
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
4242
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))

docs/bpftune-udp-buffer.rst

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
==================
2+
BPFTUNE-UDP-BUFFER
3+
==================
4+
-------------------------------------------------------------------------------
5+
UDP buffer bpftune plugin for managing UDP buffer sizes, memory limits
6+
-------------------------------------------------------------------------------
7+
8+
:Manual section: 8
9+
10+
11+
DESCRIPTION
12+
===========
13+
14+
For UDP - like TCP - a triple of min, pressure, max
15+
represents UDP memory limits and is specified in
16+
17+
net.ipv4.udp_mem
18+
19+
If receive fails with -ENOBUFS this indicates memory
20+
limits are being reached; we adaptively increase pressure and
21+
max to ensure that memory exhaustion does not occur (as long
22+
as we do not approach real memory exhaustion). As memory
23+
exhaustion is approached and we can no longer increase
24+
overall memory limits, reduce net.core.rmem* values to limit
25+
socket memory overheads.
26+
27+
For UDP receive buffer memory, bump net.core.rmem_max if
28+
a socket experiences receive buffer drops within range of
29+
the rmem_max value. Similarly bump rmem_default if sockets are
30+
within range of it and do not have a locked (via setsockopt)
31+
value.
32+
33+
Tunables:
34+
35+
- net.ipv4.udp_mem: min, pressure, max UDP memory
36+
- net.core.rmem_max: max rcvbuf size specifiable via setsockopt()
37+
- net.core.rmem_default: default rcvbuf size where none was set

include/bpftune/bpftune.bpf.h

+9-7
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,10 @@ unsigned long long bpftune_init_net;
233233
#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
234234
#endif
235235

236+
#ifndef SOL_SOCKET
237+
#define SOL_SOCKET 1
238+
#endif
239+
236240
#ifndef SOL_TCP
237241
#define SOL_TCP 6
238242
#endif
@@ -274,6 +278,9 @@ unsigned long long bpftune_init_net;
274278
#endif
275279

276280
#define EINVAL 22
281+
#define ENOMEM 12
282+
#define EAGAIN 11
283+
#define ENOBUFS 105
277284

278285
bool debug;
279286

@@ -348,7 +355,6 @@ static __always_inline long send_net_sysctl_event(struct net *net,
348355
__u64 event_key = 0;
349356
long nscookie = 0;
350357
__u64 *last_timep = NULL;
351-
int ret = 0;
352358

353359
nscookie = get_netns_cookie(net);
354360
if (nscookie < 0)
@@ -359,7 +365,7 @@ static __always_inline long send_net_sysctl_event(struct net *net,
359365
last_timep = bpf_map_lookup_elem(&last_event_map, &event_key);
360366
if (last_timep) {
361367
if ((now - *last_timep) < (25 * MSEC))
362-
return 0;
368+
return -EAGAIN;
363369
*last_timep = now;
364370
} else {
365371
bpf_map_update_elem(&last_event_map, &event_key, &now, 0);
@@ -375,11 +381,7 @@ static __always_inline long send_net_sysctl_event(struct net *net,
375381
event->update[0].new[0] = new[0];
376382
event->update[0].new[1] = new[1];
377383
event->update[0].new[2] = new[2];
378-
ret = bpf_ringbuf_output(&ring_buffer_map, event, sizeof(*event), 0);
379-
bpftune_debug("tuner [%d] scenario [%d]: event send: %d ",
380-
tuner_id, scenario_id, ret);
381-
bpftune_debug("\told '%ld %ld %ld'\n", old[0], old[1], old[2]);
382-
bpftune_debug("\tnew '%ld %ld %ld'\n", new[0], new[1], new[2]);
384+
bpf_ringbuf_output(&ring_buffer_map, event, sizeof(*event), 0);
383385
return 0;
384386
}
385387

include/bpftune/bpftune.h

+3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ extern unsigned short bpftune_learning_rate;
5454
/* shrink by delta (default 25%) */
5555
#define BPFTUNE_SHRINK_BY_DELTA(val) ((val) - ((val) >> BPFTUNE_BITSHIFT))
5656

57+
#define BPFTUNE_WITHIN_BITSHIFT(val1, val2, shift) \
58+
((val1 + (val1 >> shift)) > val2)
59+
5760
#define MSEC ((__u64)1000000)
5861
#define SECOND ((__u64)1000000000)
5962
#define MINUTE (60 * SECOND)

include/bpftune/libbpftune.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -351,10 +351,11 @@ void bpftune_sysctl_name_to_path(const char *name, char *path, size_t path_sz);
351351
int bpftune_sysctl_read(int netns_fd, const char *name, long *values);
352352
int bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values, long *values);
353353
long long bpftune_ksym_addr(char type, const char *name);
354-
int bpftune_snmpstat_read(unsigned long netns_cookie, int family, const char *name, long *value);
355-
int bpftune_netstat_read(unsigned long netns_cookie, int family, const char *name, long *value);
354+
int bpftune_snmpstat_read(unsigned long netns_cookie, int family, const char *linename, const char *name, long *value);
355+
int bpftune_netstat_read(unsigned long netns_cookie, int family, const char *linename, const char *name, long *value);
356356
int bpftune_sched_wait_run_percent_read(void);
357357
bool bpftune_netns_cookie_supported(void);
358+
unsigned long bpftune_global_netns_cookie(void);
358359
int bpftune_netns_set(int fd, int *orig_fd, bool quiet);
359360
int bpftune_netns_info(int pid, int *fd, unsigned long *cookie);
360361
int bpftune_netns_init_all(void);

src/Makefile

+2-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ submake_extras := feature_display=0
9999
endif
100100

101101
TUNERS = tcp_buffer_tuner route_table_tuner neigh_table_tuner sysctl_tuner \
102-
tcp_conn_tuner netns_tuner net_buffer_tuner ip_frag_tuner
102+
tcp_conn_tuner netns_tuner net_buffer_tuner ip_frag_tuner \
103+
udp_buffer_tuner
103104

104105
TUNER_OBJS = $(patsubst %,%.o,$(TUNERS))
105106
TUNER_SRCS = $(patsubst %,%.c,$(TUNERS))

src/ip_frag_tuner.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ void event_handler(struct bpftuner *tuner,
7575
key.netns_cookie = event->netns_cookie;
7676

7777
af = id == IP_FRAG_MAX_THRESHOLD ? AF_INET : AF_INET6;
78-
if (!bpftune_snmpstat_read(event->netns_cookie, af,
78+
if (!bpftune_snmpstat_read(event->netns_cookie, af, NULL,
7979
"ReasmFails", &reasmfails) &&
80-
!bpftune_snmpstat_read(event->netns_cookie, af,
80+
!bpftune_snmpstat_read(event->netns_cookie, af, NULL,
8181
"ReasmReqds", &reasmreqds)) {
8282
/* % of reasm fails */
8383
reasm_failrate = (reasmfails * 100)/reasmreqds;

src/libbpftune.c

+21-5
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,13 @@ static void bpftune_global_netns_init(void)
692692
}
693693
}
694694

695+
unsigned long bpftune_global_netns_cookie(void)
696+
{
697+
if (!global_netns_cookie)
698+
bpftune_global_netns_init();
699+
return global_netns_cookie;
700+
}
701+
695702
/* add a tuner to the list of tuners, or replace existing inactive tuner.
696703
* If successful, call init().
697704
*/
@@ -725,6 +732,8 @@ struct bpftuner *bpftuner_init(const char *path)
725732
free(tuner);
726733
return NULL;
727734
}
735+
tuner->bpf_support = support_level;
736+
728737
/* If we have a ringbuf fd from any tuner, use its fd to be re-used
729738
* for other ringbuf maps (so we can use the same ring buffer for all
730739
* BPF events.
@@ -1135,7 +1144,8 @@ long long bpftune_ksym_addr(char type, const char *name)
11351144
}
11361145

11371146
static int bpftune_nstat_read(unsigned long netns_cookie, int family,
1138-
const char *file, const char *name, long *value)
1147+
const char *file, const char *linename,
1148+
const char *name, long *value)
11391149
{
11401150
int err, netns_fd = 0, orig_netns_fd = 0, stat_index = 0;
11411151
char line[1024];
@@ -1188,6 +1198,12 @@ static int bpftune_nstat_read(unsigned long netns_cookie, int family,
11881198
* have same index on the next line.
11891199
*/
11901200
if (strcmp(next, name) == 0) {
1201+
/* ensure line is Udp: or Tcp: if specified;
1202+
* avoids matching InErrs for wrong protocol.
1203+
*/
1204+
if (linename &&
1205+
strncmp(line, linename, strlen(linename)) != 0)
1206+
continue;
11911207
stat_index = index;
11921208
break;
11931209
}
@@ -1207,19 +1223,19 @@ static int bpftune_nstat_read(unsigned long netns_cookie, int family,
12071223
}
12081224

12091225
int bpftune_snmpstat_read(unsigned long netns_cookie, int family,
1210-
const char *name, long *value)
1226+
const char *linename, const char *name, long *value)
12111227
{
12121228
return bpftune_nstat_read(netns_cookie, family,
12131229
family == AF_INET ? "/proc/net/snmp" :
12141230
"/proc/net/snmp6",
1215-
name, value);
1231+
linename, name, value);
12161232
}
12171233

12181234
int bpftune_netstat_read(unsigned long netns_cookie, int family,
1219-
const char *name, long *value)
1235+
const char *linename, const char *name, long *value)
12201236
{
12211237
return bpftune_nstat_read(netns_cookie, family, "/proc/net/netstat",
1222-
name, value);
1238+
linename, name, value);
12231239
}
12241240

12251241
/* return % of overall wait/run time on all cpus gathered from

src/libbpftune.map

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ LIBBPFTUNE_0.1.1 {
4141
bpftuner_bpf_prog_in_strategy;
4242
bpftuner_tunables_fini;
4343
bpftune_netns_cookie_supported;
44+
bpftune_global_netns_cookie;
4445
bpftuner_netns_init;
4546
bpftuner_netns_fini;
4647
bpftuner_netns_from_cookie;

src/tcp_buffer_tuner.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,9 @@ void event_handler(struct bpftuner *tuner,
373373
break;
374374
t = bpftuner_tunable(tuner, TCP_BUFFER_TCP_SYNCOOKIES);
375375
if (t && t->current_values[0] > 0 &&
376-
!bpftune_netstat_read(event->netns_cookie, AF_INET,
376+
!bpftune_netstat_read(event->netns_cookie, AF_INET, NULL,
377377
"SyncookiesRecv", &goodcookies) &&
378-
!bpftune_netstat_read(event->netns_cookie, AF_INET,
378+
!bpftune_netstat_read(event->netns_cookie, AF_INET, NULL,
379379
"SyncookiesFailed", &badcookies)) {
380380

381381
/* syncookies are enabled; are they effective? compare good/bad counts.

0 commit comments

Comments
 (0)