Skip to content
This repository has been archived by the owner on Jan 19, 2023. It is now read-only.

verifier: xdp7.p4 hit the kernel max BPF stack size #22

Open
williamtu opened this issue Feb 8, 2017 · 27 comments
Open

verifier: xdp7.p4 hit the kernel max BPF stack size #22

williamtu opened this issue Feb 8, 2017 · 27 comments

Comments

@williamtu
Copy link
Contributor

williamtu commented Feb 8, 2017

xdp7.p4 show the error message

394: (05) goto pc+56
451: (7b) *(u64 *)(r10 -528) = r0
invalid stack off=-528 size=8

looking at the source code

    } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
        if (off >= 0 || off < -MAX_BPF_STACK) {
            verbose("invalid stack off=%d size=%d\n", off, size);
            return -EACCES;
        }

we only allow stack size of 512 + 32 + 8

#define MAX_BPF_STACK (512 /* from filter.h */ + \
    32 /* space for rbx,r13,r14,r15 */ + \
    8 /* space for skb_copy_bits */)

need to discuss with kernel bpf maintainer...

@mihaibudiu
Copy link
Contributor

This one won't be easy to fix. But 528 < 512 + 32 + 8.
I wonder why we use so much stack.
The space we need is for:

  • a copy of the headers
  • enough space to assemble all table keys that we use

Table keys are never all live at the same time, so we should only need the largest of them on the stack.

Can you please add xdp6 and 7 to the repo?

@williamtu
Copy link
Contributor Author

williamtu commented Feb 8, 2017

thanks, I've pushed xdp6 and xdp7

@mihaibudiu
Copy link
Contributor

All headers in xdp7.c require a bit more than 74 bytes, a little more than on the wire, since they are unpacked (I didn't count padding, maybe that adds up to something as well).

There are a few more local variables which add up to about 24 bytes.

The key requires 2 bytes.

So we should be pretty far from 528.

@williamtu
Copy link
Contributor Author

there are two MAX_BPF_STACK,
previous one is at arch/x86/net/bpf_jit.S
I think we are using the definition at "include/linux/filter.h"

#define MAX_BPF_STACK   512

@mihaibudiu
Copy link
Contributor

Even that should be large enough. I wonder why we use 528 bytes of stack.

@williamtu
Copy link
Contributor Author

right... that's weird... let me dig deeper

@williamtu
Copy link
Contributor Author

@williamtu
Copy link
Contributor Author

williamtu commented Feb 10, 2017

this part looks not right.
the generated code machine code is using 8 byte every time to save into hd.ethernet.source[n], which should be 1 byte
see 72, 80, 88, 96 ...

so I guess the ethernet header takes 8 * (6+6) + 2 byte?

      18:	r3 = *(u8 *)(r1 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      19:	*(u64 *)(r10 - 72) = r3
      20:	r3 = *(u8 *)(r1 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      21:	*(u64 *)(r10 - 80) = r3
      22:	r3 = *(u8 *)(r1 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      23:	*(u64 *)(r10 - 88) = r3
      24:	r3 = *(u8 *)(r1 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      25:	*(u64 *)(r10 - 96) = r3
      26:	r3 = *(u8 *)(r1 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      27:	*(u64 *)(r10 - 104) = r3
      28:	r3 = *(u8 *)(r1 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      29:	*(u64 *)(r10 - 112) = r3
      30:	r3 = *(u8 *)(r1 + 0)

@mihaibudiu
Copy link
Contributor

Can you try to replace char source[6] with u8 source[6]?
This does not make any sense.

@mihaibudiu
Copy link
Contributor

The bug is in the load_byte macro - the cast should be applied to data before adding

@williamtu
Copy link
Contributor Author

still the same, with the following 2 changes.

 #define load_byte(data, b)  (*((u8 *)data + (b)))
struct Ethernet {
    u8 source[6]; /* bit<48> */
    u8 destination[6]; /* bit<48> */
    u16 protocol; /* bit<16> */
    u8 ebpf_valid;
};

@mihaibudiu
Copy link
Contributor

can you just print sizeof(headers) and sizeof(headers.ethernet)?

@williamtu
Copy link
Contributor Author

another solution is to avoid using local stack, but put the struct Headers in a BPF MAP. I tried it a bit and seems ok. It's s.t lik

--- a/tmp/xdp1.c
+++ b/tmp/xdp1.c.usemap
@@ -1,5 +1,6 @@
 #define KBUILD_MODNAME "xdptest"
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
 
 #define load_byte(data, b)  (*(u8 *)(data + (b)))
@@ -36,7 +37,7 @@ struct Ethernet {
     char destination[6]; /* bit<48> */
     u16 protocol; /* bit<16> */
     u8 ebpf_valid;
};
 
 struct IPv4 {
     u8 version; /* bit<4> */
@@ -67,16 +68,18 @@ struct bpf_map_def SEC("maps") ebpf_outTable = {
     .max_entries = 1 /* No multicast support */
 };
 
+struct bpf_map_def SEC("maps") Headers = {
+    .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+    .key_size = sizeof(u32),
+    .value_size = sizeof(struct Headers),
+    .pinning = 2, /* PIN_GLOBAL_NS */
+    .max_entries = 1 /* No multicast support */
+};
+
+
 SEC("prog")
 int ebpf_filter(struct xdp_md* skb){
-    struct Headers hd = {
-        .ethernet = {
-            .ebpf_valid = 0
-        },
-        .ipv4 = {
-            .ebpf_valid = 0
-        },
-    };
+    struct Headers hd_zero, *hd;
     unsigned ebpf_packetOffsetInBits = 0;
     enum ebpf_errorCodes ebpf_errorCode = NoError;
     void* ebpf_packetStart = ((void*)(long)skb->data);
@@ -88,82 +91,88 @@ int ebpf_filter(struct xdp_md* skb){
     /* TODO: this should be initialized by the environment. HOW? */
     struct xdp_input xin;
 
+       memset(&hd_zero, 0, sizeof(hd_zero));
+       bpf_map_update_elem(&Headers, &ebpf_zero, &hd_zero, BPF_ANY);
+       hd = bpf_map_lookup_elem(&Headers, &ebpf_zero);
+       if (!hd)
+               return XDP_ABORTED;
+
     goto start;
     start: {
-        /* extract(hd.ethernet)*/
+        /* extract(hd->ethernet)*/

@mihaibudiu
Copy link
Contributor

This should be much slower. We should be able to figure out what the problem with the stack is.

@williamtu
Copy link
Contributor Author

williamtu commented Feb 10, 2017

adding printk

    printk("headers %d ethernet %d\n", sizeof(struct Headers), sizeof(hd.ethernet));
    goto start;
    start: {

it prints out

          <idle>-0     [001] ..s. 27808.060250: : headers 44 ethernet 16

here I'm using xdp1, headers has

struct Ethernet {
    u8 source[6]; /* bit<48> */
    u8 destination[6]; /* bit<48> */
    u16 protocol; /* bit<16> */
    u8 ebpf_valid;
};

struct IPv4 {
    u8 version; /* bit<4> */
    u8 ihl; /* bit<4> */
    u8 diffserv; /* bit<8> */
    u16 totalLen; /* bit<16> */
    u16 identification; /* bit<16> */
    u8 flags; /* bit<3> */
    u16 fragOffset; /* bit<13> */
    u8 ttl; /* bit<8> */
    u8 protocol; /* bit<8> */
    u16 hdrChecksum; /* bit<16> */
    u32 srcAddr; /* bit<32> */
    u32 dstAddr; /* bit<32> */
    u8 ebpf_valid;
};

struct Headers {
    struct Ethernet ethernet; /* Ethernet */
    struct IPv4 ipv4; /* IPv4 */
};

@mihaibudiu
Copy link
Contributor

So we know that stack usage is not the problem.

@mihaibudiu
Copy link
Contributor

How about you just run the preprocessor and look at the generated C?

@williamtu
Copy link
Contributor Author

williamtu commented Feb 10, 2017 via email

@mihaibudiu
Copy link
Contributor

Actually it can't be the load_byte - that's the load. The store is the problem.

@williamtu
Copy link
Contributor Author

I did another experiment using kernel's sample code
https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000652.html

@mihaibudiu
Copy link
Contributor

This looks like a bug in the bpf llvm back-end bug.
If you look at the assembly, they always write a double-word (64 bits) no matter what the output type is; in this case, a byte.

@mihaibudiu
Copy link
Contributor

For example, I changed the source to be just an u8, and the code generated is this:

.loc	2 89 37                 # xdp7.c:89:37
std	-464(r10), r1
ldb	r1, 0(r2)

As far as I can tell, ldb is load byte, and std is store double.

@mihaibudiu
Copy link
Contributor

I found a workaround this problem: if you take the address of the headers struct the compiler has to put it on the stack. So I just added this:

printk("%p", &hd);

I will add this to the generated code.

@williamtu
Copy link
Contributor Author

yes, it will put it on stack, but still uses 8 bytes to store an u8
for example using xdp1.p4, and adding printk("%p", &hd);

root@osboxes:~/p4c/extensions/p4c-xdp/tests# llvm-objdump -S -no-show-raw-insn xdp1.o

xdp1.o:	file format ELF64-BPF

Disassembly of section prog:
ebpf_filter:
; int ebpf_filter(struct xdp_md* skb){
       0:	r6 = r1
       1:	r7 = 0
; struct Headers hd = {
       2:	*(u32 *)(r10 - 8) = r7
       3:	*(u64 *)(r10 - 16) = r7
       4:	*(u64 *)(r10 - 24) = r7
       5:	*(u64 *)(r10 - 32) = r7
       6:	*(u64 *)(r10 - 40) = r7
       7:	*(u64 *)(r10 - 48) = r7

hd still uses 8 bytes for each u8

@williamtu
Copy link
Contributor Author

from Alexei's reply
https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000653.html
I think it's a limitation in llvm which it always spills 8byte on the stack, since BPF register size is 8byte.

@williamtu
Copy link
Contributor Author

williamtu commented Feb 15, 2017

I force llvm to allocate struct Headers on stack using alloca
first, from bitcode to LLVM IR

clang -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign -Wno-compare-distinct-pointer-types -Wno-gnu-variable-sized-type-not-at-end -Wno-tautological-compare -O2 -emit-llvm -g -c xdp9.c
llvm-dis xdp9.bc -o /tmp/xdp9.ll

make the change in LLVM IR, xdp9.ll

  %ebpf_zero = alloca i32, align 4
  %xout = alloca %struct.xdp_output, align 4
  %key = alloca %struct.dstmactable_key, align 2
+  %hd = alloca %struct.Headers, align 4
+  %hd.ethernet = alloca %struct.Ethernet, align 2
+  %hd.ipv4 = alloca %struct.IPv4, align 4

assembly back from LLVM IR to bitcode

llvm-as /tmp/xdp9.ll -o xdp9.bc
llc -march=bpf -filetype=obj -o xdp9.o xdp9.bc
llvm-objdump -S -no-show-raw-insn xdp9.o

result of objdump

Disassembly of section prog:
ebpf_filter:
; int ebpf_filter(struct xdp_md* skb){
       0:	r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
       1:	r3 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
       2:	r2 = *(u32 *)(r6 + 0)
       3:	r8 = 0
; u32 ebpf_zero = 0;
       4:	*(u32 *)(r10 - 92) = r8
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
       5:	r1 = r2
       6:	r1 += 14
       7:	if r1 > r3 goto 891
       8:	r1 = 14
       9:	*(u64 *)(r10 - 136) = r1
      10:	r5 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      11:	r1 = *(u8 *)(r2 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      12:	*(u64 *)(r10 - 544) = r1
      13:	r1 = *(u8 *)(r2 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      14:	*(u64 *)(r10 - 552) = r1
      15:	r1 = *(u8 *)(r2 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      16:	*(u64 *)(r10 - 560) = r1
      17:	r1 = *(u8 *)(r2 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      18:	*(u64 *)(r10 - 568) = r1
      19:	r1 = *(u8 *)(r2 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      20:	*(u64 *)(r10 - 576) = r1
      21:	r1 = *(u8 *)(r2 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      22:	*(u64 *)(r10 - 584) = r1
      23:	r1 = *(u8 *)(r2 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      24:	*(u64 *)(r10 - 600) = r1
      25:	r1 = *(u8 *)(r2 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      26:	*(u64 *)(r10 - 608) = r1
      27:	r1 = *(u8 *)(r2 + 3)

unfortunately it's still the same, the hd is using r10 - off, where off is 8 byte offset

@williamtu
Copy link
Contributor Author

williamtu commented Feb 15, 2017

I don't understand why a 'switch' statement need to clear the stack
see ; switch (hd.ethernet.protocol)

; int ebpf_filter(struct xdp_md* skb){
       0:	r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
       1:	r4 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
       2:	r3 = *(u32 *)(r6 + 0)
       3:	r7 = 0
; u32 ebpf_zero = 0;
       4:	*(u32 *)(r10 - 4) = r7
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
       5:	r1 = r3
       6:	r1 += 14
       7:	if r1 > r4 goto 245
       8:	r2 = 14
       9:	r1 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      10:	*(u64 *)(r10 - 120) = r1
      11:	r1 = *(u8 *)(r3 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      12:	*(u64 *)(r10 - 136) = r1
      13:	r1 = *(u8 *)(r3 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      14:	*(u64 *)(r10 - 144) = r1
      15:	r1 = *(u8 *)(r3 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      16:	*(u64 *)(r10 - 152) = r1
      17:	r1 = *(u8 *)(r3 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      18:	*(u64 *)(r10 - 160) = r1
      19:	r1 = *(u8 *)(r3 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      20:	*(u64 *)(r10 - 168) = r1
      21:	r1 = *(u8 *)(r3 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      22:	*(u64 *)(r10 - 176) = r1
      23:	r1 = *(u8 *)(r3 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      24:	*(u64 *)(r10 - 184) = r1
      25:	r1 = *(u8 *)(r3 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      26:	*(u64 *)(r10 - 192) = r1
      27:	r1 = *(u8 *)(r3 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      28:	*(u64 *)(r10 - 200) = r1
      29:	r1 = *(u8 *)(r3 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      30:	*(u64 *)(r10 - 208) = r1
      31:	r1 = *(u8 *)(r3 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      32:	*(u64 *)(r10 - 216) = r1
      33:	r1 = *(u8 *)(r3 + 0)
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
      34:	*(u64 *)(r10 - 224) = r1
      35:	r9 = *(u16 *)(r3 + 12)
; switch (hd.ethernet.protocol) {
      36:	r5 = r9
      37:	r5 <<= 8
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
      38:	r8 = r9
      39:	r8 >>= 8
; switch (hd.ethernet.protocol) {
      40:	r1 = r8
      41:	r1 |= r5
      42:	r1 &= 65535
      43:	r5 = 0
      44:	*(u64 *)(r10 - 296) = r5
      45:	r5 = 0
      46:	*(u64 *)(r10 - 232) = r5
      47:	r5 = 0
      48:	*(u64 *)(r10 - 240) = r5
      49:	r5 = 0
      50:	*(u64 *)(r10 - 280) = r5
      51:	r5 = 0
      52:	*(u64 *)(r10 - 272) = r5
      53:	r5 = 0
      54:	r7 = 0
      55:	r0 = 0
      56:	*(u64 *)(r10 - 256) = r0
      57:	r0 = 0
      58:	*(u64 *)(r10 - 248) = r0
      59:	r0 = 0
      60:	*(u64 *)(r10 - 128) = r0
      61:	r0 = 0
      62:	*(u64 *)(r10 - 264) = r0
      63:	r0 = 0
      64:	*(u64 *)(r10 - 288) = r0
      65:	r0 = 0
      66:	*(u64 *)(r10 - 312) = r0
      67:	r0 = 0
      68:	*(u64 *)(r10 - 328) = r0
      69:	r0 = 0
      70:	*(u64 *)(r10 - 320) = r0
      71:	r0 = 0
      72:	*(u64 *)(r10 - 304) = r0
      73:	r0 = 0
      74:	*(u64 *)(r10 - 376) = r0
      75:	r0 = 0
      76:	*(u64 *)(r10 - 368) = r0
      77:	r0 = 0
      78:	*(u64 *)(r10 - 336) = r0
      79:	r0 = 0
      80:	*(u64 *)(r10 - 344) = r0
      81:	r0 = 0
      82:	*(u64 *)(r10 - 352) = r0
      83:	r0 = 0
      84:	*(u64 *)(r10 - 360) = r0
      85:	if r1 != 2048 goto 60
      86:	r7 = 0
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 160)) {
      87:	r2 = r3
      88:	r2 += 34
      89:	if r2 > r4 goto 163
      90:	r2 = 34

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants