Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce two enhancements for func IP #468

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 160 additions & 6 deletions bpf/kprobe_pwru.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@
const static bool TRUE = true;
const static u32 ZERO = 0;

volatile const static __u64 BPF_PROG_ADDR = 0;

enum {
TRACKED_BY_FILTER = (1 << 0),
TRACKED_BY_SKB = (1 << 1),
Expand Down Expand Up @@ -401,6 +399,24 @@ get_tracing_fp(void)
return fp;
}

#ifdef bpf_target_arm64
static __always_inline u64 detect_tramp_fp(void);
#endif

/* Return the frame pointer of the bpf trampoline that invoked the current
 * tracing prog, or 0 when the target architecture is neither x86 nor arm64.
 */
static __always_inline u64
get_tramp_fp(void) {
#if defined(bpf_target_x86)
	u64 tramp_fp = 0;
	u64 prog_fp = get_tracing_fp();

	/* The slot at the current prog's FP is read as the trampoline's FP;
	 * tramp_fp keeps its zero initializer if the helper does not write it.
	 */
	bpf_probe_read_kernel(&tramp_fp, sizeof(tramp_fp), (void *) prog_fp);
	return tramp_fp;
#elif defined(bpf_target_arm64)
	/* The trampoline FP has to be searched for on the stack on arm64. */
	return detect_tramp_fp();
#else
	return 0;
#endif
}

static __always_inline u64
get_kprobe_fp(struct pt_regs *ctx)
{
Expand All @@ -410,7 +426,7 @@ get_kprobe_fp(struct pt_regs *ctx)
static __always_inline u64
get_stackid(void *ctx, const bool is_kprobe) {
u64 caller_fp;
u64 fp = is_kprobe ? get_kprobe_fp(ctx) : get_tracing_fp();
u64 fp = is_kprobe ? get_kprobe_fp(ctx) : get_tramp_fp();
for (int depth = 0; depth < MAX_STACK_DEPTH; depth++) {
if (bpf_probe_read_kernel(&caller_fp, sizeof(caller_fp), (void *)fp) < 0)
break;
Expand Down Expand Up @@ -513,6 +529,60 @@ handle_everything(struct sk_buff *skb, void *ctx, struct event_t *event, u64 *_s
return true;
}

#ifdef bpf_target_x86
/* The following gen_endbr_poison() and is_endbr() functions are taken from the
* kernel's arch/x86/include/asm/ibt.h file.
*/

/* Return the 4-byte instruction encoding used to poison (former) ENDBR sites. */
static __always_inline u32
gen_endbr_poison(void) {
	/*
	 * "osp nopl (%rax)": a 4 byte NOP that isn't NOP4 (in fact it is
	 * OSP NOP3), such that it will be unique to (former) ENDBR sites.
	 */
	const u32 osp_nopl_rax = 0x001f0f66;

	return osp_nopl_rax;
}

/* Report whether @val, the 4 bytes read from a potential ENDBR site, encodes
 * ENDBR64, ENDBR32, or the poison NOP returned by gen_endbr_poison().
 */
static __always_inline bool
is_endbr(u32 val) {
	static const u32 endbr64 = 0xfa1e0ff3;

	if (val == gen_endbr_poison())
		return true;

	/* ENDBR32 (0xfb1e0ff3) differs from ENDBR64 only in bit 24, so
	 * clearing that bit lets a single comparison match both encodings.
	 */
	val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
	return val == endbr64;
}

/* Adjust @ip so that it points at the ENDBR instruction when the 4 bytes
 * immediately preceding @ip are an ENDBR (or its poison NOP).
 *
 * Returns @ip - 4 in that case, and @ip unchanged otherwise, including
 * when the preceding bytes cannot be read.
 */
static __always_inline u64
correct_func_ip(u64 ip) {
	const u64 endbr_insn_size = 4;
	u32 endbr = 0;

	/* Do not classify bytes that were never read successfully. */
	if (bpf_probe_read_kernel(&endbr, sizeof(endbr), (void *) (ip - endbr_insn_size)) < 0)
		return ip;

	return is_endbr(endbr) ? (ip - endbr_insn_size) : ip;
}
#endif

static __always_inline u64 get_func_ip(void);

/* Compute the address reported for the traced function.
 *
 * @ctx:             program context; treated as struct pt_regs * when
 *                   @is_kprobe is true.
 * @is_kprobe:       true for kprobe progs, false for tracing (fentry) progs.
 * @has_get_func_ip: true when bpf_get_func_ip() can be used.
 *
 * Without bpf_get_func_ip(), the IP is taken from pt_regs (kprobe) or from
 * get_func_ip() (tracing) and, on x86, fixed up by hand.
 */
static __always_inline u64
get_addr(void *ctx, const bool is_kprobe, const bool has_get_func_ip) {
	u64 ip;

	if (has_get_func_ip) {
		ip = bpf_get_func_ip(ctx); /* endbr has been handled in helper. */
	} else {
		ip = is_kprobe ? PT_REGS_IP((struct pt_regs *) ctx) : get_func_ip();
#ifdef bpf_target_x86
		/* Undo the kprobe offset first, so that the ENDBR check below
		 * looks at the 4 bytes right before the probed address rather
		 * than at a window shifted by one byte.
		 */
		ip -= is_kprobe; /* -1 always on x86 if kprobe. */
		ip = correct_func_ip(ip);
#endif
	}

	return ip;
}

static __always_inline int
kprobe_skb(struct sk_buff *skb, struct pt_regs *ctx, const bool has_get_func_ip, u64 *_stackid) {
struct event_t event = {};
Expand All @@ -521,7 +591,7 @@ kprobe_skb(struct sk_buff *skb, struct pt_regs *ctx, const bool has_get_func_ip,
return BPF_OK;

event.skb_addr = (u64) skb;
event.addr = has_get_func_ip ? bpf_get_func_ip(ctx) : PT_REGS_IP(ctx);
event.addr = get_addr(ctx, true, has_get_func_ip);
event.param_second = PT_REGS_PARM2(ctx);
event.param_third = PT_REGS_PARM3(ctx);
if (CFG.output_caller)
Expand Down Expand Up @@ -606,6 +676,90 @@ int BPF_PROG(fexit_skb_copy, struct sk_buff *old, gfp_t mask, struct sk_buff *ne
return BPF_OK;
}

#ifdef bpf_target_arm64
/* R10 of a bpf prog is not A64_FP, so the FP of the trampoline has to be
 * detected by scanning the stack of the current bpf prog and of the
 * trampoline.
 *
 * Since commit 5d4fa9ec5643 ("bpf, arm64: Avoid blindly saving/restoring all callee-saved registers"),
 * the number of callee-saved registers saved in the bpf prog prologue is
 * dynamic rather than fixed.
 */
/* Search the stack above the current bpf prog's R10 for a saved frame pointer
 * and return it; fall back to R10 itself when no candidate is found.
 *
 * Seven 16-byte-spaced slots are probed, from R10 + 96 down to R10. A value
 * is accepted when it lies strictly between R10 and R10 + range_of_detection.
 */
static __always_inline u64
detect_tramp_fp(void) {
	static const int range_of_detection = 256;
	const u64 bpf_r10 = get_tracing_fp(); /* R10 of current bpf prog */
	u64 candidate;
	int slot;

	for (slot = 6; slot >= 0; slot--) {
		bpf_probe_read_kernel(&candidate, sizeof(candidate), (void *) (bpf_r10 + slot * 16));
		if (candidate <= bpf_r10)
			continue;
		if (candidate >= bpf_r10 + range_of_detection)
			continue;
		return candidate;
	}

	return bpf_r10;
}
#endif

/* Recover the IP of the traced function (tracee) from a tracing prog by
 * reading the return address stored next to the trampoline's frame pointer
 * and stepping back over the call sequence that entered the trampoline.
 *
 * Returns 0 when the return address cannot be read.
 */
static __always_inline u64
get_func_ip(void) {
	u64 fp_tramp, ip = 0;

#if defined(bpf_target_x86)
	static const int ip_offset = 5/* sizeof callq insn */;
#elif defined(bpf_target_arm64)
	/* Ref: commit b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64") */
	static const int ip_offset = 12/* sizeof 3 insns */;
#else
#error Unsupported architecture
#endif

	/* Stack layout on x86:
	 * +-----+ FP of tracee's caller
	 * | ... |
	 * | rip | IP of tracee's caller
	 * | rip | IP of tracee
	 * | rbp | FP of tracee's caller
	 * +-----+ FP of trampoline
	 * | ... |
	 * | rip | IP of trampoline
	 * | rbp | FP of trampoline
	 * +-----+ FP of current prog
	 * | ... |
	 * +-----+ SP of current prog
	 *
	 * Stack layout on arm64:
	 * | r9   |
	 * | fp   | FP of tracee's caller
	 * | lr   | IP of tracee
	 * | fp   | FP of tracee
	 * +------+ FP of trampoline <-------+
	 * | ..   | padding                  |
	 * | ..   | callee saved regs        |
	 * | retv | retval of tracee         |
	 * | regs | regs of tracee           |
	 * | nreg | number of regs           |
	 * | ip   | IP of tracee if needed   | possible range of
	 * | rctx | bpf_tramp_run_ctx        | detection
	 * | lr   | IP of trampoline         |
	 * | fp   | FP of trampoline <--------- detect it
	 * +------+ FP of current prog       |
	 * | regs | callee saved regs        |
	 * +------+ R10 of bpf prog  <-------+
	 * | ..   |
	 * +------+ SP of current prog
	 */

	/* get_tramp_fp() already does the per-arch lookup: a read through the
	 * current prog's FP on x86, detect_tramp_fp() on arm64.
	 */
	fp_tramp = get_tramp_fp(); /* FP of trampoline */

	/* The slot right above the trampoline's FP holds the IP of tracee. */
	if (bpf_probe_read_kernel(&ip, sizeof(ip), (void *)(fp_tramp + 8)) < 0)
		return 0; /* do not report a bogus (0 - ip_offset) address */

	return ip - ip_offset;
}

SEC("fentry/tc")
int BPF_PROG(fentry_tc, struct sk_buff *skb) {
struct event_t event = {};
Expand All @@ -614,7 +768,7 @@ int BPF_PROG(fentry_tc, struct sk_buff *skb) {
return BPF_OK;

event.skb_addr = (u64) skb;
event.addr = BPF_PROG_ADDR;
event.addr = get_addr(ctx, false, false);
event.type = EVENT_TYPE_TC;
bpf_map_push_elem(&events, &event, BPF_EXIST);

Expand Down Expand Up @@ -722,7 +876,7 @@ int BPF_PROG(fentry_xdp, struct xdp_buff *xdp) {
event.ts = bpf_ktime_get_ns();
event.cpu_id = bpf_get_smp_processor_id();
event.skb_addr = (u64) &xdp;
event.addr = BPF_PROG_ADDR;
event.addr = get_addr(ctx, false, false);
event.type = EVENT_TYPE_XDP;
bpf_map_push_elem(&events, &event, BPF_EXIST);

Expand Down
6 changes: 2 additions & 4 deletions internal/pwru/bpf_prog.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ import (

var errNotFound = errors.New("not found")

type BpfProgName2Addr map[string]uint64

func listBpfProgs(typ ebpf.ProgramType) ([]*ebpf.Program, error) {
var (
id ebpf.ProgramID
Expand Down Expand Up @@ -42,7 +40,7 @@ func listBpfProgs(typ ebpf.ProgramType) ([]*ebpf.Program, error) {
return progs, nil
}

func getBpfProgInfo(prog *ebpf.Program) (entryFuncName, progName, tag string, err error) {
func getBpfProgInfo(prog *ebpf.Program) (entryFuncName string, err error) {
info, err := prog.Info()
if err != nil {
err = fmt.Errorf("failed to get program info: %w", err)
Expand All @@ -67,7 +65,7 @@ func getBpfProgInfo(prog *ebpf.Program) (entryFuncName, progName, tag string, er
for _, insn := range insns {
sym := insn.Symbol()
if sym != "" {
return sym, info.Name, info.Tag, nil
return sym, nil
}
}

Expand Down
14 changes: 5 additions & 9 deletions internal/pwru/ksym.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,15 @@ func (a *Addr2Name) findNearestSym(ip uint64) string {
return strings.Replace(a.Addr2NameSlice[i-1].name, "\t", "", -1)
}

func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, BpfProgName2Addr, error) {
func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, error) {
a2n := Addr2Name{
Addr2NameMap: make(map[uint64]*ksym),
Name2AddrMap: make(map[string][]uintptr),
}
n2a := BpfProgName2Addr{}

file, err := os.Open("/proc/kallsyms")
if err != nil {
return a2n, n2a, err
return a2n, err
}
defer file.Close()

Expand All @@ -62,7 +61,7 @@ func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, BpfProgName2Addr, error) {
if all || (funcs[name] > 0) {
addr, err := strconv.ParseUint(line[0], 16, 64)
if err != nil {
return a2n, n2a, err
return a2n, err
}
sym := &ksym{
addr: addr,
Expand All @@ -73,18 +72,15 @@ func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, BpfProgName2Addr, error) {
if all {
a2n.Addr2NameSlice = append(a2n.Addr2NameSlice, sym)
}
if isBpfProg := strings.HasSuffix(name, "[bpf]"); isBpfProg {
n2a[name] = addr
}
}
}
if err := scanner.Err(); err != nil {
return a2n, n2a, err
return a2n, err
}

if all {
sort.Sort(byAddr(a2n.Addr2NameSlice))
}

return a2n, n2a, nil
return a2n, nil
}
23 changes: 1 addition & 22 deletions internal/pwru/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,19 +257,6 @@ func getExecName(pid int) string {
return execName
}

func getAddrByArch(event *Event, o *output) (addr uint64) {
switch runtime.GOARCH {
case "amd64":
addr = event.Addr
if !o.kprobeMulti && event.Type == eventTypeKprobe {
addr -= 1
}
case "arm64":
addr = event.Addr
}
return addr
}

func getTupleData(event *Event) (tupleData string) {
tupleData = fmt.Sprintf("%s:%d->%s:%d(%s)",
addrToStr(event.Tuple.L3Proto, event.Tuple.Saddr), byteorder.NetworkToHost16(event.Tuple.Sport),
Expand Down Expand Up @@ -350,11 +337,6 @@ func getOutFuncName(o *output, event *Event, addr uint64) string {

if ksym, ok := o.addr2name.Addr2NameMap[addr]; ok {
funcName = ksym.name
} else if ksym, ok := o.addr2name.Addr2NameMap[addr-4]; runtime.GOARCH == "amd64" && ok {
// Assume that function has ENDBR in its prelude (enabled by CONFIG_X86_KERNEL_IBT).
// See https://lore.kernel.org/bpf/20220811091526.172610-5-jolsa@kernel.org/
// for more ctx.
funcName = ksym.name
} else {
funcName = fmt.Sprintf("0x%x", addr)
}
Expand Down Expand Up @@ -411,10 +393,7 @@ func (o *output) Print(event *Event) {
ts = getRelativeTs(event, o)
}

// XXX: not sure why the -1 offset is needed on x86 but not on arm64
addr := getAddrByArch(event, o)

outFuncName := getOutFuncName(o, event, addr)
outFuncName := getOutFuncName(o, event, event.Addr)

fmt.Fprintf(o.writer, "%-18s %-3s %-16s", fmt.Sprintf("%#x", event.SkbAddr),
fmt.Sprintf("%d", event.CPU), fmt.Sprintf("%s", execName))
Expand Down
Loading
Loading