System Monitoring¶

📊 Learning Objectives

Monitor system calls and events
Track process execution and lifecycle
Monitor file system operations
Build real-time monitoring tools
Collect and analyze system metrics

Introduction¶

eBPF excels at system monitoring because it can observe kernel events with minimal overhead. This chapter shows how to build monitoring tools for system calls, processes, and file operations.

Monitoring Advantages

Low overhead: Runs in kernel space
Real-time: Immediate event capture
Comprehensive: Access to all kernel events
Safe: Verified before execution

Monitoring System Calls¶

Basic System Call Tracer¶

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1024);
    __type(key, u32);
    __type(value, u64);
} syscall_count SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_openat")
int trace_openat(struct trace_event_raw_sys_enter *ctx) {
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    u64 *count, zero = 0, one = 1;

    count = bpf_map_lookup_elem(&syscall_count, &pid);
    if (!count) {
        bpf_map_update_elem(&syscall_count, &pid, &zero, BPF_NOEXIST);
        count = bpf_map_lookup_elem(&syscall_count, &pid);
    }
    if (count) {
        __sync_fetch_and_add(count, 1);
    }

    return 0;
}

char LICENSE[] SEC("license") = "GPL";

Advanced: System Call with Arguments¶

SEC("tracepoint/syscalls/sys_enter_openat")
int trace_openat_args(struct trace_event_raw_sys_enter *ctx) {
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    char comm[16];
    char filename[256] = {};

    bpf_get_current_comm(comm, sizeof(comm));

    // Read filename from syscall arguments
    if (ctx->args[1]) {
        bpf_probe_read_user_str(filename, sizeof(filename), 
                                (char *)ctx->args[1]);
    }

    bpf_printk("PID: %d, Comm: %s, File: %s", pid, comm, filename);
    return 0;
}

Process Monitoring¶

Process Execution Monitor¶

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 10240);
    __type(key, u32);
    __type(value, struct {
        char comm[16];
        u64 start_time;
        u32 pid;
    });
} processes SEC(".maps");

SEC("tracepoint/sched/sched_process_exec")
int trace_exec(struct trace_event_raw_sched_process_exec *ctx) {
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    struct {
        char comm[16];
        u64 start_time;
        u32 pid;
    } proc = {};

    bpf_get_current_comm(proc.comm, sizeof(proc.comm));
    proc.start_time = bpf_ktime_get_ns();
    proc.pid = pid;

    bpf_map_update_elem(&processes, &pid, &proc, BPF_ANY);
    bpf_printk("Process started: %s (PID: %d)", proc.comm, pid);

    return 0;
}

SEC("tracepoint/sched/sched_process_exit")
int trace_exit(struct trace_event_raw_sched_process_exit *ctx) {
    u32 pid = ctx->pid;
    struct {
        char comm[16];
        u64 start_time;
        u32 pid;
    } *proc;

    proc = bpf_map_lookup_elem(&processes, &pid);
    if (proc) {
        u64 duration = bpf_ktime_get_ns() - proc->start_time;
        bpf_printk("Process exited: %s (PID: %d, Duration: %llu ns)", 
                   proc->comm, pid, duration);
        bpf_map_delete_elem(&processes, &pid);
    }

    return 0;
}

File System Monitoring¶

File Open Monitor¶

struct file_event {
    u32 pid;
    u32 uid;
    char comm[16];
    char filename[256];
    u64 timestamp;
};

struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, 256 * 1024);
} file_events SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_openat")
int trace_file_open(struct trace_event_raw_sys_enter *ctx) {
    struct file_event *event;

    event = bpf_ringbuf_reserve(&file_events, sizeof(*event), 0);
    if (!event) {
        return 0;
    }

    event->pid = bpf_get_current_pid_tgid() >> 32;
    event->uid = bpf_get_current_uid_gid() >> 32;
    event->timestamp = bpf_ktime_get_ns();

    bpf_get_current_comm(event->comm, sizeof(event->comm));

    if (ctx->args[1]) {
        bpf_probe_read_user_str(event->filename, sizeof(event->filename),
                                (char *)ctx->args[1]);
    }

    bpf_ringbuf_submit(event, 0);
    return 0;
}

CPU and Performance Monitoring¶

CPU Usage Monitor¶

struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
    __uint(max_entries, 1024);
    __type(key, u32);
    __type(value, u64);
} cpu_time SEC(".maps");

SEC("tracepoint/sched/sched_switch")
int trace_sched_switch(struct trace_event_raw_sched_switch *ctx) {
    u32 prev_pid = ctx->prev_pid;
    u32 next_pid = ctx->next_pid;
    u64 *time, zero = 0;
    u64 now = bpf_ktime_get_ns();

    // Account time for previous process
    if (prev_pid) {
        time = bpf_map_lookup_elem(&cpu_time, &prev_pid);
        if (!time) {
            bpf_map_update_elem(&cpu_time, &prev_pid, &zero, BPF_NOEXIST);
            time = bpf_map_lookup_elem(&cpu_time, &prev_pid);
        }
        if (time) {
            __sync_fetch_and_add(time, now - ctx->prev_state);
        }
    }

    return 0;
}

Memory Monitoring¶

Memory Allocation Tracker¶

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1024);
    __type(key, u32);
    __type(value, u64);
} memory_allocated SEC(".maps");

SEC("kprobe/kmalloc")
int trace_kmalloc(struct pt_regs *ctx) {
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    u64 size = PT_REGS_PARM1(ctx);
    u64 *total, zero = 0;

    total = bpf_map_lookup_elem(&memory_allocated, &pid);
    if (!total) {
        bpf_map_update_elem(&memory_allocated, &pid, &zero, BPF_NOEXIST);
        total = bpf_map_lookup_elem(&memory_allocated, &pid);
    }
    if (total) {
        __sync_fetch_and_add(total, size);
    }

    return 0;
}

Network Monitoring¶

Socket Monitor¶

struct socket_event {
    u32 pid;
    u32 uid;
    char comm[16];
    u64 timestamp;
};

struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, 256 * 1024);
} socket_events SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_socket")
int trace_socket(struct trace_event_raw_sys_enter *ctx) {
    struct socket_event *event;

    event = bpf_ringbuf_reserve(&socket_events, sizeof(*event), 0);
    if (!event) {
        return 0;
    }

    event->pid = bpf_get_current_pid_tgid() >> 32;
    event->uid = bpf_get_current_uid_gid() >> 32;
    event->timestamp = bpf_ktime_get_ns();
    bpf_get_current_comm(event->comm, sizeof(event->comm));

    bpf_ringbuf_submit(event, 0);
    return 0;
}

Best Practices¶

Monitoring Best Practices

Use appropriate map types: Per-CPU maps for high-frequency events
Ring buffers for events: Lower overhead than perf buffers
Filter early: Reduce data collection overhead
Aggregate in kernel: Minimize user space processing
Handle errors: Check all map operations
Use atomic operations: For concurrent updates

Summary¶

Key Takeaways

eBPF provides low-overhead system monitoring
Tracepoints are stable and recommended
Use ring buffers for event streaming
Per-CPU maps improve performance
Aggregate data in kernel when possible

Next Steps: - Learn network tracing (Chapter 8) - Build security tools (Chapter 9) - Profile applications (Chapter 10)

Previous: Helper Functions
Next: Network Tracing