// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) // Copyright (c) 2020 Wenbo Zhang // // Based on biosnoop(8) from BCC by Brendan Gregg. // 29-Jun-2020 Wenbo Zhang Created this. #include #include #include #include #include #include #include #include #include "blk_types.h" #include "biosnoop.h" #include "biosnoop.skel.h" #include "trace_helpers.h" #define PERF_BUFFER_PAGES 16 #define PERF_POLL_TIMEOUT_MS 100 static volatile sig_atomic_t exiting = 0; static struct env { char *disk; int duration; bool timestamp; bool queued; bool verbose; } env = {}; static volatile __u64 start_ts; const char *argp_program_version = "biosnoop 0.1"; const char *argp_program_bug_address = "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; const char argp_program_doc[] = "Trace block I/O.\n" "\n" "USAGE: biosnoop [--help] [-d DISK] [-Q]\n" "\n" "EXAMPLES:\n" " biosnoop # trace all block I/O\n" " biosnoop -Q # include OS queued time in I/O time\n" " biosnoop 10 # trace for 10 seconds only\n" " biosnoop -d sdc # trace sdc only\n"; static const struct argp_option opts[] = { { "queued", 'Q', NULL, 0, "Include OS queued time in I/O time" }, { "disk", 'd', "DISK", 0, "Trace this disk only" }, { "verbose", 'v', NULL, 0, "Verbose debug output" }, { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, {}, }; static error_t parse_arg(int key, char *arg, struct argp_state *state) { static int pos_args; switch (key) { case 'h': argp_state_help(state, stderr, ARGP_HELP_STD_HELP); break; case 'v': env.verbose = true; break; case 'Q': env.queued = true; break; case 'd': env.disk = arg; if (strlen(arg) + 1 > DISK_NAME_LEN) { fprintf(stderr, "invaild disk name: too long\n"); argp_usage(state); } break; case ARGP_KEY_ARG: if (pos_args++) { fprintf(stderr, "unrecognized positional argument: %s\n", arg); argp_usage(state); } errno = 0; env.duration = strtoll(arg, NULL, 10); if (errno || env.duration <= 0) { fprintf(stderr, "invalid delay (in us): %s\n", arg); argp_usage(state); } break; default: return ARGP_ERR_UNKNOWN; } return 0; } int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { if (level == LIBBPF_DEBUG && !env.verbose) return 0; return vfprintf(stderr, format, args); } static void sig_int(int signo) { exiting = 1; } static void blk_fill_rwbs(char *rwbs, unsigned int op) { int i = 0; if (op & REQ_PREFLUSH) rwbs[i++] = 'F'; switch (op & REQ_OP_MASK) { case REQ_OP_WRITE: case REQ_OP_WRITE_SAME: rwbs[i++] = 'W'; break; case REQ_OP_DISCARD: rwbs[i++] = 'D'; break; case REQ_OP_SECURE_ERASE: rwbs[i++] = 'D'; rwbs[i++] = 'E'; break; case REQ_OP_FLUSH: rwbs[i++] = 'F'; break; case REQ_OP_READ: rwbs[i++] = 'R'; break; default: rwbs[i++] = 'N'; } if (op & REQ_FUA) rwbs[i++] = 'F'; if (op & REQ_RAHEAD) rwbs[i++] = 'A'; if (op & REQ_SYNC) rwbs[i++] = 'S'; if (op & REQ_META) rwbs[i++] = 'M'; rwbs[i] = '\0'; } static struct partitions *partitions; void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { const struct partition *partition; const struct event *e = data; char rwbs[RWBS_LEN]; if (!start_ts) start_ts = e->ts; blk_fill_rwbs(rwbs, e->cmd_flags); partition = partitions__get_by_dev(partitions, e->dev); printf("%-11.6f %-14.14s %-6d %-7s %-4s %-10lld %-7d ", (e->ts - start_ts) / 1000000000.0, e->comm, e->pid, partition ? partition->name : "Unknown", rwbs, e->sector, e->len); if (env.queued) printf("%7.3f ", e->qdelta != -1 ? e->qdelta / 1000000.0 : -1); printf("%7.3f\n", e->delta / 1000000.0); } void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) { fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu); } int main(int argc, char **argv) { const struct partition *partition; static const struct argp argp = { .options = opts, .parser = parse_arg, .doc = argp_program_doc, }; struct perf_buffer_opts pb_opts; struct perf_buffer *pb = NULL; struct ksyms *ksyms = NULL; struct biosnoop_bpf *obj; __u64 time_end = 0; int err; err = argp_parse(&argp, argc, argv, 0, NULL, NULL); if (err) return err; libbpf_set_print(libbpf_print_fn); err = bump_memlock_rlimit(); if (err) { fprintf(stderr, "failed to increase rlimit: %d\n", err); return 1; } obj = biosnoop_bpf__open(); if (!obj) { fprintf(stderr, "failed to open BPF object\n"); return 1; } partitions = partitions__load(); if (!partitions) { fprintf(stderr, "failed to load partitions info\n"); goto cleanup; } /* initialize global data (filtering options) */ if (env.disk) { partition = partitions__get_by_name(partitions, env.disk); if (!partition) { fprintf(stderr, "invaild partition name: not exist\n"); goto cleanup; } } obj->rodata->targ_queued = env.queued; err = biosnoop_bpf__load(obj); if (err) { fprintf(stderr, "failed to load BPF object: %d\n", err); goto cleanup; } obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start); err = libbpf_get_error(obj->links.blk_account_io_start); if (err) { fprintf(stderr, "failed to attach blk_account_io_start: %s\n", strerror(err)); goto cleanup; } ksyms = ksyms__load(); if (!ksyms) { fprintf(stderr, "failed to load kallsyms\n"); goto cleanup; } if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) { obj->links.blk_account_io_merge_bio = bpf_program__attach(obj->progs.blk_account_io_merge_bio); err = libbpf_get_error(obj->links.blk_account_io_merge_bio); if (err) { fprintf(stderr, "failed to attach " "blk_account_io_merge_bio: %s\n", strerror(err)); goto cleanup; } } if (env.queued) { obj->links.block_rq_insert = bpf_program__attach(obj->progs.block_rq_insert); err = libbpf_get_error(obj->links.block_rq_insert); if (err) { fprintf(stderr, "failed to attach block_rq_insert: %s\n", strerror(err)); goto cleanup; } } obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue); err = libbpf_get_error(obj->links.block_rq_issue); if (err) { fprintf(stderr, "failed to attach block_rq_issue: %s\n", strerror(err)); goto cleanup; } obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete); err = libbpf_get_error(obj->links.block_rq_complete); if (err) { fprintf(stderr, "failed to attach block_rq_complete: %s\n", strerror(err)); goto cleanup; } pb_opts.sample_cb = handle_event; pb_opts.lost_cb = handle_lost_events; pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &pb_opts); err = libbpf_get_error(pb); if (err) { pb = NULL; fprintf(stderr, "failed to open perf buffer: %d\n", err); goto cleanup; } printf("%-11s %-14s %-6s %-7s %-4s %-10s %-7s ", "TIME(s)", "COMM", "PID", "DISK", "T", "SECTOR", "BYTES"); if (env.queued) printf("%7s ", "QUE(ms)"); printf("%7s\n", "LAT(ms)"); /* setup duration */ if (env.duration) time_end = get_ktime_ns() + env.duration * NSEC_PER_SEC; if (signal(SIGINT, sig_int) == SIG_ERR) { fprintf(stderr, "can't set signal handler: %s\n", strerror(errno)); err = 1; goto cleanup; } /* main: poll */ while (!exiting) { err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); if (err < 0 && errno != EINTR) { fprintf(stderr, "error polling perf buffer: %s\n", strerror(errno)); goto cleanup; } if (env.duration && get_ktime_ns() > time_end) goto cleanup; /* reset err to return 0 if exiting */ err = 0; } cleanup: perf_buffer__free(pb); biosnoop_bpf__destroy(obj); ksyms__free(ksyms); partitions__free(partitions); return err != 0; }