--[[
Copyright 2016 Marek Vavrusa

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]
local ffi = require('ffi')
local bit = require('bit')
local has_syscall, S = pcall(require, 'syscall')
local M = {}

-- BPF constants are exposed as `static const int` members of empty structs,
-- so that they can be looked up by name through the FFI (e.g. ffi.new('struct bpf').LD).
ffi.cdef [[
struct bpf {
  /* Instruction classes */
  static const int LD   = 0x00;
  static const int LDX  = 0x01;
  static const int ST   = 0x02;
  static const int STX  = 0x03;
  static const int ALU  = 0x04;
  static const int JMP  = 0x05;
  static const int ALU64 = 0x07;
  /* ld/ldx fields */
  static const int W    = 0x00;
  static const int H    = 0x08;
  static const int B    = 0x10;
  static const int ABS  = 0x20;
  static const int IND  = 0x40;
  static const int MEM  = 0x60;
  static const int LEN  = 0x80;
  static const int MSH  = 0xa0;
  /* alu/jmp fields */
  static const int ADD  = 0x00;
  static const int SUB  = 0x10;
  static const int MUL  = 0x20;
  static const int DIV  = 0x30;
  static const int OR   = 0x40;
  static const int AND  = 0x50;
  static const int LSH  = 0x60;
  static const int RSH  = 0x70;
  static const int NEG  = 0x80;
  static const int MOD  = 0x90;
  static const int XOR  = 0xa0;
  static const int JA   = 0x00;
  static const int JEQ  = 0x10;
  static const int JGT  = 0x20;
  static const int JGE  = 0x30;
  static const int JSET = 0x40;
  static const int K    = 0x00;
  static const int X    = 0x08;
  static const int JNE  = 0x50; /* jump != */
  static const int JSGT = 0x60; /* SGT is signed '>', GT in x86 */
  static const int JSGE = 0x70; /* SGE is signed '>=', GE in x86 */
  static const int CALL = 0x80; /* function call */
  static const int EXIT = 0x90; /* function return */
  /* ld/ldx fields */
  static const int DW   = 0x18; /* double word */
  static const int XADD = 0xc0; /* exclusive add */
  /* alu/jmp fields */
  static const int MOV  = 0xb0; /* mov reg to reg */
  static const int ARSH = 0xc0; /* sign extending arithmetic shift right */
  /* change endianness of a register */
  static const int END  = 0xd0;
  /* flags for endianness conversion: */
  static const int TO_LE = 0x00; /* convert to little-endian */
  static const int TO_BE = 0x08; /* convert to big-endian */
  /* misc */
  static const int PSEUDO_MAP_FD = 0x01;
  /* helper functions */
  static const int F_CURRENT_CPU    = 0xffffffff;
  static const int F_USER_STACK     = 1 << 8;
  static const int F_FAST_STACK_CMP = 1 << 9;
  static const int F_REUSE_STACKID  = 1 << 10;
  /* special offsets for ancillary data */
  static const int NET_OFF = -0x100000;
  static const int LL_OFF  = -0x200000;
};
/* eBPF commands */
struct bpf_cmd {
  static const int MAP_CREATE       = 0;
  static const int MAP_LOOKUP_ELEM  = 1;
  static const int MAP_UPDATE_ELEM  = 2;
  static const int MAP_DELETE_ELEM  = 3;
  static const int MAP_GET_NEXT_KEY = 4;
  static const int PROG_LOAD        = 5;
  static const int OBJ_PIN          = 6;
  static const int OBJ_GET          = 7;
};
/* eBPF helpers */
struct bpf_func_id {
  static const int unspec               = 0;
  static const int map_lookup_elem      = 1;
  static const int map_update_elem      = 2;
  static const int map_delete_elem      = 3;
  static const int probe_read           = 4;
  static const int ktime_get_ns         = 5;
  static const int trace_printk         = 6;
  static const int get_prandom_u32      = 7;
  static const int get_smp_processor_id = 8;
  static const int skb_store_bytes      = 9;
  static const int l3_csum_replace      = 10;
  static const int l4_csum_replace      = 11;
  static const int tail_call            = 12;
  static const int clone_redirect       = 13;
  static const int get_current_pid_tgid = 14;
  static const int get_current_uid_gid  = 15;
  static const int get_current_comm     = 16;
  static const int get_cgroup_classid   = 17;
  static const int skb_vlan_push        = 18;
  static const int skb_vlan_pop         = 19;
  static const int skb_get_tunnel_key   = 20;
  static const int skb_set_tunnel_key   = 21;
  static const int perf_event_read      = 22;
  static const int redirect             = 23;
  static const int get_route_realm      = 24;
  static const int perf_event_output    = 25;
  static const int skb_load_bytes       = 26;
  static const int get_stackid          = 27;
};
/* BPF_MAP_STACK_TRACE structures and constants */
static const int BPF_MAX_STACK_DEPTH = 127;
struct bpf_stacktrace {
  uint64_t ip[BPF_MAX_STACK_DEPTH];
};
]]

-- Compatibility: ljsyscall doesn't have support for BPF syscall
if not has_syscall or not S.bpf then
  error("ljsyscall doesn't support bpf(), must be updated")
else
  local strflag = require('syscall.helpers').strflag
  -- Compatibility: ljsyscall<=0.12
  -- Replace the map/program type tables when they lack the newer entries.
  if not S.c.BPF_MAP.LRU_HASH then
    S.c.BPF_MAP = strflag {
      UNSPEC = 0,
      HASH = 1,
      ARRAY = 2,
      PROG_ARRAY = 3,
      PERF_EVENT_ARRAY = 4,
      PERCPU_HASH = 5,
      PERCPU_ARRAY = 6,
      STACK_TRACE = 7,
      CGROUP_ARRAY = 8,
      LRU_HASH = 9,
      LRU_PERCPU_HASH = 10,
      LPM_TRIE = 11,
      ARRAY_OF_MAPS = 12,
      HASH_OF_MAPS = 13,
      DEVMAP = 14,
      SOCKMAP = 15,
      CPUMAP = 16,
    }
  end
  if not S.c.BPF_PROG.TRACEPOINT then
    S.c.BPF_PROG = strflag {
      UNSPEC = 0,
      SOCKET_FILTER = 1,
      KPROBE = 2,
      SCHED_CLS = 3,
      SCHED_ACT = 4,
      TRACEPOINT = 5,
      XDP = 6,
      PERF_EVENT = 7,
      CGROUP_SKB = 8,
      CGROUP_SOCK = 9,
      LWT_IN = 10,
      LWT_OUT = 11,
      LWT_XMIT = 12,
      SOCK_OPS = 13,
      SK_SKB = 14,
      CGROUP_DEVICE = 15,
      SK_MSG = 16,
      RAW_TRACEPOINT = 17,
      CGROUP_SOCK_ADDR = 18,
    }
  end
end

-- Compatibility: metatype for stacktrace
-- Stateless iterator step: advances the 0-based index and yields (index, ip)
-- until the end of the array or the first zero instruction pointer.
local function stacktrace_iter(t, i)
  i = i + 1
  if i < #t and t.ip[i] > 0 then
    return i, t.ip[i]
  end
end
ffi.metatype('struct bpf_stacktrace', {
  -- Number of slots in the `ip` array (not the number of valid frames)
  __len = function (t) return ffi.sizeof(t.ip) / ffi.sizeof(t.ip[0]) end,
  -- Start at -1 so that the first step yields index 0
  __ipairs = function (t) return stacktrace_iter, t, -1 end,
})

-- Reflect cdata type
-- Returns the C type name of `v` as printed by the FFI (the text between
-- 'ctype<' and '>'), or nil when `v` is not cdata.
function M.typename(v)
  if not v or type(v) ~= 'cdata' then return nil end
  return string.match(tostring(ffi.typeof(v)), '<([^>]+)')
end

-- Reflect if cdata type can be pointer (accepts array or pointer)
-- Returns nil for non-cdata values, otherwise a boolean. Arrays count as
-- pointers unless `noarray` is truthy.
function M.isptr(v, noarray)
  local ctname = M.typename(v)
  if ctname then
    -- The last character of the type name tells pointers ('*') and arrays (']') apart
    ctname = string.sub(ctname, -1)
    ctname = ctname == '*' or (not noarray and ctname == ']')
  end
  return ctname
end

-- Return true if variable is a non-nil constant that can be used as immediate value
-- e.g. result of KSHORT and KNUM
-- `v` is a table with `const` and `type` fields.
function M.isimmconst(v)
  return (type(v.const) == 'number' and not ffi.istype(v.type, ffi.typeof('void')))
    or type(v.const) == 'cdata' and ffi.istype(v.type, ffi.typeof('uint64_t')) -- Lua numbers are at most 52 bits
    or type(v.const) == 'cdata' and ffi.istype(v.type, ffi.typeof('int64_t'))
end

-- Strict 'X.Y.Z' matcher; the dots are escaped because a bare '.' matches any
-- character and would accept e.g. a timestamp such as '11 12:34'.
local KERNEL_VERSION_PATTERN = '%d+%.%d+%.%d+'

-- Read sysctl `name` and extract the first 'X.Y.Z' substring from it.
-- Returns nil when the sysctl is unavailable or contains no such substring.
local function sysctl_version(name)
  local value = S.sysctl(name)
  if type(value) ~= 'string' then return nil end
  return value:match(KERNEL_VERSION_PATTERN)
end

-- Return the running kernel version encoded as the number 0xXXYYZZ.
-- Raises an error when no 'X.Y.Z' version can be found in either sysctl.
function M.osversion()
  -- We have no better way to extract current kernel hex-string other
  -- than parsing headers, compiling a helper function or reading /proc
  local ver_str = sysctl_version('kernel.version')
  if not ver_str then -- kernel.version is freeform, fallback to kernel.osrelease
    ver_str = sysctl_version('kernel.osrelease')
  end
  if not ver_str then
    error('cannot determine kernel version from kernel.version or kernel.osrelease')
  end
  local version, count = 0, 2
  for i in ver_str:gmatch('%d+') do -- Convert 'X.Y.Z' to 0xXXYYZZ
    -- Each component owns exactly one byte; cap it at 255 so that a large
    -- sublevel (e.g. 4.19.300) cannot spill into the neighbouring byte.
    -- NOTE(review): this mirrors how the kernel caps the sublevel in KERNEL_VERSION() — confirm.
    local component = math.min(tonumber(i), 255)
    version = bit.bor(version, bit.lshift(component, 8*count))
    count = count - 1
  end
  return version
end

-- Wrap a perf event `reader` in an object with `block`, `next` and `read` methods.
-- `event_type` (optional) is a C type name given as a string; when present,
-- sample payloads are returned cast to a pointer to that type.
function M.event_reader(reader, event_type)
  -- Caller can specify event message binary format
  if event_type then
    assert(type(event_type) == 'string' and ffi.typeof(event_type), 'not a valid type for event reader')
    event_type = ffi.typeof(event_type .. '*') -- Convert type to pointer-to-type
  end
  -- Wrap reader in interface that can interpret read event messages
  return setmetatable({reader=reader,type=event_type}, {__index = {
    -- Wait on the reader's file descriptor using select()
    block = function(_ --[[self]])
      return S.select { readfds = {reader.fd} }
    end,
    -- Return the next sample frame as (len, ev), skipping other record types
    next = function(_ --[[self]], k)
      local len, ev = reader:next(k)
      -- Filter out only sample frames
      while ev and ev.type ~= S.c.PERF_RECORD.SAMPLE do
        len, ev = reader:next(len)
      end
      if ev and event_type then
        -- The perf event reader returns framed data with header and variable length
        -- This is going skip the frame header and cast data to given type
        ev = ffi.cast(event_type, ffi.cast('char *', ev) + ffi.sizeof('struct perf_event_header') + ffi.sizeof('uint32_t'))
      end
      return len, ev
    end,
    -- Generic-for adapter: `for len, ev in r:read() do ... end`
    read = function(self)
      return self.next, self, nil
    end,
  }})
end

-- Build an anonymous C struct declaration (as a string) from the format
-- description of tracepoint `tp`, a path relative to the tracing events directory.
-- Raises an error when the format file cannot be opened.
function M.tracepoint_type(tp)
  -- Read tracepoint format string
  local fp = assert(io.open('/sys/kernel/debug/tracing/events/'..tp..'/format', 'r'))
  local fmt = fp:read '*a'
  fp:close()
  -- Parse struct fields
  local fields = {}
  for f in fmt:gmatch 'field:([^;]+;)' do
    table.insert(fields, f)
  end
  return string.format('struct { %s }', table.concat(fields))
end

return M