/* * Copyright (c) Facebook, Inc. * Licensed under the Apache License, Version 2.0 (the "License") */ #include #include #include #include #include #include #include #include #include #include #include #include "PyPerfLoggingHelper.h" #include "PyPerfUtil.h" #include "bcc_elf.h" #include "bcc_proc.h" #include "bcc_syms.h" namespace ebpf { namespace pyperf { extern OffsetConfig kPy36OffsetConfig; extern std::string PYPERF_BPF_PROGRAM; const static int kPerfBufSizePages = 32; const static std::string kPidCfgTableName("pid_config"); const static std::string kProgsTableName("progs"); const static std::string kSamplePerfBufName("events"); const static std::string kOnEventFuncName("on_event"); const static std::string kPythonStackFuncName("read_python_stack"); const static std::string kPythonStackProgIdxFlag("-DPYTHON_STACK_PROG_IDX="); const static int kPythonStackProgIdx = 0; const static std::string kNumCpusFlag("-DNUM_CPUS="); const static std::string kSymbolsHashSizeFlag("-D__SYMBOLS_SIZE__="); const static int kSymbolsHashSize = 16384; namespace { bool getRunningPids(std::vector& output) { auto dir = ::opendir("/proc/"); if (!dir) { std::fprintf(stderr, "Open /proc failed: %d\n", errno); return false; } dirent* result = nullptr; do { if ((result = readdir(dir))) { std::string basename = result->d_name; if (basename == "." || basename == "..") { continue; } std::string fullpath = "/proc/" + basename; struct stat st; if (::stat(fullpath.c_str(), &st) != 0 || !S_ISDIR(st.st_mode)) { continue; } try { auto pid = std::stoi(basename); output.push_back(pid); } catch (const std::exception& e) { continue; } } } while (result); if (::closedir(dir) == -1) { std::fprintf(stderr, "Close /proc failed: %d\n", errno); return false; } return true; } typedef struct { int pid; bool found; uint64_t st; uint64_t en; } FindPythonPathHelper; const static std::string kPy36LibName = "libpython3.6"; int findPythonPathCallback(mod_info *mod, int, void* payload) { auto helper = static_cast(payload); std::string file = mod->name; auto pos = file.rfind("/"); if (pos != std::string::npos) { file = file.substr(pos + 1); } if (file.find(kPy36LibName) == 0) { logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", mod->name, mod->start_addr, mod->end_addr, helper->pid); helper->found = true; helper->st = mod->start_addr; helper->en = mod->end_addr; return -1; } return 0; } bool allAddrFound(const PidData& data) { return (data.current_state_addr > 0) && (data.tls_key_addr > 0) && (data.gil_locked_addr > 0) && (data.gil_last_holder_addr > 0); } int getAddrOfPythonBinaryCallback(const char* name, uint64_t addr, uint64_t, void* payload) { PidData& data = *static_cast(payload); auto checkAndGetAddr = [&](uintptr_t& targetAddr, const char* targetName) { if (targetAddr == 0 && std::strcmp(name, targetName) == 0) { targetAddr = addr; } }; checkAndGetAddr(data.tls_key_addr, "autoTLSkey"); checkAndGetAddr(data.current_state_addr, "_PyThreadState_Current"); checkAndGetAddr(data.gil_locked_addr, "gil_locked"); checkAndGetAddr(data.gil_last_holder_addr, "gil_last_holder"); if (allAddrFound(data)) { return -1; } return 0; } bool getAddrOfPythonBinary(const std::string& path, PidData& data) { std::memset(&data, 0, sizeof(data)); struct bcc_symbol_option option = {.use_debug_file = 0, .check_debug_file_crc = 0, .lazy_symbolize = 1, .use_symbol_type = (1 << STT_OBJECT)}; bcc_elf_foreach_sym(path.c_str(), &getAddrOfPythonBinaryCallback, &option, &data); return allAddrFound(data); } } // namespace void handleSampleCallback(void* cb_cookie, void* raw_data, int data_size) { auto profiler = static_cast(cb_cookie); profiler->handleSample(raw_data, data_size); } void handleLostSamplesCallback(void* cb_cookie, uint64_t lost_cnt) { auto profiler = static_cast(cb_cookie); profiler->handleLostSamples(lost_cnt); } PyPerfUtil::PyPerfResult PyPerfUtil::init() { std::vector cflags; cflags.emplace_back(kNumCpusFlag + std::to_string(::sysconf(_SC_NPROCESSORS_ONLN))); cflags.emplace_back(kSymbolsHashSizeFlag + std::to_string(kSymbolsHashSize)); cflags.emplace_back(kPythonStackProgIdxFlag + std::to_string(kPythonStackProgIdx)); auto initRes = bpf_.init(PYPERF_BPF_PROGRAM, cflags); if (initRes.code() != 0) { std::fprintf(stderr, "Failed to compiled PyPerf BPF programs: %s\n", initRes.msg().c_str()); return PyPerfResult::INIT_FAIL; } int progFd = -1; auto loadRes = bpf_.load_func(kPythonStackFuncName, BPF_PROG_TYPE_PERF_EVENT, progFd); if (loadRes.code() != 0) { std::fprintf(stderr, "Failed to load BPF program %s: %s\n", kPythonStackFuncName.c_str(), loadRes.msg().c_str()); return PyPerfResult::INIT_FAIL; } auto progTable = bpf_.get_prog_table(kProgsTableName); auto updateRes = progTable.update_value(kPythonStackProgIdx, progFd); if (updateRes.code() != 0) { std::fprintf(stderr, "Failed to set BPF program %s FD %d to program table: %s\n", kPythonStackFuncName.c_str(), progFd, updateRes.msg().c_str()); return PyPerfResult::INIT_FAIL; } std::vector pids; if (!getRunningPids(pids)) { std::fprintf(stderr, "Failed getting running Processes\n"); return PyPerfResult::INIT_FAIL; } // Populate config for each Python Process auto pid_hash = bpf_.get_hash_table(kPidCfgTableName); PidData pidData; for (const auto pid : pids) { if (!tryTargetPid(pid, pidData)) { // Not a Python Process continue; } pid_hash.update_value(pid, pidData); } // Open perf buffer auto openRes = bpf_.open_perf_buffer( kSamplePerfBufName, &handleSampleCallback, &handleLostSamplesCallback, this, kPerfBufSizePages); if (openRes.code() != 0) { std::fprintf(stderr, "Unable to open Perf Buffer: %s\n", openRes.msg().c_str()); return PyPerfResult::PERF_BUF_OPEN_FAIL; } initCompleted_ = true; return PyPerfResult::SUCCESS; } void PyPerfUtil::handleSample(const void* data, int dataSize) { const Event* raw = static_cast(data); samples_.emplace_back(raw, dataSize); totalSamples_++; } void PyPerfUtil::handleLostSamples(int lostCnt) { lostSamples_ += lostCnt; } PyPerfUtil::PyPerfResult PyPerfUtil::profile(int64_t sampleRate, int64_t durationMs, PyPerfSampleProcessor* processor) { if (!initCompleted_) { std::fprintf(stderr, "PyPerfUtil::init not invoked or failed\n"); return PyPerfResult::NO_INIT; } // Attach to CPU cycles auto attachRes = bpf_.attach_perf_event(0, 0, kOnEventFuncName, sampleRate, 0); if (attachRes.code() != 0) { std::fprintf(stderr, "Attach to CPU cycles event failed: %s\n", attachRes.msg().c_str()); return PyPerfResult::EVENT_ATTACH_FAIL; } logInfo(2, "Attached to profiling event\n"); // Get Perf Buffer and poll in a loop for a given duration auto perfBuffer = bpf_.get_perf_buffer(kSamplePerfBufName); if (!perfBuffer) { std::fprintf(stderr, "Failed to get Perf Buffer: %s\n", kSamplePerfBufName.c_str()); return PyPerfResult::PERF_BUF_OPEN_FAIL; } logInfo(2, "Started polling Perf Buffer\n"); auto start = std::chrono::steady_clock::now(); while (std::chrono::steady_clock::now() < start + std::chrono::milliseconds(durationMs)) { perfBuffer->poll(50 /* 50ms timeout */); } logInfo(2, "Profiling duration finished\n"); // Detach the event auto detachRes = bpf_.detach_perf_event(0, 0); if (detachRes.code() != 0) { std::fprintf(stderr, "Detach CPU cycles event failed: %s\n", detachRes.msg().c_str()); return PyPerfResult::EVENT_DETACH_FAIL; } logInfo(2, "Detached from profiling event\n"); // Drain remaining samples logInfo(2, "Draining remaining samples\n"); while (perfBuffer->poll(0) > 0) { } logInfo(2, "Finished draining remaining samples\n"); processor->processSamples(samples_, this); return PyPerfResult::SUCCESS; } std::unordered_map PyPerfUtil::getSymbolMapping() { auto symbolTable = bpf_.get_hash_table("symbols"); std::unordered_map symbols; for (auto& x : symbolTable.get_table_offline()) { auto symbolName = getSymbolName(x.first); logInfo(2, "Symbol ID %d is %s\n", x.second, symbolName.c_str()); symbols.emplace(x.second, std::move(symbolName)); } logInfo(1, "Total %d unique Python symbols\n", symbols.size()); return symbols; } std::string PyPerfUtil::getSymbolName(Symbol& sym) const { std::string nameStr = std::string(sym.name).substr(0, FUNCTION_NAME_LEN); std::string classStr = std::string(sym.classname).substr(0, CLASS_NAME_LEN); if (classStr.size() > 0) { nameStr = classStr + "." + nameStr; } std::string file = std::string(sym.file).substr(0, FILE_NAME_LEN); if (file.empty()) { return nameStr; } if (file[0] == '/') { file = file.substr(1); } if (file.find("./") == 0) { file = file.substr(2); } if (file.find(".py", file.size() - 3) == (file.size() - 3)) { file = file.substr(0, file.size() - 3); } std::replace(file.begin(), file.end(), '/', '.'); return file + "." + nameStr; } bool PyPerfUtil::tryTargetPid(int pid, PidData& data) { FindPythonPathHelper helper{pid, false, 0, 0}; bcc_procutils_each_module(pid, &findPythonPathCallback, &helper); if (!helper.found) { logInfo(2, "PID %d does not contain Python library\n", pid); return false; } char path[256]; int res = std::snprintf(path, sizeof(path), "/proc/%d/map_files/%lx-%lx", pid, helper.st, helper.en); if (res < 0 || size_t(res) >= sizeof(path)) { return false; } if (!getAddrOfPythonBinary(path, data)) { std::fprintf( stderr, "Failed getting addresses in potential Python library in PID %d\n", pid); return false; } data.offsets = kPy36OffsetConfig; data.current_state_addr += helper.st; logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid, data.current_state_addr); data.tls_key_addr += helper.st; logInfo(2, "PID %d has autoTLSKey at %lx\n", pid, data.current_state_addr); data.gil_locked_addr += helper.st; logInfo(2, "PID %d has gil_locked at %lx\n", pid, data.current_state_addr); data.gil_last_holder_addr += helper.st; logInfo(2, "PID %d has gil_last_holder at %lx\n", pid, data.current_state_addr); return true; } } // namespace pyperf } // namespace ebpf