diff --git a/include/ddprof_module_lib.hpp b/include/ddprof_module_lib.hpp index 902cbc0c..3912de7a 100644 --- a/include/ddprof_module_lib.hpp +++ b/include/ddprof_module_lib.hpp @@ -30,7 +30,7 @@ struct Segment { }; // From a dso object (and the matching file), attach the module to the dwfl -// object, return the associated Dwfl_Module +// object, return the associated Dwfl_Module. DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, const FileInfoValue &fileInfoValue, DDProfMod &ddprof_mod); diff --git a/include/ddprof_stats.hpp b/include/ddprof_stats.hpp index df318e4d..b8e4ec28 100644 --- a/include/ddprof_stats.hpp +++ b/include/ddprof_stats.hpp @@ -42,7 +42,8 @@ namespace ddprof { X(PPROF_SIZE, "pprof.size", STAT_GAUGE) \ X(PROFILE_DURATION, "profile.duration_ms", STAT_GAUGE) \ X(AGGREGATION_AVG_TIME, "aggregation.avg_time_ns", STAT_GAUGE) \ - X(BACKPOPULATE_COUNT, "backpopulate.count", STAT_GAUGE) + X(BACKPOPULATE_COUNT, "backpopulate.count", STAT_GAUGE) \ + X(ELF_FDS, "elf.fds", STAT_GAUGE) // Expand the enum/index for the individual stats enum DDPROF_STATS : uint8_t { STATS_TABLE(X_ENUM) STATS_LEN }; diff --git a/include/dso.hpp b/include/dso.hpp index 58598ad1..282bd3ec 100644 --- a/include/dso.hpp +++ b/include/dso.hpp @@ -9,8 +9,10 @@ #include "ddprof_file_info-i.hpp" #include "dso_type.hpp" +#include "unique_fd.hpp" #include +#include #include #include #include @@ -21,6 +23,11 @@ namespace ddprof { enum class DsoOrigin : uint8_t { kPerfMmapEvent, kProcMaps }; +struct CachedElfFile { + FileInfoId_t _id{k_file_info_error}; + UniqueFd _fd; +}; + // DSO definition class Dso { public: @@ -41,6 +48,12 @@ class Dso { friend bool operator==(const Dso &, const Dso &) = default; bool intersects(const Dso &o) const; + bool same_mapping(const Dso &o) const { + return _start == o._start && _end == o._end && _offset == o._offset && + _filename == o._filename && _inode == o._inode && _pid == o._pid && + _prot == o._prot && _id == o._id && _type == o._type && + _origin == o._origin; + } std::string to_string() const; std::string format_filename() const; @@ -61,6 +74,13 @@ class Dso { // Beware, end is inclusive ! ProcessAddress_t end() const { return _end; } Offset_t offset() const { return _offset; } + std::shared_ptr get_cached_elf_file() const { + return _cached_elf_file; + } + void + set_cached_elf_file(std::shared_ptr cached_elf_file) const { + _cached_elf_file = std::move(cached_elf_file); + } ProcessAddress_t _start{}; ProcessAddress_t _end{}; // Beware, end is inclusive ! @@ -70,6 +90,7 @@ class Dso { pid_t _pid{-1}; uint32_t _prot{}; mutable FileInfoId_t _id{k_file_info_error}; + mutable std::shared_ptr _cached_elf_file; DsoType _type{DsoType::kUndef}; DsoOrigin _origin{DsoOrigin::kPerfMmapEvent}; }; diff --git a/include/dso_hdr.hpp b/include/dso_hdr.hpp index 391c172b..4890bbf0 100644 --- a/include/dso_hdr.hpp +++ b/include/dso_hdr.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -204,6 +205,8 @@ class DsoHdr { int clear_unvisited(const std::unordered_set &visited_pids); + size_t get_fd_cache_size() const; + private: // erase range of elements static void erase_range(DsoMap &map, DsoRange range, const Dso &new_mapping); @@ -217,11 +220,19 @@ class DsoHdr { FileInfoId_t update_id_from_path(const Dso &dso); + std::shared_ptr + get_or_create_cached_elf_file(FileInfoId_t file_info_id); + + void cleanup_expired_fd_cache_entries(); + // Unordered map (by pid) of sorted DSOs DsoPidMap _pid_map; DsoStats _stats; FileInfoInodeMap _file_info_inode_map; FileInfoVector _file_info_vector; + // Weak references let live DSOs own the cached fd through shared_ptr copies + // while this map is only used to find or recreate the shared entry by id. + std::unordered_map> _fd_cache; std::string _path_to_proc; // /proc files can be mounted at various places // (whole host profiling) int _dd_profiling_fd; diff --git a/src/ddprof_module_lib.cc b/src/ddprof_module_lib.cc index 08828197..80d5ddbb 100644 --- a/src/ddprof_module_lib.cc +++ b/src/ddprof_module_lib.cc @@ -237,11 +237,31 @@ DDRes report_module(Dwfl *dwfl, ProcessAddress_t pc, const Dso &dso, return ddres_warn(DD_WHAT_MODULE); } - UniqueFd fd_holder{::open(filepath.c_str(), O_RDONLY | O_CLOEXEC)}; - if (!fd_holder) { + auto cached_elf_file = dso.get_cached_elf_file(); + if (!cached_elf_file) { + LG_WRN("[Mod] Missing cached file state for module (%s)", filepath.c_str()); + return ddres_warn(DD_WHAT_MODULE); + } + + // Open lazily; the shared cache entry keeps the fd alive as long as some DSO + // mapping still references this file. + auto &cached_fd = cached_elf_file->_fd; + if (!cached_fd) { + cached_fd = UniqueFd{::open(filepath.c_str(), O_RDONLY | O_CLOEXEC)}; + LG_DBG("[Mod] Opened and cached fd for %s", filepath.c_str()); + } + if (!cached_fd) { LG_WRN("[Mod] Couldn't open fd to module (%s)", filepath.c_str()); return ddres_warn(DD_WHAT_MODULE); } + // dup with O_CLOEXEC preserved: plain dup() drops close-on-exec, which + // would leak ELF fds into exec'd children. F_DUPFD_CLOEXEC atomically + // duplicates and sets FD_CLOEXEC, matching the original open(O_CLOEXEC). + UniqueFd fd_holder{::fcntl(cached_fd.get(), F_DUPFD_CLOEXEC, 0)}; + if (!fd_holder) { + LG_WRN("[Mod] Couldn't dup fd to module (%s)", filepath.c_str()); + return ddres_warn(DD_WHAT_MODULE); + } LG_DBG("[Mod] Success opening %s, ", filepath.c_str()); // Load the file at a matching DSO address diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index aa139437..34d1f1fb 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -132,6 +132,8 @@ DDRes worker_update_stats(DDProfWorkerContext &worker_context, ddprof_stats_set(STATS_DSO_SIZE, dso_hdr.get_nb_dso()); ddprof_stats_set(STATS_BACKPOPULATE_COUNT, dso_hdr.stats().backpopulate_count()); + ddprof_stats_set(STATS_ELF_FDS, + static_cast(dso_hdr.get_fd_cache_size())); ddprof_stats_set( STATS_UNMATCHED_DEALLOCATION_COUNT, worker_context.live_allocation.get_nb_unmatched_deallocations()); diff --git a/src/dso_hdr.cc b/src/dso_hdr.cc index 550df2b4..4ddadb64 100644 --- a/src/dso_hdr.cc +++ b/src/dso_hdr.cc @@ -284,6 +284,9 @@ DsoHdr::DsoFindRes DsoHdr::dso_find_adjust_same(DsoMap &map, const Dso &dso) { FileInfoId_t DsoHdr::get_or_insert_file_info(const Dso &dso) { if (dso._id != k_file_info_undef) { + if (dso._id > k_file_info_error && !dso.get_cached_elf_file()) { + dso.set_cached_elf_file(get_or_create_cached_elf_file(dso._id)); + } // already looked up this dso return dso._id; } @@ -294,6 +297,7 @@ FileInfoId_t DsoHdr::get_or_insert_file_info(const Dso &dso) { FileInfoId_t DsoHdr::update_id_dd_profiling(const Dso &dso) { if (_dd_profiling_file_info != k_file_info_undef) { dso._id = _dd_profiling_file_info; + dso.set_cached_elf_file(get_or_create_cached_elf_file(dso._id)); return dso._id; } @@ -303,6 +307,7 @@ FileInfoId_t DsoHdr::update_id_dd_profiling(const Dso &dso) { dso._id = _file_info_vector.size(); _dd_profiling_file_info = dso._id; _file_info_vector.emplace_back(FileInfo(dso._filename, 0, 0), dso._id); + dso.set_cached_elf_file(get_or_create_cached_elf_file(dso._id)); return _dd_profiling_file_info; } _dd_profiling_file_info = update_id_from_path(dso); @@ -314,6 +319,7 @@ FileInfoId_t DsoHdr::update_id_from_path(const Dso &dso) { FileInfo file_info = find_file_info(dso); if (!file_info._inode) { dso._id = k_file_info_error; + dso.set_cached_elf_file(nullptr); return dso._id; } @@ -336,12 +342,14 @@ FileInfoId_t DsoHdr::update_id_from_path(const Dso &dso) { _file_info_vector[dso._id] = FileInfoValue(std::move(file_info), dso._id); } } + dso.set_cached_elf_file(get_or_create_cached_elf_file(dso._id)); return dso._id; } FileInfoId_t DsoHdr::update_id_from_dso(const Dso &dso) { if (!has_relevant_path(dso._type)) { dso._id = k_file_info_error; // no file associated + dso.set_cached_elf_file(nullptr); return dso._id; } @@ -352,6 +360,28 @@ FileInfoId_t DsoHdr::update_id_from_dso(const Dso &dso) { return update_id_from_path(dso); } +std::shared_ptr +DsoHdr::get_or_create_cached_elf_file(FileInfoId_t file_info_id) { + auto &weak_entry = _fd_cache[file_info_id]; + auto cached_elf_file = weak_entry.lock(); + if (!cached_elf_file) { + cached_elf_file = std::make_shared(); + cached_elf_file->_id = file_info_id; + weak_entry = cached_elf_file; + } + return cached_elf_file; +} + +void DsoHdr::cleanup_expired_fd_cache_entries() { + for (auto it = _fd_cache.begin(); it != _fd_cache.end();) { + if (it->second.expired()) { + it = _fd_cache.erase(it); + } else { + ++it; + } + } +} + bool DsoHdr::maybe_insert_erase_overlap(Dso &&dso, PerfClock::time_point timestamp) { auto &pid_mapping = _pid_map[dso._pid]; @@ -424,7 +454,10 @@ DsoHdr::DsoFindRes DsoHdr::dso_find_or_backpopulate(pid_t pid, return dso_find_or_backpopulate(pid_mapping, pid, addr); } -void DsoHdr::pid_free(int pid) { _pid_map.erase(pid); } +void DsoHdr::pid_free(int pid) { + _pid_map.erase(pid); + cleanup_expired_fd_cache_entries(); +} bool DsoHdr::pid_backpopulate(pid_t pid, int &nb_elts_added) { return pid_backpopulate(_pid_map[pid], pid, nb_elts_added); @@ -566,6 +599,17 @@ int DsoHdr::get_nb_dso() const { return total_nb_elts; } +size_t DsoHdr::get_fd_cache_size() const { + size_t nb_open_fds = 0; + for (const auto &[_, weak_cached_elf_file] : _fd_cache) { + if (auto cached_elf_file = weak_cached_elf_file.lock(); + cached_elf_file && cached_elf_file->_fd) { + ++nb_open_fds; + } + } + return nb_open_fds; +} + void DsoHdr::reset_backpopulate_state(int reset_threshold) { for (auto &[_, pid_mapping] : _pid_map) { auto &backpopulate_state = pid_mapping._backpopulate_state; @@ -632,8 +676,9 @@ int DsoHdr::clear_unvisited(const std::unordered_set &visited_pids) { } } for (const auto &pid : pids_to_clear) { - _pid_map.erase(pid); + pid_free(pid); } + return pids_to_clear.size(); } } // namespace ddprof diff --git a/test/dso-ut.cc b/test/dso-ut.cc index 14e36b60..60374ac0 100644 --- a/test/dso-ut.cc +++ b/test/dso-ut.cc @@ -522,7 +522,7 @@ TEST(DSOTest, elf_load_simple) { auto [it2, found2] = dso_hdr.dso_find_closest(5, 0x2000); ASSERT_TRUE(found2); ASSERT_EQ(it2->first, 0x2000); - ASSERT_EQ(it2->second, dso2); + ASSERT_TRUE(it2->second.same_mapping(dso2)); } TEST(DSOTest, elf_load) { @@ -548,14 +548,14 @@ TEST(DSOTest, elf_load) { auto [it2, found2] = dso_hdr.dso_find_closest(5, 0x2000); ASSERT_TRUE(found2); ASSERT_EQ(it2->first, 0x2000); - ASSERT_EQ(it2->second, dso2); + ASSERT_TRUE(it2->second.same_mapping(dso2)); Dso dso1_right{dso1}; dso1_right.adjust_start(0x4000); auto [it3, found3] = dso_hdr.dso_find_closest(5, 0x4000); ASSERT_TRUE(found3); ASSERT_EQ(it3->first, 0x4000); - ASSERT_EQ(it3->second, dso1_right); + ASSERT_TRUE(it3->second.same_mapping(dso1_right)); } // map 3rd segment @@ -575,19 +575,19 @@ TEST(DSOTest, elf_load) { auto [it2, found2] = dso_hdr.dso_find_closest(5, 0x2000); ASSERT_TRUE(found2); ASSERT_EQ(it2->first, 0x2000); - ASSERT_EQ(it2->second, dso2); + ASSERT_TRUE(it2->second.same_mapping(dso2)); auto [it3, found3] = dso_hdr.dso_find_closest(5, 0x4000); ASSERT_TRUE(found3); ASSERT_EQ(it3->first, 0x4000); - ASSERT_EQ(it3->second, dso3); + ASSERT_TRUE(it3->second.same_mapping(dso3)); Dso dso1_right{dso1}; dso1_right.adjust_start(0x5000); auto [it4, found4] = dso_hdr.dso_find_closest(5, 0x5000); ASSERT_TRUE(found3); ASSERT_EQ(it4->first, 0x5000); - ASSERT_EQ(it4->second, dso1_right); + ASSERT_TRUE(it4->second.same_mapping(dso1_right)); } // anonymous mapping at the end @@ -606,17 +606,17 @@ TEST(DSOTest, elf_load) { auto [it2, found2] = dso_hdr.dso_find_closest(5, 0x2000); ASSERT_TRUE(found2); ASSERT_EQ(it2->first, 0x2000); - ASSERT_EQ(it2->second, dso2); + ASSERT_TRUE(it2->second.same_mapping(dso2)); auto [it3, found3] = dso_hdr.dso_find_closest(5, 0x4000); ASSERT_TRUE(found3); ASSERT_EQ(it3->first, 0x4000); - ASSERT_EQ(it3->second, dso3); + ASSERT_TRUE(it3->second.same_mapping(dso3)); auto [it4, found4] = dso_hdr.dso_find_closest(5, 0x5000); ASSERT_TRUE(found3); ASSERT_EQ(it4->first, 0x5000); - ASSERT_EQ(it4->second, dso4); + ASSERT_TRUE(it4->second.same_mapping(dso4)); } } @@ -641,7 +641,7 @@ TEST(DSOTest, elf_load_single_segment) { auto [it2, found2] = dso_hdr.dso_find_closest(5, 0x5000); ASSERT_TRUE(found2); ASSERT_EQ(it2->first, 0x5000); - ASSERT_EQ(it2->second, dso2); + ASSERT_TRUE(it2->second.same_mapping(dso2)); } } diff --git a/test/dwfl_module-ut.cc b/test/dwfl_module-ut.cc index 10bbf536..ea7d4dde 100644 --- a/test/dwfl_module-ut.cc +++ b/test/dwfl_module-ut.cc @@ -54,61 +54,66 @@ TEST(DwflModule, inconsistency_test) { int nb_fds_start = count_fds(my_pid); printf("-- Start open file descriptors: %d\n", nb_fds_start); LogHandle handle; - // Load DSOs from our unit test - ElfAddress_t ip = _THIS_IP_; - DsoHdr dso_hdr; - DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); - UniqueElf unique_elf = create_elf_from_self(); - // Check that we found the DSO matching this IP - ASSERT_TRUE(find_res.second); + // Scope dso_hdr and dwfl_wrapper together so the fd cache (_fd_cache inside + // dso_hdr) is released before the fd count check below. { - DwflWrapper dwfl_wrapper; - dwfl_wrapper.attach(my_pid, unique_elf, nullptr); - // retrieve the map associated to pid - DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; - for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { - Dso &dso = it->second; - if (!has_relevant_path(dso._type) || !dso.is_executable()) { - continue; // skip non exec / non standard (anon/vdso...) - } - - FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); - ASSERT_TRUE(file_info_id > k_file_info_error); - - const FileInfoValue &file_info_value = - dso_hdr.get_file_info_value(file_info_id); - DDProfMod *ddprof_mod = nullptr; - auto res = dwfl_wrapper.register_mod(dso._start, it->second, - file_info_value, &ddprof_mod); - - ASSERT_TRUE(IsDDResOK(res)); - ASSERT_TRUE(ddprof_mod->_mod); - if (find_res.first == it) { - std::array elf_addr{ip - ddprof_mod->_sym_bias}; - blaze_symbolize_src_elf src_elf{ - .type_size = sizeof(blaze_symbolize_src_elf), - .path = dso._filename.c_str(), - .debug_syms = true, - .reserved = {}, - }; - const blaze_syms *blaze_syms = blaze_symbolize_elf_virt_offsets( - symbolizer, &src_elf, elf_addr.data(), elf_addr.size()); - - ASSERT_TRUE(blaze_syms); - ASSERT_GE(blaze_syms->cnt, 1); - defer { blaze_syms_free(blaze_syms); }; - // we don't have demangling at this step - EXPECT_TRUE( - strcmp("_ZN6ddprof34DwflModule_inconsistency_test_Test8TestBodyEv", - blaze_syms->syms[0].name) == 0); - // Only expect build-id on this binary (as we can not force it on - // others) - EXPECT_FALSE(ddprof_mod->_build_id.empty()); + // Load DSOs from our unit test + ElfAddress_t ip = _THIS_IP_; + DsoHdr dso_hdr; + DsoHdr::DsoFindRes find_res = dso_hdr.dso_find_or_backpopulate(my_pid, ip); + UniqueElf unique_elf = create_elf_from_self(); + // Check that we found the DSO matching this IP + ASSERT_TRUE(find_res.second); + { + DwflWrapper dwfl_wrapper; + dwfl_wrapper.attach(my_pid, unique_elf, nullptr); + // retrieve the map associated to pid + DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(my_pid)._map; + for (auto it = dso_map.begin(); it != dso_map.end(); ++it) { + Dso &dso = it->second; + if (!has_relevant_path(dso._type) || !dso.is_executable()) { + continue; // skip non exec / non standard (anon/vdso...) + } + + FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); + ASSERT_TRUE(file_info_id > k_file_info_error); + + const FileInfoValue &file_info_value = + dso_hdr.get_file_info_value(file_info_id); + DDProfMod *ddprof_mod = nullptr; + auto res = dwfl_wrapper.register_mod(dso._start, it->second, + file_info_value, &ddprof_mod); + + ASSERT_TRUE(IsDDResOK(res)); + ASSERT_TRUE(ddprof_mod->_mod); + if (find_res.first == it) { + std::array elf_addr{ip - ddprof_mod->_sym_bias}; + blaze_symbolize_src_elf src_elf{ + .type_size = sizeof(blaze_symbolize_src_elf), + .path = dso._filename.c_str(), + .debug_syms = true, + .reserved = {}, + }; + const blaze_syms *blaze_syms = blaze_symbolize_elf_virt_offsets( + symbolizer, &src_elf, elf_addr.data(), elf_addr.size()); + + ASSERT_TRUE(blaze_syms); + ASSERT_GE(blaze_syms->cnt, 1); + defer { blaze_syms_free(blaze_syms); }; + // we don't have demangling at this step + EXPECT_TRUE( + strcmp( + "_ZN6ddprof34DwflModule_inconsistency_test_Test8TestBodyEv", + blaze_syms->syms[0].name) == 0); + // Only expect build-id on this binary (as we can not force it on + // others) + EXPECT_FALSE(ddprof_mod->_build_id.empty()); + } + // check that we loaded all mods matching the DSOs + EXPECT_EQ(ddprof_mod->_status, DDProfMod::kUnknown); } - // check that we loaded all mods matching the DSOs - EXPECT_EQ(ddprof_mod->_status, DDProfMod::kUnknown); - } - } + } // dwfl_wrapper destroyed: dup'd fds closed + } // dso_hdr destroyed: fd_cache fds closed blaze_symbolizer_free(symbolizer); // int nb_fds_end = count_fds(my_pid); printf("-- End open file descriptors: %d\n", nb_fds_end); @@ -160,6 +165,8 @@ TEST(DwflModule, short_lived) { } // Wait for the first PID to die waitpid(child_pid, nullptr, 0); + dso_hdr.pid_free(child_pid); + EXPECT_EQ(dso_hdr.get_fd_cache_size(), 0); pid_t second_child_pid = fork(); if (second_child_pid == 0) { @@ -173,7 +180,7 @@ TEST(DwflModule, short_lived) { { UniqueElf unique_elf = create_elf_from_self(); DwflWrapper dwfl_wrapper; - dwfl_wrapper.attach(child_pid, unique_elf, nullptr); + dwfl_wrapper.attach(second_child_pid, unique_elf, nullptr); // retrieve the map associated to pid DsoHdr::DsoMap &dso_map = dso_hdr.get_pid_mapping(second_child_pid)._map;