From 838d65fb305edc9ad0e4c9513e743ae2a24c2d95 Mon Sep 17 00:00:00 2001
From: Gong Yuchen
Date: Tue, 27 Sep 2022 15:45:13 +0800
Subject: [PATCH] mm: page_alloc: add a tracepoint at the entry of
 __alloc_pages() and export the page alloc/free tracepoint symbols

/proc/meminfo is the main interface for userspace to observe physical
memory usage, but it does not reflect every physical page allocation.
Kernel modules can call alloc_pages() to obtain physical pages directly,
and that memory is not itemized anywhere in /proc/meminfo.

To trace physical page allocation and freeing in detail, one option is
to attach eBPF programs to __alloc_pages() and __free_pages(); a better
option is to load a kernel module that registers tracepoint handlers
and collects the allocation information itself. Export the relevant
tracepoint symbols so that kernel modules can register probes on them.

Page allocation can be time-consuming, so add a new tracepoint,
mm_page_alloc_enter, at the entry of __alloc_pages(). The kernel
already emits the mm_page_alloc tracepoint when an allocation
completes, so pairing the two events makes it possible to measure the
latency of each allocation.

Signed-off-by: Gong Yuchen
---
 Documentation/trace/events-kmem.rst |  9 ++++++---
 include/trace/events/kmem.h         | 20 ++++++++++++++++++++
 mm/page_alloc.c                     |  8 ++++++++
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/Documentation/trace/events-kmem.rst b/Documentation/trace/events-kmem.rst
index 68fa75247488..2a7cb5a2824c 100644
--- a/Documentation/trace/events-kmem.rst
+++ b/Documentation/trace/events-kmem.rst
@@ -45,15 +45,18 @@ but the call_site can usually be used to extrapolate that information.
 3. Page allocation
 ==================
 ::
 
+	mm_page_alloc_enter	  order=%d gfp_flags=%s
 	mm_page_alloc		  page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s
 	mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
 	mm_page_free		  page=%p pfn=%lu order=%d
 	mm_page_free_batched	  page=%p pfn=%lu order=%d cold=%d
 
-These four events deal with page allocation and freeing. mm_page_alloc is
-a simple indicator of page allocator activity. Pages may be allocated from
-the per-CPU allocator (high performance) or the buddy allocator.
+These five events deal with page allocation and freeing. mm_page_alloc_enter
+fires at the entry of the page allocator and, paired with mm_page_alloc, can
+be used to measure the latency of an allocation. mm_page_alloc is a simple
+indicator of page allocator activity. Pages may be allocated from the
+per-CPU allocator (high performance) or the buddy allocator.
 
 If pages are allocated directly from the buddy allocator, the
 mm_page_alloc_zone_locked event is triggered. This event is important as high
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index f65b1f6db22d..1f0309123455 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -190,6 +190,26 @@ TRACE_EVENT(mm_page_free_batched,
 		__entry->pfn)
 );
 
+TRACE_EVENT(mm_page_alloc_enter,
+	TP_PROTO(unsigned int order, gfp_t gfp_flags),
+
+	TP_ARGS(order, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	order		)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->order		= order;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("order=%d gfp_flags=%s",
+		__entry->order,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
 TRACE_EVENT(mm_page_alloc,
 
 	TP_PROTO(struct page *page, unsigned int order,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ff6fffec8770..2054dcbeb1b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -81,6 +81,12 @@
 #include "shuffle.h"
 #include "page_reporting.h"
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(mm_page_alloc_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mm_page_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mm_page_alloc_zone_locked);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mm_page_free);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mm_page_free_batched);
+
 /* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
 typedef int __bitwise fpi_t;
 
@@ -5157,6 +5163,8 @@ static inline void prepare_before_alloc(gfp_t *gfp_mask)
 struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 							nodemask_t *nodemask)
 {
 	struct page *page;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
+
+	trace_mm_page_alloc_enter(order, gfp);
-- 
Gitee
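
As an illustration of the module-based approach described in the changelog,
below is a minimal sketch of an out-of-tree module that registers probes on
the exported tracepoints and estimates __alloc_pages() latency. It assumes
this patch is applied; the module name, the pr_debug reporting, and the
per-CPU pairing scheme are illustrative choices, not part of the patch. The
register_trace_mm_page_alloc_enter()/register_trace_mm_page_alloc() helpers
are generated by the TRACE_EVENT() definitions in
include/trace/events/kmem.h and resolve against the __tracepoint_* symbols
exported above.

// SPDX-License-Identifier: GPL-2.0
/*
 * alloc_latency.c - illustrative sketch only, not part of this patch.
 *
 * Pairs mm_page_alloc_enter with mm_page_alloc on the same CPU to
 * estimate __alloc_pages() latency.
 */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/ktime.h>
#include <trace/events/kmem.h>

static DEFINE_PER_CPU(u64, enter_ns);

/* Probe signature = void *data + the TP_PROTO of mm_page_alloc_enter. */
static void probe_alloc_enter(void *data, unsigned int order, gfp_t gfp_flags)
{
	this_cpu_write(enter_ns, ktime_get_ns());
}

/* Probe signature = void *data + the TP_PROTO of mm_page_alloc. */
static void probe_alloc_done(void *data, struct page *page, unsigned int order,
			     gfp_t gfp_flags, int migratetype)
{
	u64 start = this_cpu_read(enter_ns);

	if (start)
		pr_debug("alloc order=%u latency=%lluns\n",
			 order, ktime_get_ns() - start);
	this_cpu_write(enter_ns, 0);
}

static int __init alloc_latency_init(void)
{
	int ret;

	/* These helpers resolve against the exported __tracepoint_* symbols. */
	ret = register_trace_mm_page_alloc_enter(probe_alloc_enter, NULL);
	if (ret)
		return ret;

	ret = register_trace_mm_page_alloc(probe_alloc_done, NULL);
	if (ret)
		unregister_trace_mm_page_alloc_enter(probe_alloc_enter, NULL);
	return ret;
}

static void __exit alloc_latency_exit(void)
{
	unregister_trace_mm_page_alloc(probe_alloc_done, NULL);
	unregister_trace_mm_page_alloc_enter(probe_alloc_enter, NULL);
	/* Wait for in-flight probes to finish before module text goes away. */
	tracepoint_synchronize_unregister();
}

module_init(alloc_latency_init);
module_exit(alloc_latency_exit);
MODULE_LICENSE("GPL");

Note that the per-CPU timestamp ignores preemption, CPU migration, and
nested allocations from interrupt context, so the reported numbers are
approximate; a production version would key the pairing on task and context.
The new event can also be consumed without any module via tracefs, under
/sys/kernel/tracing/events/kmem/mm_page_alloc_enter/.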