From 23b6039d109183990740b937a9db86b666f5d527 Mon Sep 17 00:00:00 2001 From: Zimo Ji Date: Mon, 29 Sep 2025 23:02:02 +0800 Subject: [PATCH] [InstrProf] Auto merge raw profile file in PGO Enabling clang merge raw profile file(s) into .profdata file automatica- lly. We introduced two new clang options `-fprofile-use-dir=` and `-fprofile- instr-use-dir=`. These two options can automatically merge all files in the specific directory with same weight. Also, if we use a raw profile file as the argument of `-fprofile-use=` or `-fprofile-instr-use=`. It can also be auto-merged. ** We've detected environment inconsistency between our local machine and remote CI/CD machine. However, I failed to recurrence issues on my machine. Therefore, the `-fprofile-use-dir=` and `-fprofile-instr-use-dir=` may be unavailable on some specific machines. I would be extremely appreciate if someone can help us fix the issue.** --- clang/include/clang/Basic/CodeGenOptions.h | 3 + .../clang/Basic/DiagnosticDriverKinds.td | 8 + clang/include/clang/Driver/Options.td | 13 ++ clang/lib/Driver/ToolChains/Clang.cpp | 9 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 3 +- clang/lib/Frontend/CompilerInvocation.cpp | 139 +++++++++++++++++- clang/test/Misc/warning-flags.c | 3 +- compiler-rt/test/profile/instrprof-basic.c | 2 + compiler-rt/test/profile/lit.cfg.py | 3 + 9 files changed, 177 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index f2a707a8ba8d..e5cc4d8a7a59 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -291,6 +291,9 @@ public: /// Name of the profile file to use as input for -fprofile-instr-use std::string ProfileInstrumentUsePath; + /// Name of the profile directory to use as input for -fprofile-instr-use-dir and -fprofile-use-dir + std::string ProfileInstrumentUseDirPath; + /// Name of the profile remapping file to apply to the profile data supplied /// by -fprofile-sample-use or -fprofile-instr-use. std::string ProfileRemappingFile; diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3d8240f8357b..c2d6caf313f9 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -827,4 +827,12 @@ def err_drv_triple_version_invalid : Error< def warn_missing_include_dirs : Warning< "no such include directory: '%0'">, InGroup, DefaultIgnore; + +def err_automerge_profraw : Error< + "error occurred when tried to auto merge profraw files" +>; + +def warn_automerge_profraw : Warning< + "failed to open file when auto merging" +>; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2c3f45b693e6..df86644e4a7c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1792,6 +1792,10 @@ def fprofile_instr_use : Flag<["-"], "fprofile-instr-use">, Group, def fprofile_instr_use_EQ : Joined<["-"], "fprofile-instr-use=">, Group, Visibility<[ClangOption, CLOption]>, HelpText<"Use instrumentation data for profile-guided optimization">; +def fprofile_instr_use_dir_EQ : Joined<["-"], "fprofile-instr-use-dir=">, + Group, Visibility<[ClangOption, CLOption]>, + MetaVarName<"">, + HelpText<"Use instrumentation data in the directory for profile-guided optimization">; def fprofile_remapping_file_EQ : Joined<["-"], "fprofile-remapping-file=">, Group, Visibility<[ClangOption, CC1Option, CLOption]>, MetaVarName<"">, @@ -1838,6 +1842,11 @@ def fprofile_use_EQ : Joined<["-"], "fprofile-use=">, Visibility<[ClangOption, CLOption]>, MetaVarName<"">, HelpText<"Use instrumentation data for profile-guided optimization. If pathname is a directory, it reads from /default.profdata. Otherwise, it reads from file .">; +def fprofile_use_dir_EQ : Joined<["-"], "fprofile-use-dir=">, + Group, + Visibility<[ClangOption, CLOption]>, + MetaVarName<"">, + HelpText<"Use instrumentation data in the directory for profile-guided optimization">; def fno_profile_instr_generate : Flag<["-"], "fno-profile-instr-generate">, Group, Visibility<[ClangOption, CLOption]>, HelpText<"Disable generation of profile instrumentation.">; @@ -7365,6 +7374,10 @@ def fprofile_instrument_use_path_EQ : Joined<["-"], "fprofile-instrument-use-path=">, HelpText<"Specify the profile path in PGO use compilation">, MarshallingInfoString>; +def fprofile_instrument_use_dir_path_EQ : + Joined<["-"],"fprofile-instrument-use-dir-path=">, + HelpText<"Specify the profraw directory path in auto-merge PGO use compilation">, + MarshallingInfoString>; def flto_visibility_public_std: Flag<["-"], "flto-visibility-public-std">, HelpText<"Use public LTO visibility for classes in std and stdext namespaces">, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0ff0b048227e..c572b74324ef 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -691,6 +691,15 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back( Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path)); + } else if (ProfileUseArg->getOption().matches( + options::OPT_fprofile_use_dir_EQ)|| + ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_dir_EQ)) { + SmallString<128> Path( + ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "."); + CmdArgs.push_back(Args.MakeArgString( + Twine("-fprofile-instrument-use-dir-path=") + Path)); } } diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 019df16a909f..1ee2d64c00b0 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1721,7 +1721,8 @@ Arg *tools::getLastProfileUseArg(const ArgList &Args) { auto *ProfileUseArg = Args.getLastArg( options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ, options::OPT_fprofile_use, options::OPT_fprofile_use_EQ, - options::OPT_fno_profile_instr_use); + options::OPT_fno_profile_instr_use, + options::OPT_fprofile_instr_use_dir_EQ, options::OPT_fprofile_use_dir_EQ); if (ProfileUseArg && ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use)) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 17759d7da615..72e18071ed7f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -66,10 +66,13 @@ #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/Remarks/HotnessThresholdParser.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" @@ -80,6 +83,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/Signals.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -1475,7 +1479,107 @@ static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) { return Buffer; } -// Set the profile kind using fprofile-instrument-use-path. +static bool isProfRawFile(const Twine Path, llvm::vfs::FileSystem &FS) { + auto ReaderOrErr = llvm::InstrProfReader::create(Path, FS); + if (auto Err = ReaderOrErr.takeError()) { + llvm::consumeError(std::move(Err)); + return false; + } + + auto *Reader = ReaderOrErr->get(); + if (dynamic_cast(Reader) != nullptr + || dynamic_cast(Reader) != nullptr) { + return true; + } + return false; +} + +static Expected +mergeAll(const SmallVectorImpl &FileNames, llvm::vfs::FileSystem &FS, + DiagnosticsEngine &Diags) { + std::error_code EC; + llvm::InstrProfWriter Writer; + + for (StringRef FileName : FileNames) { + auto ReaderOrErr = llvm::InstrProfReader::create(FileName, FS); + if (llvm::Error E = ReaderOrErr.takeError()) { + Diags.Report(diag::err_automerge_profraw) << llvm::toString(std::move(E)); + return E; + } + auto Reader = std::move(ReaderOrErr.get()); + if (llvm::Error E = Writer.mergeProfileKind(Reader->getProfileKind())) { + Diags.Report(diag::err_automerge_profraw) + << FileName << " can't be auto-merged, abort."; + return E; + } + for (auto I : *Reader) { + Writer.addRecord(std::move(I), [&](llvm::Error E) { + Diags.Report(diag::err_automerge_profraw) << FileName; + return; + }); + } + std::vector BinaryIds; + if (llvm::Error E = Reader->readBinaryIds(BinaryIds)) { + Diags.Report(diag::err_automerge_profraw) << " failed to read binary id"; + return llvm::errorCodeToError(llvm::errc::no_such_file_or_directory); + } + Writer.addBinaryIds(BinaryIds); // TODO: Add memprof format support. + } + + int FD; + llvm::SmallString<128> TempFileName; + EC = llvm::sys::fs::createUniqueFile("./autoconv-temp-%%%%%%.profdata", FD, + TempFileName); + if(EC) + return llvm::errorCodeToError(EC); + llvm::raw_fd_ostream OF(FD, true); + llvm::sys::RemoveFileOnSignal(TempFileName); + + if (llvm::Error E = Writer.write(OF)) { + Diags.Report(diag::err_automerge_profraw) + << " failed to write to profdata file."; + return E; + } + + return std::string(TempFileName.begin(), TempFileName.end()); +} + +static Expected mergeSingleProfRawFile(const Twine &ProfRawName, + llvm::vfs::FileSystem &FS, + DiagnosticsEngine &Diags) { + return mergeAll(llvm::SmallVector({ProfRawName.str()}), FS, + Diags); +} + +static Expected mergeAllFromDirectory(const Twine &ProfileDirName, + llvm::vfs::FileSystem &FS, + DiagnosticsEngine &Diags) { + llvm::SmallVector filenames; + llvm::SmallVector filenameRefs; + + llvm::sys::fs::file_status Status; + llvm::sys::fs::status(ProfileDirName,Status); + if(!llvm::sys::fs::exists(Status)){ + Diags.Report(diag::err_automerge_profraw)<<" no such directory: "<path())) { + filenames.push_back(F->path()); + filenameRefs.push_back(filenames.back()); + } + } + if (EC) + return llvm::errorCodeToError(llvm::errc::io_error); + } + + return mergeAll(filenameRefs, FS, Diags); +} + static void setPGOUseInstrumentor(CodeGenOptions &Opts, const Twine &ProfileName, llvm::vfs::FileSystem &FS, @@ -4923,9 +5027,36 @@ bool CompilerInvocation::CreateFromArgsImpl( auto FS = createVFSFromOverlayFiles(Res.getHeaderSearchOpts().VFSOverlayFiles, Diags, llvm::vfs::getRealFileSystem()); - setPGOUseInstrumentor(Res.getCodeGenOpts(), - Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS, - Diags); + // Use an auxiliary function to pre-merge if profraw or directory is + // provided with. + std::string ProfileName; + if (isProfRawFile(Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS)) { + auto ProfileNameOrErr = mergeSingleProfRawFile( + Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS, Diags); + if (llvm::Error E = ProfileNameOrErr.takeError()) { + ProfileName = Res.getCodeGenOpts().ProfileInstrumentUsePath; + } else { + ProfileName = ProfileNameOrErr.get(); + Res.getCodeGenOpts().ProfileInstrumentUsePath = ProfileName; + } + } else { + ProfileName = Res.getCodeGenOpts().ProfileInstrumentUsePath; + } + + setPGOUseInstrumentor(Res.getCodeGenOpts(), ProfileName, *FS, Diags); + } else if (!Res.getCodeGenOpts().ProfileInstrumentUseDirPath.empty()) { + auto FS = + createVFSFromOverlayFiles(Res.getHeaderSearchOpts().VFSOverlayFiles, + Diags, llvm::vfs::getRealFileSystem()); + auto ProfileNameOrErr = mergeAllFromDirectory( + Res.getCodeGenOpts().ProfileInstrumentUseDirPath, *FS, Diags); + std::string ProfileName; + if (llvm::Error E = ProfileNameOrErr.takeError()) { + llvm::consumeError(std::move(E)); + } else { + ProfileName = ProfileNameOrErr.get(); + Res.getCodeGenOpts().ProfileInstrumentUsePath = ProfileName; + } } FixupInvocation(Res, Diags, Args, DashX); diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c index cdbe1e95cba9..8a39b141a78a 100644 --- a/clang/test/Misc/warning-flags.c +++ b/clang/test/Misc/warning-flags.c @@ -18,7 +18,7 @@ This test serves two purposes: The list of warnings below should NEVER grow. It should gradually shrink to 0. -CHECK: Warnings without flags (65): +CHECK: Warnings without flags (66): CHECK-NEXT: ext_expected_semi_decl_list CHECK-NEXT: ext_missing_whitespace_after_macro_name @@ -35,6 +35,7 @@ CHECK-NEXT: pp_poisoning_existing_macro CHECK-NEXT: warn_accessor_property_type_mismatch CHECK-NEXT: warn_arcmt_nsalloc_realloc CHECK-NEXT: warn_asm_label_on_auto_decl +CHECK-NEXT: warn_automerge_profraw CHECK-NEXT: warn_c_kext CHECK-NEXT: warn_call_wrong_number_of_arguments CHECK-NEXT: warn_case_empty_range diff --git a/compiler-rt/test/profile/instrprof-basic.c b/compiler-rt/test/profile/instrprof-basic.c index 702f521ba4ed..b3d5b7eef1a9 100644 --- a/compiler-rt/test/profile/instrprof-basic.c +++ b/compiler-rt/test/profile/instrprof-basic.c @@ -3,6 +3,7 @@ // RUN: llvm-profdata merge -o %t.profdata %t.profraw // RUN: llvm-profdata show --all-functions %t.profdata | FileCheck %s --check-prefix=PROFCNT // RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=ORIG +// RUN: %clang_profuse=%t.profraw -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=ORIG // // RUN: rm -fr %t.dir1 // RUN: mkdir -p %t.dir1 @@ -11,6 +12,7 @@ // RUN: llvm-profdata merge -o %t.em.profdata %t.dir1 // RUN: llvm-profdata show --all-functions %t.em.profdata | FileCheck %s --check-prefix=PROFCNT // RUN: %clang_profuse=%t.em.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=MERGE +// RUN: %clang_profusedir=%t.dir1 -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=MERGE // // RUN: rm -fr %t.dir2 // RUN: mkdir -p %t.dir2 diff --git a/compiler-rt/test/profile/lit.cfg.py b/compiler-rt/test/profile/lit.cfg.py index d3ba115731c5..fff1995aed4f 100644 --- a/compiler-rt/test/profile/lit.cfg.py +++ b/compiler-rt/test/profile/lit.cfg.py @@ -140,6 +140,9 @@ config.substitutions.append( config.substitutions.append( ("%clang_profuse=", build_invocation(clang_cflags) + " -fprofile-instr-use=") ) +config.substitutions.append( + ("%clang_profusedir=", build_invocation(clang_cflags) + " -fprofile-instr-use-dir=") +) config.substitutions.append( ("%clangxx_profuse=", build_invocation(clang_cxxflags) + " -fprofile-instr-use=") ) -- Gitee