Skip to content

Commit be5c169

Browse files
kinkeliushuyu
authored andcommitted
Add preliminary support for LLVM 19
1 parent 3bd54c1 commit be5c169

18 files changed

+11011
-8
lines changed

cmake/Modules/FindLLVM.cmake

+5-7
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
# We also want an user-specified LLVM_ROOT_DIR to take precedence over the
3333
# system default locations such as /usr/local/bin. Executing find_program()
3434
# multiples times is the approach recommended in the docs.
35-
set(llvm_config_names llvm-config-18.1 llvm-config181 llvm-config-18
35+
set(llvm_config_names llvm-config-19.1 llvm-config191 llvm-config-19
36+
llvm-config-18.1 llvm-config181 llvm-config-18
3637
llvm-config-17.0 llvm-config170 llvm-config-17
3738
llvm-config-16.0 llvm-config160 llvm-config-16
3839
llvm-config-15.0 llvm-config150 llvm-config-15
@@ -46,9 +47,11 @@ if(APPLE)
4647
# extra fallbacks for MacPorts & Homebrew
4748
find_program(LLVM_CONFIG
4849
NAMES ${llvm_config_names}
49-
PATHS /opt/local/libexec/llvm-18/bin /opt/local/libexec/llvm-17/bin
50+
PATHS /opt/local/libexec/llvm-19/bin
51+
/opt/local/libexec/llvm-18/bin /opt/local/libexec/llvm-17/bin
5052
/opt/local/libexec/llvm-16/bin /opt/local/libexec/llvm-15/bin
5153
/opt/local/libexec/llvm/bin
54+
/usr/local/opt/llvm@19/bin
5255
/usr/local/opt/llvm@18/bin /usr/local/opt/llvm@17/bin
5356
/usr/local/opt/llvm@16/bin /usr/local/opt/llvm@15/bin
5457
/usr/local/opt/llvm/bin
@@ -138,11 +141,6 @@ else()
138141
string(REPLACE "-llibxml2.tbd" "-lxml2" LLVM_LDFLAGS ${LLVM_LDFLAGS})
139142
endif()
140143

141-
if(${LLVM_VERSION_MAJOR} LESS "15")
142-
# Versions below 15.0 do not support component windowsdriver
143-
list(REMOVE_ITEM LLVM_FIND_COMPONENTS "windowsdriver")
144-
endif()
145-
146144
llvm_set(LIBRARY_DIRS libdir true)
147145
llvm_set_libs(LIBRARIES libs "${LLVM_FIND_COMPONENTS}")
148146
# LLVM bug: llvm-config --libs tablegen returns -lLLVM-3.8.0

tests/lit.site.cfg.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ config.available_features.add("llvm%d" % config.llvm_version)
8686
# config.llvm_version: 309, 400, 500, ...
8787
# plusoneable_llvmversion: 39, 40, 50, ...
8888
plusoneable_llvmversion = config.llvm_version // 10 + config.llvm_version%10
89-
for version in range(140, plusoneable_llvmversion+1):
89+
for version in range(150, plusoneable_llvmversion+1):
9090
config.available_features.add("atleast_llvm%d0%d" % (version//10, version%10))
9191
for version in range(plusoneable_llvmversion, 201):
9292
config.available_features.add("atmost_llvm%d0%d" % (version//10, version%10))

tools/ldc-profdata/llvm-profdata-19.1.cpp

+3,383
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
set(LLVM_LINK_COMPONENTS
3+
AllTargetsDescs
4+
AllTargetsDisassemblers
5+
AllTargetsInfos
6+
DebugInfoDWARF
7+
Core
8+
MC
9+
IPO
10+
MCDisassembler
11+
Object
12+
ProfileData
13+
Support
14+
Symbolize
15+
TargetParser
16+
)
17+
18+
add_llvm_tool(llvm-profgen
19+
llvm-profgen.cpp
20+
PerfReader.cpp
21+
CSPreInliner.cpp
22+
ProfiledBinary.cpp
23+
ProfileGenerator.cpp
24+
MissingFrameInferrer.cpp
25+
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
//===-- CSPreInliner.cpp - Profile guided preinliner -------------- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "CSPreInliner.h"
10+
#include "ProfiledBinary.h"
11+
#include "llvm/ADT/SCCIterator.h"
12+
#include "llvm/ADT/Statistic.h"
13+
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14+
#include "llvm/Transforms/IPO/SampleProfile.h"
15+
#include <cstdint>
16+
#include <queue>
17+
18+
#define DEBUG_TYPE "cs-preinliner"
19+
20+
using namespace llvm;
21+
using namespace sampleprof;
22+
23+
STATISTIC(PreInlNumCSInlined,
24+
"Number of functions inlined with context sensitive profile");
25+
STATISTIC(PreInlNumCSNotInlined,
26+
"Number of functions not inlined with context sensitive profile");
27+
STATISTIC(PreInlNumCSInlinedHitMinLimit,
28+
"Number of functions with FDO inline stopped due to min size limit");
29+
STATISTIC(PreInlNumCSInlinedHitMaxLimit,
30+
"Number of functions with FDO inline stopped due to max size limit");
31+
STATISTIC(
32+
PreInlNumCSInlinedHitGrowthLimit,
33+
"Number of functions with FDO inline stopped due to growth size limit");
34+
35+
// The switches specify inline thresholds used in SampleProfileLoader inlining.
36+
// TODO: the actual threshold to be tuned here because the size here is based
37+
// on machine code not LLVM IR.
38+
namespace llvm {
39+
cl::opt<bool> EnableCSPreInliner(
40+
"csspgo-preinliner", cl::Hidden, cl::init(true),
41+
cl::desc("Run a global pre-inliner to merge context profile based on "
42+
"estimated global top-down inline decisions"));
43+
44+
cl::opt<bool> UseContextCostForPreInliner(
45+
"use-context-cost-for-preinliner", cl::Hidden, cl::init(true),
46+
cl::desc("Use context-sensitive byte size cost for preinliner decisions"));
47+
} // namespace llvm
48+
49+
static cl::opt<bool> SamplePreInlineReplay(
50+
"csspgo-replay-preinline", cl::Hidden, cl::init(false),
51+
cl::desc(
52+
"Replay previous inlining and adjust context profile accordingly"));
53+
54+
static cl::opt<int> CSPreinlMultiplierForPrevInl(
55+
"csspgo-preinliner-multiplier-for-previous-inlining", cl::Hidden,
56+
cl::init(100),
57+
cl::desc(
58+
"Multiplier to bump up callsite threshold for previous inlining."));
59+
60+
CSPreInliner::CSPreInliner(SampleContextTracker &Tracker,
61+
ProfiledBinary &Binary, ProfileSummary *Summary)
62+
: UseContextCost(UseContextCostForPreInliner),
63+
// TODO: Pass in a guid-to-name map in order for
64+
// ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes
65+
// as their profile context.
66+
ContextTracker(Tracker), Binary(Binary), Summary(Summary) {
67+
// Set default preinliner hot/cold call site threshold tuned with CSSPGO.
68+
// for good performance with reasonable profile size.
69+
if (!SampleHotCallSiteThreshold.getNumOccurrences())
70+
SampleHotCallSiteThreshold = 1500;
71+
if (!SampleColdCallSiteThreshold.getNumOccurrences())
72+
SampleColdCallSiteThreshold = 0;
73+
if (!ProfileInlineLimitMax.getNumOccurrences())
74+
ProfileInlineLimitMax = 50000;
75+
}
76+
77+
std::vector<FunctionId> CSPreInliner::buildTopDownOrder() {
78+
std::vector<FunctionId> Order;
79+
// Trim cold edges to get a more stable call graph. This allows for a more
80+
// stable top-down order which in turns helps the stablity of the generated
81+
// profile from run to run.
82+
uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
83+
(Summary->getDetailedSummary()));
84+
ProfiledCallGraph ProfiledCG(ContextTracker, ColdCountThreshold);
85+
86+
// Now that we have a profiled call graph, construct top-down order
87+
// by building up SCC and reversing SCC order.
88+
scc_iterator<ProfiledCallGraph *> I = scc_begin(&ProfiledCG);
89+
while (!I.isAtEnd()) {
90+
auto Range = *I;
91+
if (SortProfiledSCC) {
92+
// Sort nodes in one SCC based on callsite hotness.
93+
scc_member_iterator<ProfiledCallGraph *> SI(*I);
94+
Range = *SI;
95+
}
96+
for (auto *Node : Range) {
97+
if (Node != ProfiledCG.getEntryNode())
98+
Order.push_back(Node->Name);
99+
}
100+
++I;
101+
}
102+
std::reverse(Order.begin(), Order.end());
103+
104+
return Order;
105+
}
106+
107+
bool CSPreInliner::getInlineCandidates(ProfiledCandidateQueue &CQueue,
108+
const FunctionSamples *CallerSamples) {
109+
assert(CallerSamples && "Expect non-null caller samples");
110+
111+
// Ideally we want to consider everything a function calls, but as far as
112+
// context profile is concerned, only those frames that are children of
113+
// current one in the trie is relavent. So we walk the trie instead of call
114+
// targets from function profile.
115+
ContextTrieNode *CallerNode =
116+
ContextTracker.getContextNodeForProfile(CallerSamples);
117+
118+
bool HasNewCandidate = false;
119+
for (auto &Child : CallerNode->getAllChildContext()) {
120+
ContextTrieNode *CalleeNode = &Child.second;
121+
FunctionSamples *CalleeSamples = CalleeNode->getFunctionSamples();
122+
if (!CalleeSamples)
123+
continue;
124+
125+
// Call site count is more reliable, so we look up the corresponding call
126+
// target profile in caller's context profile to retrieve call site count.
127+
uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate();
128+
uint64_t CallsiteCount = 0;
129+
LineLocation Callsite = CalleeNode->getCallSiteLoc();
130+
if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
131+
auto It = CallTargets->find(CalleeSamples->getFunction());
132+
if (It != CallTargets->end())
133+
CallsiteCount = It->second;
134+
}
135+
136+
// TODO: call site and callee entry count should be mostly consistent, add
137+
// check for that.
138+
HasNewCandidate = true;
139+
uint32_t CalleeSize = getFuncSize(CalleeNode);
140+
CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount),
141+
CalleeSize);
142+
}
143+
144+
return HasNewCandidate;
145+
}
146+
147+
uint32_t CSPreInliner::getFuncSize(const ContextTrieNode *ContextNode) {
148+
if (UseContextCost)
149+
return Binary.getFuncSizeForContext(ContextNode);
150+
151+
return ContextNode->getFunctionSamples()->getBodySamples().size();
152+
}
153+
154+
bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) {
155+
bool WasInlined =
156+
Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
157+
// If replay inline is requested, simply follow the inline decision of the
158+
// profiled binary.
159+
if (SamplePreInlineReplay)
160+
return WasInlined;
161+
162+
unsigned int SampleThreshold = SampleColdCallSiteThreshold;
163+
uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
164+
(Summary->getDetailedSummary()));
165+
166+
if (Candidate.CallsiteCount <= ColdCountThreshold)
167+
SampleThreshold = SampleColdCallSiteThreshold;
168+
else {
169+
// Linearly adjust threshold based on normalized hotness, i.e, a value in
170+
// [0,1]. Use 10% cutoff instead of the max count as the normalization
171+
// upperbound for stability.
172+
double NormalizationUpperBound =
173+
ProfileSummaryBuilder::getEntryForPercentile(
174+
Summary->getDetailedSummary(), 100000 /* 10% */)
175+
.MinCount;
176+
double NormalizationLowerBound = ColdCountThreshold;
177+
double NormalizedHotness =
178+
(Candidate.CallsiteCount - NormalizationLowerBound) /
179+
(NormalizationUpperBound - NormalizationLowerBound);
180+
if (NormalizedHotness > 1.0)
181+
NormalizedHotness = 1.0;
182+
// Add 1 to ensure hot callsites get a non-zero threshold, which could
183+
// happen when SampleColdCallSiteThreshold is 0. This is when we do not
184+
// want any inlining for cold callsites.
185+
SampleThreshold = SampleHotCallSiteThreshold * NormalizedHotness * 100 +
186+
SampleColdCallSiteThreshold + 1;
187+
// Bump up the threshold to favor previous compiler inline decision. The
188+
// compiler has more insight and knowledge about functions based on their IR
189+
// and attribures and should be able to make a more reasonable inline
190+
// decision.
191+
if (WasInlined)
192+
SampleThreshold *= CSPreinlMultiplierForPrevInl;
193+
}
194+
195+
return (Candidate.SizeCost < SampleThreshold);
196+
}
197+
198+
void CSPreInliner::processFunction(const FunctionId Name) {
199+
FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name);
200+
if (!FSamples)
201+
return;
202+
203+
unsigned FuncSize =
204+
getFuncSize(ContextTracker.getContextNodeForProfile(FSamples));
205+
unsigned FuncFinalSize = FuncSize;
206+
unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit;
207+
SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
208+
SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
209+
210+
LLVM_DEBUG(dbgs() << "Process " << Name
211+
<< " for context-sensitive pre-inlining (pre-inline size: "
212+
<< FuncSize << ", size limit: " << SizeLimit << ")\n");
213+
214+
ProfiledCandidateQueue CQueue;
215+
getInlineCandidates(CQueue, FSamples);
216+
217+
while (!CQueue.empty() && FuncFinalSize < SizeLimit) {
218+
ProfiledInlineCandidate Candidate = CQueue.top();
219+
CQueue.pop();
220+
bool ShouldInline = false;
221+
if ((ShouldInline = shouldInline(Candidate))) {
222+
// We mark context as inlined as the corresponding context profile
223+
// won't be merged into that function's base profile.
224+
++PreInlNumCSInlined;
225+
ContextTracker.markContextSamplesInlined(Candidate.CalleeSamples);
226+
Candidate.CalleeSamples->getContext().setAttribute(
227+
ContextShouldBeInlined);
228+
FuncFinalSize += Candidate.SizeCost;
229+
getInlineCandidates(CQueue, Candidate.CalleeSamples);
230+
} else {
231+
++PreInlNumCSNotInlined;
232+
}
233+
LLVM_DEBUG(
234+
dbgs() << (ShouldInline ? " Inlined" : " Outlined")
235+
<< " context profile for: "
236+
<< ContextTracker.getContextString(*Candidate.CalleeSamples)
237+
<< " (callee size: " << Candidate.SizeCost
238+
<< ", call count:" << Candidate.CallsiteCount << ")\n");
239+
}
240+
241+
if (!CQueue.empty()) {
242+
if (SizeLimit == (unsigned)ProfileInlineLimitMax)
243+
++PreInlNumCSInlinedHitMaxLimit;
244+
else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
245+
++PreInlNumCSInlinedHitMinLimit;
246+
else
247+
++PreInlNumCSInlinedHitGrowthLimit;
248+
}
249+
250+
LLVM_DEBUG({
251+
if (!CQueue.empty())
252+
dbgs() << " Inline candidates ignored due to size limit (inliner "
253+
"original size: "
254+
<< FuncSize << ", inliner final size: " << FuncFinalSize
255+
<< ", size limit: " << SizeLimit << ")\n";
256+
257+
while (!CQueue.empty()) {
258+
ProfiledInlineCandidate Candidate = CQueue.top();
259+
CQueue.pop();
260+
bool WasInlined =
261+
Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
262+
dbgs() << " "
263+
<< ContextTracker.getContextString(*Candidate.CalleeSamples)
264+
<< " (candidate size:" << Candidate.SizeCost
265+
<< ", call count: " << Candidate.CallsiteCount << ", previously "
266+
<< (WasInlined ? "inlined)\n" : "not inlined)\n");
267+
}
268+
});
269+
}
270+
271+
void CSPreInliner::run() {
272+
#ifndef NDEBUG
273+
auto printProfileNames = [](SampleContextTracker &ContextTracker,
274+
bool IsInput) {
275+
uint32_t Size = 0;
276+
for (auto *Node : ContextTracker) {
277+
FunctionSamples *FSamples = Node->getFunctionSamples();
278+
if (FSamples) {
279+
Size++;
280+
dbgs() << " [" << ContextTracker.getContextString(Node) << "] "
281+
<< FSamples->getTotalSamples() << ":"
282+
<< FSamples->getHeadSamples() << "\n";
283+
}
284+
}
285+
dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles ("
286+
<< Size << " total):\n";
287+
};
288+
#endif
289+
290+
LLVM_DEBUG(printProfileNames(ContextTracker, true));
291+
292+
// Execute global pre-inliner to estimate a global top-down inline
293+
// decision and merge profiles accordingly. This helps with profile
294+
// merge for ThinLTO otherwise we won't be able to merge profiles back
295+
// to base profile across module/thin-backend boundaries.
296+
// It also helps better compress context profile to control profile
297+
// size, as we now only need context profile for functions going to
298+
// be inlined.
299+
for (FunctionId FuncName : buildTopDownOrder()) {
300+
processFunction(FuncName);
301+
}
302+
303+
// Not inlined context profiles are merged into its base, so we can
304+
// trim out such profiles from the output.
305+
for (auto *Node : ContextTracker) {
306+
FunctionSamples *FProfile = Node->getFunctionSamples();
307+
if (FProfile &&
308+
(Node->getParentContext() != &ContextTracker.getRootContext() &&
309+
!FProfile->getContext().hasState(InlinedContext))) {
310+
Node->setFunctionSamples(nullptr);
311+
}
312+
}
313+
FunctionSamples::ProfileIsPreInlined = true;
314+
315+
LLVM_DEBUG(printProfileNames(ContextTracker, false));
316+
}

0 commit comments

Comments
 (0)