1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/IR/FMF.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/AtomicOrdering.h"
30#include "llvm/Support/BranchProbability.h"
31#include "llvm/Support/InstructionCost.h"
32#include <functional>
33#include <optional>
34#include <utility>
35
36namespace llvm {
37
38namespace Intrinsic {
39typedef unsigned ID;
40}
41
42class AllocaInst;
43class AssumptionCache;
44class BlockFrequencyInfo;
45class DominatorTree;
46class BranchInst;
47class CallBase;
48class Function;
49class GlobalValue;
50class InstCombiner;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
53class IntrinsicInst;
54class LoadInst;
55class Loop;
56class LoopInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
60class SCEV;
61class ScalarEvolution;
62class StoreInst;
63class SwitchInst;
64class TargetLibraryInfo;
65class Type;
66class User;
67class Value;
68class VPIntrinsic;
69struct KnownBits;
70
71/// Information about a load/store intrinsic defined by the target.
72struct MemIntrinsicInfo {
73 /// This is the pointer that the intrinsic is loading from or storing to.
74 /// If this is non-null, then analysis/optimization passes can assume that
75 /// this intrinsic is functionally equivalent to a load/store from this
76 /// pointer.
77 Value *PtrVal = nullptr;
78
79 // Ordering for atomic operations.
80 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
81
82 // Same Id is set by the target for corresponding load/store intrinsics.
83 unsigned short MatchingId = 0;
84
85 bool ReadMem = false;
86 bool WriteMem = false;
87 bool IsVolatile = false;
88
89 bool isUnordered() const {
90 return (Ordering == AtomicOrdering::NotAtomic ||
91 Ordering == AtomicOrdering::Unordered) &&
92 !IsVolatile;
93 }
94};
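// Illustrative sketch (not part of this interface): a pass that wants to treat
// a target load intrinsic like an ordinary load can ask the target to fill in
// a MemIntrinsicInfo via TTI.getTgtMemIntrinsic (declared further down in this
// class, where TTI is a TargetTransformInfo instance) and then check the
// recorded properties:
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.ReadMem &&
//       !Info.WriteMem && Info.isUnordered())
//     ; // Safe to forward/CSE the value as if it were a load from Info.PtrVal.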
95
96/// Attributes of a target dependent hardware loop.
97struct HardwareLoopInfo {
98 HardwareLoopInfo() = delete;
99 HardwareLoopInfo(Loop *L);
100 Loop *L = nullptr;
101 BasicBlock *ExitBlock = nullptr;
102 BranchInst *ExitBranch = nullptr;
103 const SCEV *ExitCount = nullptr;
104 IntegerType *CountType = nullptr;
105 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
106 // value in every iteration.
107 bool IsNestingLegal = false; // Can a hardware loop be a parent to
108 // another hardware loop?
109 bool CounterInReg = false; // Should loop counter be updated in
110 // the loop via a phi?
111 bool PerformEntryTest = false; // Generate the intrinsic which also performs
112 // icmp ne zero on the loop counter value and
113 // produces an i1 to guard the loop entry.
114 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
115 DominatorTree &DT, bool ForceNestedLoop = false,
116 bool ForceHardwareLoopPHI = false);
117 bool canAnalyze(LoopInfo &LI);
118};
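// Illustrative sketch (not part of this interface): the typical flow a
// hardware-loop transform follows with this struct, using the
// isHardwareLoopProfitable query declared further down in this class.
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (HWLoopInfo.canAnalyze(LI) &&
//       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
//     ; // Emit the hardware-loop intrinsics using HWLoopInfo.CountType,
//       // HWLoopInfo.ExitCount, HWLoopInfo.LoopDecrement, etc.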
119
120class IntrinsicCostAttributes {
121 const IntrinsicInst *II = nullptr;
122 Type *RetTy = nullptr;
123 Intrinsic::ID IID;
124 SmallVector<Type *, 4> ParamTys;
125 SmallVector<const Value *, 4> Arguments;
126 FastMathFlags FMF;
127 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
128 // arguments and the return value will be computed based on types.
129 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
130
131public:
132 IntrinsicCostAttributes(
133 Intrinsic::ID Id, const CallBase &CI,
134 InstructionCost ScalarCost = InstructionCost::getInvalid(),
135 bool TypeBasedOnly = false);
136
137 IntrinsicCostAttributes(
138 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
139 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
140 InstructionCost ScalarCost = InstructionCost::getInvalid());
141
142 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
143 ArrayRef<const Value *> Args);
144
145 IntrinsicCostAttributes(
146 Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
147 ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
148 const IntrinsicInst *I = nullptr,
149 InstructionCost ScalarCost = InstructionCost::getInvalid());
150
151 Intrinsic::ID getID() const { return IID; }
152 const IntrinsicInst *getInst() const { return II; }
153 Type *getReturnType() const { return RetTy; }
154 FastMathFlags getFlags() const { return FMF; }
155 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
156 const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
157 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
158
159 bool isTypeBasedOnly() const {
160 return Arguments.empty();
161 }
162
163 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
164};
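// Illustrative sketch (not part of this interface): building a purely
// type-based cost query for a double-precision fmuladd and handing it to
// TTI.getIntrinsicInstrCost (declared further down in this class). DblTy is
// assumed to be Type::getDoubleTy(Ctx).
//
//   IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, DblTy,
//                                 {DblTy, DblTy, DblTy});
//   InstructionCost Cost =
//       TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_RecipThroughput);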
165
166enum class TailFoldingStyle {
167 /// Don't use tail folding
168 None,
169 /// Use predicate only to mask operations on data in the loop.
  /// When the VL is not known to be a power of 2, this method requires a
  /// runtime overflow check for the i + VL increment in the loop, because it
  /// compares the scalar induction variable against the trip count rounded up
  /// by VL, which may overflow. When the VL is a power of 2, both the
  /// increment and the rounded-up trip count overflow to 0, so no runtime
  /// check is needed: the loop exits when the induction variable equals the
  /// rounded-up trip count, i.e. when both are 0.
177 Data,
178 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
179 /// calculate the mask and instead implements this with a
180 /// splat/stepvector/cmp.
181 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
182 /// active.lane.mask intrinsic when it is not natively supported?
183 DataWithoutLaneMask,
184 /// Use predicate to control both data and control flow.
185 /// This method always requires a runtime overflow check for the i + VL
  /// increment inside the loop, because it uses the result directly in the
187 /// active.lane.mask to calculate the mask for the next iteration. If the
188 /// increment overflows, the mask is no longer correct.
189 DataAndControlFlow,
190 /// Use predicate to control both data and control flow, but modify
191 /// the trip count so that a runtime overflow check can be avoided
192 /// and such that the scalar epilogue loop can always be removed.
193 DataAndControlFlowWithoutRuntimeCheck,
194 /// Use predicated EVL instructions for tail-folding.
195 /// Indicates that VP intrinsics should be used.
196 DataWithEVL,
197};
198
199struct TailFoldingInfo {
200 TargetLibraryInfo *TLI;
201 LoopVectorizationLegality *LVL;
202 InterleavedAccessInfo *IAI;
203 TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
204 InterleavedAccessInfo *IAI)
205 : TLI(TLI), LVL(LVL), IAI(IAI) {}
206};
207
208class TargetTransformInfo;
209typedef TargetTransformInfo TTI;
210
211/// This pass provides access to the codegen interfaces that are needed
212/// for IR-level transformations.
213class TargetTransformInfo {
214public:
215 /// Construct a TTI object using a type implementing the \c Concept
216 /// API below.
217 ///
218 /// This is used by targets to construct a TTI wrapping their target-specific
219 /// implementation that encodes appropriate costs for their target.
220 template <typename T> TargetTransformInfo(T Impl);
221
222 /// Construct a baseline TTI object using a minimal implementation of
223 /// the \c Concept API below.
224 ///
225 /// The TTI implementation will reflect the information in the DataLayout
226 /// provided if non-null.
227 explicit TargetTransformInfo(const DataLayout &DL);
228
229 // Provide move semantics.
230 TargetTransformInfo(TargetTransformInfo &&Arg);
231 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
232
233 // We need to define the destructor out-of-line to define our sub-classes
234 // out-of-line.
235 ~TargetTransformInfo();
236
237 /// Handle the invalidation of this information.
238 ///
239 /// When used as a result of \c TargetIRAnalysis this method will be called
240 /// when the function this was computed for changes. When it returns false,
241 /// the information is preserved across those changes.
242 bool invalidate(Function &, const PreservedAnalyses &,
243 FunctionAnalysisManager::Invalidator &) {
244 // FIXME: We should probably in some way ensure that the subtarget
245 // information for a function hasn't changed.
246 return false;
247 }
248
249 /// \name Generic Target Information
250 /// @{
251
252 /// The kind of cost model.
253 ///
254 /// There are several different cost models that can be customized by the
255 /// target. The normalization of each cost model may be target specific.
256 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
257 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
258 enum TargetCostKind {
259 TCK_RecipThroughput, ///< Reciprocal throughput.
260 TCK_Latency, ///< The latency of instruction.
261 TCK_CodeSize, ///< Instruction code size.
262 TCK_SizeAndLatency ///< The weighted sum of size and latency.
263 };
264
265 /// Underlying constants for 'cost' values in this interface.
266 ///
267 /// Many APIs in this interface return a cost. This enum defines the
268 /// fundamental values that should be used to interpret (and produce) those
269 /// costs. The costs are returned as an int rather than a member of this
270 /// enumeration because it is expected that the cost of one IR instruction
271 /// may have a multiplicative factor to it or otherwise won't fit directly
272 /// into the enum. Moreover, it is common to sum or average costs which works
273 /// better as simple integral values. Thus this enum only provides constants.
274 /// Also note that the returned costs are signed integers to make it natural
275 /// to add, subtract, and test with zero (a common boundary condition). It is
276 /// not expected that 2^32 is a realistic cost to be modeling at any point.
277 ///
278 /// Note that these costs should usually reflect the intersection of code-size
279 /// cost and execution cost. A free instruction is typically one that folds
280 /// into another instruction. For example, reg-to-reg moves can often be
281 /// skipped by renaming the registers in the CPU, but they still are encoded
282 /// and thus wouldn't be considered 'free' here.
283 enum TargetCostConstants {
284 TCC_Free = 0, ///< Expected to fold away in lowering.
285 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
286 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
287 };
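  // Illustrative sketch (not part of this interface): costs are normally
  // accumulated as InstructionCost values and compared against a pass-specific
  // budget expressed in multiples of TCC_Basic; `Budget` below is a
  // hypothetical threshold.
  //
  //   InstructionCost Total = 0;
  //   for (const Instruction &I : BB)
  //     Total += TTI.getInstructionCost(&I, TTI::TCK_SizeAndLatency);
  //   bool Affordable = Total.isValid() && Total <= Budget * TCC_Basic;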
288
289 /// Estimate the cost of a GEP operation when lowered.
290 ///
291 /// \p PointeeType is the source element type of the GEP.
292 /// \p Ptr is the base pointer operand.
293 /// \p Operands is the list of indices following the base pointer.
294 ///
295 /// \p AccessType is a hint as to what type of memory might be accessed by
296 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
297 /// folded into the addressing mode of a load/store. If AccessType is null,
298 /// then the resulting target type based off of PointeeType will be used as an
299 /// approximation.
300 InstructionCost
301 getGEPCost(Type *PointeeType, const Value *Ptr,
302 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
303 TargetCostKind CostKind = TCK_SizeAndLatency) const;
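  // Illustrative sketch (not part of this interface): querying the cost of an
  // existing GEP instruction, using its result element type as the access
  // hint.
  //
  //   auto *GEP = cast<GetElementPtrInst>(&I);
  //   SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
  //   InstructionCost Cost = TTI.getGEPCost(GEP->getSourceElementType(),
  //                                         GEP->getPointerOperand(), Indices,
  //                                         GEP->getResultElementType());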
304
305 /// Describe known properties for a set of pointers.
306 struct PointersChainInfo {
307 /// All the GEPs in a set have same base address.
308 unsigned IsSameBaseAddress : 1;
    /// These properties are only valid if IsSameBaseAddress is set.
    /// True if all pointers are separated by a unit stride.
    unsigned IsUnitStride : 1;
    /// True if the distance between any two neighbouring pointers is known.
313 unsigned IsKnownStride : 1;
314 unsigned Reserved : 29;
315
316 bool isSameBase() const { return IsSameBaseAddress; }
317 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
318 bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
319
    static PointersChainInfo getUnitStride() {
      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
              /*IsKnownStride=*/1, 0};
    }
    static PointersChainInfo getKnownStride() {
      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
              /*IsKnownStride=*/1, 0};
    }
    static PointersChainInfo getUnknownStride() {
      return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
              /*IsKnownStride=*/0, 0};
    }
332 };
333 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
334
  /// Estimate the cost of a chain of pointers (typically pointer operands of a
  /// chain of loads or stores within the same block) when lowered.
  /// \p AccessTy is the type of the loads/stores that will ultimately use the
  /// \p Ptrs.
  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const PointersChainInfo &Info, Type *AccessTy,
                       TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
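  // Illustrative sketch (not part of this interface): costing the pointer
  // operands of a chain of consecutive loads that share a base pointer;
  // `Ptrs` is assumed to have been collected by the caller and `LoadTy` is the
  // scalar type being loaded.
  //
  //   InstructionCost Cost = TTI.getPointersChainCost(
  //       Ptrs, /*Base=*/Ptrs.front(), TTI::PointersChainInfo::getUnitStride(),
  //       /*AccessTy=*/LoadTy);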
345
346 /// \returns A value by which our inlining threshold should be multiplied.
347 /// This is primarily used to bump up the inlining threshold wholesale on
348 /// targets where calls are unusually expensive.
349 ///
350 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
351 /// individual classes of instructions would be better.
352 unsigned getInliningThresholdMultiplier() const;
353
  /// \returns A multiplier used by the inliner's cost-benefit analysis when
  /// weighing the estimated cycle savings of inlining.
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;

  /// \returns A multiplier used by the inliner's cost-benefit analysis when
  /// deciding whether inlining is profitable.
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
356
357 /// \returns A value to be added to the inlining threshold.
358 unsigned adjustInliningThreshold(const CallBase *CB) const;
359
  /// \returns The cost of having an Alloca in the caller if not inlined, to be
  /// added to the threshold.
362 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
363
364 /// \returns Vector bonus in percent.
365 ///
366 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
367 /// and apply this bonus based on the percentage of vector instructions. A
368 /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
  /// arbitrary and evolved over time by accident as much as because they are
  /// principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
374 int getInlinerVectorBonusPercent() const;
375
376 /// \return the expected cost of a memcpy, which could e.g. depend on the
377 /// source/destination type and alignment and the number of bytes copied.
378 InstructionCost getMemcpyCost(const Instruction *I) const;
379
380 /// Returns the maximum memset / memcpy size in bytes that still makes it
381 /// profitable to inline the call.
382 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const;
383
384 /// \return The estimated number of case clusters when lowering \p 'SI'.
385 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
386 /// table.
387 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
388 unsigned &JTSize,
389 ProfileSummaryInfo *PSI,
390 BlockFrequencyInfo *BFI) const;
391
392 /// Estimate the cost of a given IR user when lowered.
393 ///
394 /// This can estimate the cost of either a ConstantExpr or Instruction when
395 /// lowered.
396 ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must equal
  /// the number of current operands the IR user has, and their order on the
  /// list must be the same as the order of the current operands.
402 ///
403 /// The returned cost is defined in terms of \c TargetCostConstants, see its
404 /// comments for a detailed explanation of the cost values.
405 InstructionCost getInstructionCost(const User *U,
406 ArrayRef<const Value *> Operands,
407 TargetCostKind CostKind) const;
408
409 /// This is a helper function which calls the three-argument
410 /// getInstructionCost with \p Operands which are the current operands U has.
411 InstructionCost getInstructionCost(const User *U,
412 TargetCostKind CostKind) const {
413 SmallVector<const Value *, 4> Operands(U->operand_values());
414 return getInstructionCost(U, Operands, CostKind);
415 }
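  // Illustrative sketch (not part of this interface): estimating what U would
  // cost if one of its operands were replaced, e.g. to decide whether sinking
  // a constant into it is free. `Idx` and `C` are hypothetical.
  //
  //   SmallVector<const Value *, 4> Ops(U->operand_values());
  //   Ops[Idx] = C;
  //   InstructionCost CostWithC =
  //       TTI.getInstructionCost(U, Ops, TTI::TCK_SizeAndLatency);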
416
417 /// If a branch or a select condition is skewed in one direction by more than
418 /// this factor, it is very likely to be predicted correctly.
419 BranchProbability getPredictableBranchThreshold() const;
420
421 /// Return true if branch divergence exists.
422 ///
423 /// Branch divergence has a significantly negative impact on GPU performance
424 /// when threads in the same wavefront take different paths due to conditional
425 /// branches.
426 ///
427 /// If \p F is passed, provides a context function. If \p F is known to only
428 /// execute in a single threaded environment, the target may choose to skip
429 /// uniformity analysis and assume all values are uniform.
430 bool hasBranchDivergence(const Function *F = nullptr) const;
431
432 /// Returns whether V is a source of divergence.
433 ///
434 /// This function provides the target-dependent information for
435 /// the target-independent UniformityAnalysis.
436 bool isSourceOfDivergence(const Value *V) const;
437
  /// Returns true for the target-specific set of operations that produce a
  /// uniform result even when given non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;
442
443 /// Query the target whether the specified address space cast from FromAS to
444 /// ToAS is valid.
445 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
446
447 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
448 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
449
450 /// Returns the address space ID for a target's 'flat' address space. Note
451 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
452 /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
456 /// compared to the same memory location accessed through a pointer with a
457 /// different address space.
  ///
459 /// This is for targets with different pointer representations which can
460 /// be converted with the addrspacecast instruction. If a pointer is converted
461 /// to this address space, optimizations should attempt to replace the access
462 /// with the source address space.
463 ///
464 /// \returns ~0u if the target does not have such a flat address space to
465 /// optimize away.
466 unsigned getFlatAddressSpace() const;
467
468 /// Return any intrinsic address operand indexes which may be rewritten if
469 /// they use a flat address space pointer.
470 ///
471 /// \returns true if the intrinsic was handled.
472 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
473 Intrinsic::ID IID) const;
474
475 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
476
477 /// Return true if globals in this address space can have initializers other
478 /// than `undef`.
479 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
480
481 unsigned getAssumedAddrSpace(const Value *V) const;
482
483 bool isSingleThreaded() const;
484
485 std::pair<const Value *, unsigned>
486 getPredicatedAddrSpace(const Value *V) const;
487
488 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
489 /// NewV, which has a different address space. This should happen for every
490 /// operand index that collectFlatAddressOperands returned for the intrinsic.
491 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
492 /// new value (which may be the original \p II with modified operands).
493 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
494 Value *NewV) const;
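  // Illustrative sketch (not part of this interface): how an
  // infer-address-space style pass can use the two hooks above together. II is
  // an intrinsic using the flat pointer OldV, and NewV is the equivalent
  // pointer in a specific address space.
  //
  //   SmallVector<int, 2> OpIndexes;
  //   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID()))
  //     if (Value *NewI = TTI.rewriteIntrinsicWithAddressSpace(II, OldV, NewV))
  //       if (NewI != II)
  //         II->replaceAllUsesWith(NewI);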
495
496 /// Test whether calls to a function lower to actual program function
497 /// calls.
498 ///
499 /// The idea is to test whether the program is likely to require a 'call'
500 /// instruction or equivalent in order to call the given function.
501 ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
503 /// should probably move to simpler cost metrics using the above.
504 /// Alternatively, we could split the cost interface into distinct code-size
505 /// and execution-speed costs. This would allow modelling the core of this
506 /// query more accurately as a call is a single small instruction, but
507 /// incurs significant execution cost.
508 bool isLoweredToCall(const Function *F) const;
509
510 struct LSRCost {
511 /// TODO: Some of these could be merged. Also, a lexical ordering
512 /// isn't always optimal.
513 unsigned Insns;
514 unsigned NumRegs;
515 unsigned AddRecCost;
516 unsigned NumIVMuls;
517 unsigned NumBaseAdds;
518 unsigned ImmCost;
519 unsigned SetupCost;
520 unsigned ScaleCost;
521 };
522
523 /// Parameters that control the generic loop unrolling transformation.
524 struct UnrollingPreferences {
525 /// The cost threshold for the unrolled loop. Should be relative to the
526 /// getInstructionCost values returned by this API, and the expectation is
527 /// that the unrolled loop's instructions when run through that interface
528 /// should not exceed this cost. However, this is only an estimate. Also,
529 /// specific loops may be unrolled even with a cost above this threshold if
530 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
531 /// restriction.
532 unsigned Threshold;
533 /// If complete unrolling will reduce the cost of the loop, we will boost
534 /// the Threshold by a certain percent to allow more aggressive complete
535 /// unrolling. This value provides the maximum boost percentage that we
536 /// can apply to Threshold (The value should be no less than 100).
537 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
538 /// MaxPercentThresholdBoost / 100)
539 /// E.g. if complete unrolling reduces the loop execution time by 50%
540 /// then we boost the threshold by the factor of 2x. If unrolling is not
541 /// expected to reduce the running time, then we do not increase the
542 /// threshold.
543 unsigned MaxPercentThresholdBoost;
544 /// The cost threshold for the unrolled loop when optimizing for size (set
545 /// to UINT_MAX to disable).
546 unsigned OptSizeThreshold;
547 /// The cost threshold for the unrolled loop, like Threshold, but used
548 /// for partial/runtime unrolling (set to UINT_MAX to disable).
549 unsigned PartialThreshold;
550 /// The cost threshold for the unrolled loop when optimizing for size, like
551 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
552 /// UINT_MAX to disable).
553 unsigned PartialOptSizeThreshold;
554 /// A forced unrolling factor (the number of concatenated bodies of the
555 /// original loop in the unrolled loop body). When set to 0, the unrolling
556 /// transformation will select an unrolling factor based on the current cost
557 /// threshold and other factors.
558 unsigned Count;
559 /// Default unroll count for loops with run-time trip count.
560 unsigned DefaultUnrollRuntimeCount;
561 // Set the maximum unrolling factor. The unrolling factor may be selected
562 // using the appropriate cost threshold, but may not exceed this number
563 // (set to UINT_MAX to disable). This does not apply in cases where the
564 // loop is being fully unrolled.
565 unsigned MaxCount;
    /// Set the maximum upper bound of the trip count. Allowing MaxUpperBound
    /// to be overridden by a target gives more flexibility in certain cases.
    /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
569 unsigned MaxUpperBound;
570 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
571 /// applies even if full unrolling is selected. This allows a target to fall
572 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
573 unsigned FullUnrollMaxCount;
574 // Represents number of instructions optimized when "back edge"
575 // becomes "fall through" in unrolled loop.
576 // For now we count a conditional branch on a backedge and a comparison
577 // feeding it.
578 unsigned BEInsns;
579 /// Allow partial unrolling (unrolling of loops to expand the size of the
580 /// loop body, not only to eliminate small constant-trip-count loops).
581 bool Partial;
582 /// Allow runtime unrolling (unrolling of loops to expand the size of the
583 /// loop body even when the number of loop iterations is not known at
584 /// compile time).
585 bool Runtime;
586 /// Allow generation of a loop remainder (extra iterations after unroll).
587 bool AllowRemainder;
588 /// Allow emitting expensive instructions (such as divisions) when computing
589 /// the trip count of a loop for runtime unrolling.
590 bool AllowExpensiveTripCount;
591 /// Apply loop unroll on any kind of loop
592 /// (mainly to loops that fail runtime unrolling).
593 bool Force;
594 /// Allow using trip count upper bound to unroll loops.
595 bool UpperBound;
596 /// Allow unrolling of all the iterations of the runtime loop remainder.
597 bool UnrollRemainder;
598 /// Allow unroll and jam. Used to enable unroll and jam for the target.
599 bool UnrollAndJam;
600 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
601 /// value above is used during unroll and jam for the outer loop size.
602 /// This value is used in the same manner to limit the size of the inner
603 /// loop.
604 unsigned UnrollAndJamInnerLoopThreshold;
605 /// Don't allow loop unrolling to simulate more than this number of
606 /// iterations when checking full unroll profitability
607 unsigned MaxIterationsCountToAnalyze;
608 /// Don't disable runtime unroll for the loops which were vectorized.
609 bool UnrollVectorizedLoop = false;
610 };
611
612 /// Get target-customized preferences for the generic loop unrolling
613 /// transformation. The caller will initialize UP with the current
614 /// target-independent defaults.
615 void getUnrollingPreferences(Loop *L, ScalarEvolution &,
616 UnrollingPreferences &UP,
617 OptimizationRemarkEmitter *ORE) const;
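  // Illustrative sketch (not part of this interface): a client first seeds UP
  // with target-independent defaults, then lets the target adjust them before
  // picking an unroll count. `seedDefaults` and `pickCountFromCost` are
  // hypothetical helpers.
  //
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   seedDefaults(UP);
  //   TTI.getUnrollingPreferences(L, SE, UP, &ORE);
  //   unsigned Count = UP.Count ? UP.Count : pickCountFromCost(UP.Threshold);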
618
619 /// Query the target whether it would be profitable to convert the given loop
620 /// into a hardware loop.
621 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
622 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
623 HardwareLoopInfo &HWLoopInfo) const;
624
  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
627 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
628
629 /// Query the target what the preferred style of tail folding is.
630 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
631 /// may (or will never) overflow for the suggested VF/UF in the given loop.
632 /// Targets can use this information to select a more optimal tail folding
633 /// style. The value conservatively defaults to true, such that no assumptions
634 /// are made on overflow.
635 TailFoldingStyle
636 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
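  // Illustrative sketch (not part of this interface): how a vectorizer might
  // branch on the reported style; the plan* helpers are hypothetical.
  //
  //   switch (TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)) {
  //   case TailFoldingStyle::None:
  //     planWithScalarEpilogue();
  //     break;
  //   case TailFoldingStyle::DataWithEVL:
  //     planWithVPIntrinsics();
  //     break;
  //   default:
  //     planWithActiveLaneMask();
  //     break;
  //   }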
637
638 // Parameters that control the loop peeling transformation
639 struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor is chosen based on profile information and other
    /// factors.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true, the peeling cost model can decide to peel only
    /// some iterations, and in this case it will set this to false.
    bool PeelProfiledIterations;
653 };
654
655 /// Get target-customized preferences for the generic loop peeling
656 /// transformation. The caller will initialize \p PP with the current
657 /// target-independent defaults with information from \p L and \p SE.
658 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
659 PeelingPreferences &PP) const;
660
661 /// Targets can implement their own combinations for target-specific
662 /// intrinsics. This function will be called from the InstCombine pass every
663 /// time a target-specific intrinsic is encountered.
664 ///
  /// \returns std::nullopt to not do anything target specific or a value that
  /// will be returned from the InstCombiner. It is also possible to stop
  /// further processing of the intrinsic by returning nullptr.
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
670 /// Can be used to implement target-specific instruction combining.
671 /// \see instCombineIntrinsic
  std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
      KnownBits &Known, bool &KnownBitsComputed) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
682 /// @}
683
684 /// \name Scalar Target Information
685 /// @{
686
687 /// Flags indicating the kind of support for population count.
688 ///
689 /// Compared to the SW implementation, HW support is supposed to
690 /// significantly boost the performance when the population is dense, and it
691 /// may or may not degrade performance if the population is sparse. A HW
692 /// support is considered as "Fast" if it can outperform, or is on a par
693 /// with, SW implementation when the population is sparse; otherwise, it is
694 /// considered as "Slow".
695 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
696
  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
700 bool isLegalAddImmediate(int64_t Imm) const;
701
702 /// Return true if adding the specified scalable immediate is legal, that is
703 /// the target has add instructions which can add a register with the
704 /// immediate (multiplied by vscale) without having to materialize the
705 /// immediate into a register.
706 bool isLegalAddScalableImmediate(int64_t Imm) const;
707
  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
711 /// register.
712 bool isLegalICmpImmediate(int64_t Imm) const;
713
714 /// Return true if the addressing mode represented by AM is legal for
715 /// this target, for a load/store of the specified type.
716 /// The type may be VoidTy, in which case only return true if the addressing
717 /// mode is legal for a load/store of any legal type.
718 /// If target returns true in LSRWithInstrQueries(), I may be valid.
719 /// \param ScalableOffset represents a quantity of bytes multiplied by vscale,
720 /// an invariant value known only at runtime. Most targets should not accept
721 /// a scalable offset.
722 ///
723 /// TODO: Handle pre/postinc as well.
724 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
725 bool HasBaseReg, int64_t Scale,
726 unsigned AddrSpace = 0, Instruction *I = nullptr,
727 int64_t ScalableOffset = 0) const;
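  // Illustrative sketch (not part of this interface): asking whether an i32
  // load could use a [base + 4 * index + 16] address on this target; Int32Ty
  // is assumed to be Type::getInt32Ty(Ctx).
  //
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/4);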
728
729 /// Return true if LSR cost of C1 is lower than C2.
730 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
731 const TargetTransformInfo::LSRCost &C2) const;
732
733 /// Return true if LSR major cost is number of registers. Targets which
734 /// implement their own isLSRCostLess and unset number of registers as major
735 /// cost should return false, otherwise return true.
736 bool isNumRegsMajorCostOfLSR() const;
737
  /// Return true if LSR should attempt to replace a use of an otherwise dead
739 /// primary IV in the latch condition with another IV available in the loop.
740 /// When successful, makes the primary IV dead.
741 bool shouldFoldTerminatingConditionAfterLSR() const;
742
743 /// \returns true if LSR should not optimize a chain that includes \p I.
744 bool isProfitableLSRChainElement(Instruction *I) const;
745
746 /// Return true if the target can fuse a compare and branch.
747 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
748 /// calculation for the instructions in a loop.
749 bool canMacroFuseCmp() const;
750
  /// Return true if the target can save a compare for loop count; for example,
  /// a hardware loop can save a compare.
753 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
754 DominatorTree *DT, AssumptionCache *AC,
755 TargetLibraryInfo *LibInfo) const;
756
757 enum AddressingModeKind {
758 AMK_PreIndexed,
759 AMK_PostIndexed,
760 AMK_None
761 };
762
763 /// Return the preferred addressing mode LSR should make efforts to generate.
764 AddressingModeKind getPreferredAddressingMode(const Loop *L,
765 ScalarEvolution *SE) const;
766
767 /// Return true if the target supports masked store.
768 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
769 /// Return true if the target supports masked load.
770 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
771
772 /// Return true if the target supports nontemporal store.
773 bool isLegalNTStore(Type *DataType, Align Alignment) const;
774 /// Return true if the target supports nontemporal load.
775 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
776
  /// \returns true if the target supports broadcasting a load to a vector of
778 /// type <NumElements x ElementTy>.
779 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
780
781 /// Return true if the target supports masked scatter.
782 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
783 /// Return true if the target supports masked gather.
784 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
785 /// Return true if the target forces scalarizing of llvm.masked.gather
786 /// intrinsics.
787 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
788 /// Return true if the target forces scalarizing of llvm.masked.scatter
789 /// intrinsics.
790 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
791
792 /// Return true if the target supports masked compress store.
793 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const;
794 /// Return true if the target supports masked expand load.
795 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
796
797 /// Return true if the target supports strided load.
798 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
799
  /// Return true if this is an alternating opcode pattern that can be lowered
  /// to a single instruction on the target. In X86 this is for the addsub
  /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
  /// This function expects two opcodes, \p Opcode0 and \p Opcode1, which are
  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
  /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
806 /// \p VecTy is the vector type of the instruction to be generated.
807 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
808 const SmallBitVector &OpcodeMask) const;
809
810 /// Return true if we should be enabling ordered reductions for the target.
811 bool enableOrderedReductions() const;
812
813 /// Return true if the target has a unified operation to calculate division
814 /// and remainder. If so, the additional implicit multiplication and
815 /// subtraction required to calculate a remainder from division are free. This
816 /// can enable more aggressive transformations for division and remainder than
817 /// would typically be allowed using throughput or size cost models.
818 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
819
820 /// Return true if the given instruction (assumed to be a memory access
821 /// instruction) has a volatile variant. If that's the case then we can avoid
822 /// addrspacecast to generic AS for volatile loads/stores. Default
823 /// implementation returns false, which prevents address space inference for
824 /// volatile loads/stores.
825 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
826
827 /// Return true if target doesn't mind addresses in vectors.
828 bool prefersVectorizedAddressing() const;
829
830 /// Return the cost of the scaling factor used in the addressing
831 /// mode represented by AM for this target, for a load/store
832 /// of the specified type.
833 /// If the AM is supported, the return value must be >= 0.
834 /// If the AM is not supported, it returns a negative value.
835 /// TODO: Handle pre/postinc as well.
836 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
837 int64_t BaseOffset, bool HasBaseReg,
838 int64_t Scale,
839 unsigned AddrSpace = 0) const;
840
841 /// Return true if the loop strength reduce pass should make
842 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
843 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
844 /// immediate offset and no index register.
845 bool LSRWithInstrQueries() const;
846
847 /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
849 /// by referencing its sub-register AX.
850 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
851
  /// Return true if it is profitable to hoist an instruction in the
  /// then/else blocks to before the if.
854 bool isProfitableToHoist(Instruction *I) const;
855
856 bool useAA() const;
857
858 /// Return true if this type is legal.
859 bool isTypeLegal(Type *Ty) const;
860
861 /// Returns the estimated number of registers required to represent \p Ty.
862 unsigned getRegUsageForType(Type *Ty) const;
863
864 /// Return true if switches should be turned into lookup tables for the
865 /// target.
866 bool shouldBuildLookupTables() const;
867
868 /// Return true if switches should be turned into lookup tables
869 /// containing this constant value for the target.
870 bool shouldBuildLookupTablesForConstant(Constant *C) const;
871
872 /// Return true if lookup tables should be turned into relative lookup tables.
873 bool shouldBuildRelLookupTables() const;
874
  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
877 bool useColdCCForColdCall(Function &F) const;
878
879 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
880 /// are set if the demanded result elements need to be inserted and/or
881 /// extracted from vectors.
882 InstructionCost getScalarizationOverhead(VectorType *Ty,
883 const APInt &DemandedElts,
884 bool Insert, bool Extract,
885 TTI::TargetCostKind CostKind) const;
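  // Illustrative sketch (not part of this interface): the overhead of
  // extracting every lane of a <4 x float> value and re-inserting the results,
  // with FltTy assumed to be Type::getFloatTy(Ctx).
  //
  //   auto *VecTy = FixedVectorType::get(FltTy, 4);
  //   APInt DemandedElts = APInt::getAllOnes(4);
  //   InstructionCost Overhead = TTI.getScalarizationOverhead(
  //       VecTy, DemandedElts, /*Insert=*/true, /*Extract=*/true,
  //       TTI::TCK_RecipThroughput);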
886
  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
890 InstructionCost
891 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
892 ArrayRef<Type *> Tys,
893 TTI::TargetCostKind CostKind) const;
894
895 /// If target has efficient vector element load/store instructions, it can
896 /// return true here so that insertion/extraction costs are not added to
897 /// the scalarization cost of a load/store.
898 bool supportsEfficientVectorElementLoadStore() const;
899
900 /// If the target supports tail calls.
901 bool supportsTailCalls() const;
902
  /// If the target supports tail calls for the call \p CB.
904 bool supportsTailCallFor(const CallBase *CB) const;
905
906 /// Don't restrict interleaved unrolling to small loops.
907 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
908
  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
911 struct MemCmpExpansionOptions {
912 // Return true if memcmp expansion is enabled.
913 operator bool() const { return MaxNumLoads > 0; }
914
915 // Maximum number of load operations.
916 unsigned MaxNumLoads = 0;
917
918 // The list of available load sizes (in bytes), sorted in decreasing order.
919 SmallVector<unsigned, 8> LoadSizes;
920
921 // For memcmp expansion when the memcmp result is only compared equal or
922 // not-equal to 0, allow up to this number of load pairs per block. As an
923 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
924 // a0 = load2bytes &a[0]
925 // b0 = load2bytes &b[0]
926 // a2 = load1byte &a[2]
927 // b2 = load1byte &b[2]
928 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
929 unsigned NumLoadsPerBlock = 1;
930
931 // Set to true to allow overlapping loads. For example, 7-byte compares can
932 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
933 // requires all loads in LoadSizes to be doable in an unaligned way.
934 bool AllowOverlappingLoads = false;
935
936 // Sometimes, the amount of data that needs to be compared is smaller than
937 // the standard register size, but it cannot be loaded with just one load
938 // instruction. For example, if the size of the memory comparison is 6
939 // bytes, we can handle it more efficiently by loading all 6 bytes in a
940 // single block and generating an 8-byte number, instead of generating two
941 // separate blocks with conditional jumps for 4 and 2 byte loads. This
942 // approach simplifies the process and produces the comparison result as
943 // normal. This array lists the allowed sizes of memcmp tails that can be
    // merged into one block.
945 SmallVector<unsigned, 4> AllowedTailExpansions;
946 };
947 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
948 bool IsZeroCmp) const;
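  // Illustrative sketch (not part of this interface): a memcmp expansion pass
  // first checks whether expansion is enabled at all, then picks load sizes
  // from the returned options.
  //
  //   if (auto Options = TTI.enableMemCmpExpansion(/*OptSize=*/false,
  //                                                /*IsZeroCmp=*/true)) {
  //     // Use at most Options.MaxNumLoads loads, preferring the sizes listed
  //     // (largest first) in Options.LoadSizes.
  //   }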
949
  /// Should the Select Optimization pass be enabled and run.
951 bool enableSelectOptimize() const;
952
953 /// Should the Select Optimization pass treat the given instruction like a
954 /// select, potentially converting it to a conditional branch. This can
955 /// include select-like instructions like or(zext(c), x) that can be converted
956 /// to selects.
957 bool shouldTreatInstructionLikeSelect(const Instruction *I) const;
958
959 /// Enable matching of interleaved access groups.
960 bool enableInterleavedAccessVectorization() const;
961
  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
965 bool enableMaskedInterleavedAccessVectorization() const;
966
967 /// Indicate that it is potentially unsafe to automatically vectorize
968 /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
970 /// does not support IEEE-754 denormal numbers, while depending on the
971 /// platform, scalar floating-point math does.
972 /// This applies to floating-point math operations and calls, not memory
973 /// operations, shuffles, or casts.
974 bool isFPVectorizationPotentiallyUnsafe() const;
975
976 /// Determine if the target supports unaligned memory accesses.
977 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
978 unsigned AddressSpace = 0,
979 Align Alignment = Align(1),
980 unsigned *Fast = nullptr) const;
981
982 /// Return hardware support for population count.
983 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
984
985 /// Return true if the hardware has a fast square-root instruction.
986 bool haveFastSqrt(Type *Ty) const;
987
988 /// Return true if the cost of the instruction is too high to speculatively
989 /// execute and should be kept behind a branch.
990 /// This normally just wraps around a getInstructionCost() call, but some
991 /// targets might report a low TCK_SizeAndLatency value that is incompatible
992 /// with the fixed TCC_Expensive value.
993 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
994 bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;
995
996 /// Return true if it is faster to check if a floating-point value is NaN
997 /// (or not-NaN) versus a comparison against a constant FP zero value.
998 /// Targets should override this if materializing a 0.0 for comparison is
999 /// generally as cheap as checking for ordered/unordered.
1000 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
1001
1002 /// Return the expected cost of supporting the floating point operation
1003 /// of the specified type.
1004 InstructionCost getFPOpCost(Type *Ty) const;
1005
1006 /// Return the expected cost of materializing for the given integer
1007 /// immediate of the specified type.
1008 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1009 TargetCostKind CostKind) const;
1010
1011 /// Return the expected cost of materialization for the given integer
1012 /// immediate of the specified type for a given instruction. The cost can be
1013 /// zero if the immediate can be folded into the specified instruction.
1014 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1015 const APInt &Imm, Type *Ty,
1016 TargetCostKind CostKind,
1017 Instruction *Inst = nullptr) const;
1018 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1019 const APInt &Imm, Type *Ty,
1020 TargetCostKind CostKind) const;
1021
  /// Return the expected cost for the given integer when optimising
  /// for size. This is different from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
1029 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1030 const APInt &Imm, Type *Ty) const;
1031
  /// It can be advantageous to detach complex constants from their uses to make
  /// their generation cheaper. This hook allows targets to report when such
  /// transformations might negatively affect the code generation of the
  /// underlying operation. The motivating example is divides, where hoisting
  /// constants prevents the code generator from transforming them into
  /// combinations of simpler operations.
1038 bool preferToKeepConstantsAttached(const Instruction &Inst,
1039 const Function &Fn) const;
1040
1041 /// @}
1042
1043 /// \name Vector Target Information
1044 /// @{
1045
1046 /// The various kinds of shuffle patterns for vector queries.
1047 enum ShuffleKind {
1048 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1049 SK_Reverse, ///< Reverse the order of the vector.
1050 SK_Select, ///< Selects elements from the corresponding lane of
1051 ///< either source operand. This is equivalent to a
1052 ///< vector select with a constant condition operand.
1053 SK_Transpose, ///< Transpose two vectors.
1054 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1055 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1056 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1057 ///< with any shuffle mask.
1058 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1059 ///< shuffle mask.
1060 SK_Splice ///< Concatenates elements from the first input vector
1061 ///< with elements of the second input vector. Returning
1062 ///< a vector of the same type as the input vectors.
1063 ///< Index indicates start offset in first input vector.
1064 };
1065
1066 /// Additional information about an operand's possible values.
1067 enum OperandValueKind {
1068 OK_AnyValue, // Operand can have any value.
1069 OK_UniformValue, // Operand is uniform (splat of a value).
1070 OK_UniformConstantValue, // Operand is uniform constant.
1071 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1072 };
1073
1074 /// Additional properties of an operand's values.
1075 enum OperandValueProperties {
1076 OP_None = 0,
1077 OP_PowerOf2 = 1,
1078 OP_NegatedPowerOf2 = 2,
1079 };
1080
1081 // Describe the values an operand can take. We're in the process
1082 // of migrating uses of OperandValueKind and OperandValueProperties
1083 // to use this class, and then will change the internal representation.
1084 struct OperandValueInfo {
1085 OperandValueKind Kind = OK_AnyValue;
1086 OperandValueProperties Properties = OP_None;
1087
1088 bool isConstant() const {
1089 return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
1090 }
1091 bool isUniform() const {
1092 return Kind == OK_UniformConstantValue || Kind == OK_UniformValue;
1093 }
1094 bool isPowerOf2() const {
1095 return Properties == OP_PowerOf2;
1096 }
1097 bool isNegatedPowerOf2() const {
1098 return Properties == OP_NegatedPowerOf2;
1099 }
1100
    OperandValueInfo getNoProps() const {
      return {Kind, OP_None};
    }
1104 };
1105
1106 /// \return the number of registers in the target-provided register class.
1107 unsigned getNumberOfRegisters(unsigned ClassID) const;
1108
1109 /// \return the target-provided register class ID for the provided type,
1110 /// accounting for type promotion and other type-legalization techniques that
1111 /// the target might apply. However, it specifically does not account for the
1112 /// scalarization or splitting of vector types. Should a vector type require
1113 /// scalarization or splitting into multiple underlying vector registers, that
1114 /// type should be mapped to a register class containing no registers.
1115 /// Specifically, this is designed to provide a simple, high-level view of the
1116 /// register allocation later performed by the backend. These register classes
1117 /// don't necessarily map onto the register classes used by the backend.
1118 /// FIXME: It's not currently possible to determine how many registers
1119 /// are used by the provided type.
1120 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1121
1122 /// \return the target-provided register class name
1123 const char *getRegisterClassName(unsigned ClassID) const;
1124
1125 enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
1126
1127 /// \return The width of the largest scalar or vector register type.
1128 TypeSize getRegisterBitWidth(RegisterKind K) const;
1129
1130 /// \return The width of the smallest vector register type.
1131 unsigned getMinVectorRegisterBitWidth() const;
1132
1133 /// \return The maximum value of vscale if the target specifies an
1134 /// architectural maximum vector length, and std::nullopt otherwise.
1135 std::optional<unsigned> getMaxVScale() const;
1136
1137 /// \return the value of vscale to tune the cost model for.
1138 std::optional<unsigned> getVScaleForTuning() const;
1139
1140 /// \return true if vscale is known to be a power of 2
1141 bool isVScaleKnownToBeAPowerOfTwo() const;
1142
1143 /// \return True if the vectorization factor should be chosen to
1144 /// make the vector of the smallest element type match the size of a
1145 /// vector register. For wider element types, this could result in
1146 /// creating vectors that span multiple vector registers.
1147 /// If false, the vectorization factor will be chosen based on the
1148 /// size of the widest element type.
1149 /// \p K Register Kind for vectorization.
1150 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
1151
1152 /// \return The minimum vectorization factor for types of given element
1153 /// bit width, or 0 if there is no minimum VF. The returned value only
1154 /// applies when shouldMaximizeVectorBandwidth returns true.
1155 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1156 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1157
1158 /// \return The maximum vectorization factor for types of given element
1159 /// bit width and opcode, or 0 if there is no maximum VF.
1160 /// Currently only used by the SLP vectorizer.
1161 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1162
1163 /// \return The minimum vectorization factor for the store instruction. Given
1164 /// the initial estimation of the minimum vector factor and store value type,
1165 /// it tries to find possible lowest VF, which still might be profitable for
1166 /// the vectorization.
1167 /// \param VF Initial estimation of the minimum vector factor.
1168 /// \param ScalarMemTy Scalar memory type of the store operation.
1169 /// \param ScalarValTy Scalar type of the stored value.
1170 /// Currently only used by the SLP vectorizer.
1171 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1172 Type *ScalarValTy) const;
1173
1174 /// \return True if it should be considered for address type promotion.
1175 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1176 /// profitable without finding other extensions fed by the same input.
1177 bool shouldConsiderAddressTypePromotion(
1178 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1179
1180 /// \return The size of a cache line in bytes.
1181 unsigned getCacheLineSize() const;
1182
1183 /// The possible cache levels
1184 enum class CacheLevel {
1185 L1D, // The L1 data cache
1186 L2D, // The L2 data cache
1187
1188 // We currently do not model L3 caches, as their sizes differ widely between
1189 // microarchitectures. Also, we currently do not have a use for L3 cache
1190 // size modeling yet.
1191 };
1192
1193 /// \return The size of the cache level in bytes, if available.
1194 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1195
1196 /// \return The associativity of the cache level, if available.
1197 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1198
1199 /// \return The minimum architectural page size for the target.
1200 std::optional<unsigned> getMinPageSize() const;
1201
1202 /// \return How much before a load we should place the prefetch
1203 /// instruction. This is currently measured in number of
1204 /// instructions.
1205 unsigned getPrefetchDistance() const;
1206
1207 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1208 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1209 /// and the arguments provided are meant to serve as a basis for deciding this
1210 /// for a particular loop.
1211 ///
1212 /// \param NumMemAccesses Number of memory accesses in the loop.
1213 /// \param NumStridedMemAccesses Number of the memory accesses that
1214 /// ScalarEvolution could find a known stride
1215 /// for.
1216 /// \param NumPrefetches Number of software prefetches that will be
1217 /// emitted as determined by the addresses
1218 /// involved and the cache line size.
1219 /// \param HasCall True if the loop contains a call.
1220 ///
1221 /// \return This is the minimum stride in bytes where it makes sense to start
1222 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1223 /// stride.
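  ///
  /// For illustration, a software-prefetching pass might skip small strides
  /// roughly as follows (a minimal sketch; the stride and loop statistics are
  /// assumed to have been computed by the caller):
  /// \code
  ///   if (StrideInBytes < TTI.getMinPrefetchStride(NumMemAccesses,
  ///                                                NumStridedMemAccesses,
  ///                                                NumPrefetches, HasCall))
  ///     continue; // not worth emitting a software prefetch for this access
  /// \endcode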
1224 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1225 unsigned NumStridedMemAccesses,
1226 unsigned NumPrefetches, bool HasCall) const;
1227
1228 /// \return The maximum number of iterations to prefetch ahead. If
1229 /// the required number of iterations is more than this number, no
1230 /// prefetching is performed.
1231 unsigned getMaxPrefetchIterationsAhead() const;
1232
1233 /// \return True if prefetching should also be done for writes.
1234 bool enableWritePrefetching() const;
1235
  /// \return True if the target wants to issue a prefetch in address space
  /// \p AS.
1237 bool shouldPrefetchAddressSpace(unsigned AS) const;
1238
1239 /// \return The maximum interleave factor that any transform should try to
1240 /// perform for this target. This number depends on the level of parallelism
1241 /// and the number of execution units in the CPU.
1242 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1243
1244 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1245 static OperandValueInfo getOperandInfo(const Value *V);
1246
1247 /// This is an approximation of reciprocal throughput of a math/logic op.
1248 /// A higher cost indicates less expected throughput.
1249 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1250 /// clock cycles per instruction when the instructions are not part of a
1251 /// limiting dependency chain."
1252 /// Therefore, costs should be scaled to account for multiple execution units
1253 /// on the target that can process this type of instruction. For example, if
1254 /// there are 5 scalar integer units and 2 vector integer units that can
1255 /// calculate an 'add' in a single cycle, this model should indicate that the
1256 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1257 /// add instruction.
1258 /// \p Args is an optional argument which holds the instruction operands
1259 /// values so the TTI can analyze those values searching for special
1260 /// cases or optimizations based on those values.
1261 /// \p CxtI is the optional original context instruction, if one exists, to
1262 /// provide even more information.
1263 /// \p TLibInfo is used to search for platform specific vector library
1264 /// functions for instructions that might be converted to calls (e.g. frem).
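  ///
  /// For example, a pass comparing a scalar add against a 4-wide vector add
  /// could query the model as follows (a minimal sketch; 'TTI' and 'Int32Ty'
  /// are assumed to be available in the caller):
  /// \code
  ///   InstructionCost ScalarCost =
  ///       TTI.getArithmeticInstrCost(Instruction::Add, Int32Ty);
  ///   InstructionCost VectorCost = TTI.getArithmeticInstrCost(
  ///       Instruction::Add, FixedVectorType::get(Int32Ty, 4));
  /// \endcode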
1265 InstructionCost getArithmeticInstrCost(
1266 unsigned Opcode, Type *Ty,
1267 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1270 ArrayRef<const Value *> Args = std::nullopt,
1271 const Instruction *CxtI = nullptr,
1272 const TargetLibraryInfo *TLibInfo = nullptr) const;
1273
  /// Returns the cost estimation for an alternating opcode pattern that can be
  /// lowered to a single instruction on the target. On X86 this is the addsub
  /// instruction, which corresponds to a Shuffle + FAdd + FSub pattern in IR.
  /// This function expects two opcodes: \p Opcode0 and \p Opcode1, selected by
  /// \p OpcodeMask. The mask contains one bit per lane and is a `0` when
  /// \p Opcode0 is selected and `1` when \p Opcode1 is selected.
1280 /// \p VecTy is the vector type of the instruction to be generated.
1281 InstructionCost getAltInstrCost(
1282 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1283 const SmallBitVector &OpcodeMask,
1284 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1285
1286 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1287 /// The exact mask may be passed as Mask, or else the array will be empty.
1288 /// The index and subtype parameters are used by the subvector insertion and
1289 /// extraction shuffle kinds to show the insert/extract point and the type of
1290 /// the subvector being inserted/extracted. The operands of the shuffle can be
1291 /// passed through \p Args, which helps improve the cost estimation in some
1292 /// cases, like in broadcast loads.
1293 /// NOTE: For subvector extractions Tp represents the source type.
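  ///
  /// For example, the cost of broadcasting lane 0 of a vector can be queried
  /// as follows (a minimal sketch; 'TTI' is available and 'VecTy' is a
  /// VectorType*):
  /// \code
  ///   InstructionCost SplatCost = TTI.getShuffleCost(TTI::SK_Broadcast, VecTy);
  /// \endcode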
1294 InstructionCost getShuffleCost(
1295 ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
1296 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
1297 VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
1298 const Instruction *CxtI = nullptr) const;
1299
1300 /// Represents a hint about the context in which a cast is used.
1301 ///
1302 /// For zext/sext, the context of the cast is the operand, which must be a
  /// load of some kind. For trunc, the context of the cast is the single
1304 /// user of the instruction, which must be a store of some kind.
1305 ///
1306 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1307 /// type of cast it's dealing with, as not every cast is equal. For instance,
  /// the zext of a load may be free, but the zext of an interleaved load can
  /// be (very) expensive!
1310 ///
1311 /// See \c getCastContextHint to compute a CastContextHint from a cast
1312 /// Instruction*. Callers can use it if they don't need to override the
1313 /// context and just want it to be calculated from the instruction.
1314 ///
1315 /// FIXME: This handles the types of load/store that the vectorizer can
1316 /// produce, which are the cases where the context instruction is most
1317 /// likely to be incorrect. There are other situations where that can happen
1318 /// too, which might be handled here but in the long run a more general
  /// solution of costing multiple instructions at the same time may be better.
1320 enum class CastContextHint : uint8_t {
1321 None, ///< The cast is not used with a load/store of any kind.
1322 Normal, ///< The cast is used with a normal load/store.
1323 Masked, ///< The cast is used with a masked load/store.
1324 GatherScatter, ///< The cast is used with a gather/scatter.
1325 Interleave, ///< The cast is used with an interleaved load/store.
1326 Reversed, ///< The cast is used with a reversed load/store.
1327 };
1328
1329 /// Calculates a CastContextHint from \p I.
1330 /// This should be used by callers of getCastInstrCost if they wish to
1331 /// determine the context from some instruction.
1332 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1333 /// or if it's another type of cast.
1334 static CastContextHint getCastContextHint(const Instruction *I);
1335
1336 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1337 /// zext, etc. If there is an existing instruction that holds Opcode, it
1338 /// may be passed in the 'I' parameter.
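  ///
  /// For example, the cost of an existing zero-extend can be queried together
  /// with its context as follows (a minimal sketch; 'ZExt' is assumed to be an
  /// Instruction* known to be a zext):
  /// \code
  ///   TTI::CastContextHint CCH = TTI::getCastContextHint(ZExt);
  ///   InstructionCost Cost = TTI.getCastInstrCost(
  ///       Instruction::ZExt, ZExt->getType(), ZExt->getOperand(0)->getType(),
  ///       CCH, TTI::TCK_SizeAndLatency, ZExt);
  /// \endcode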
1339 InstructionCost
1340 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1341 TTI::CastContextHint CCH,
1342 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1343 const Instruction *I = nullptr) const;
1344
1345 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1346 /// Index = -1 to indicate that there is no information about the index value.
1347 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1348 VectorType *VecTy,
1349 unsigned Index) const;
1350
1351 /// \return The expected cost of control-flow related instructions such as
1352 /// Phi, Ret, Br, Switch.
1353 InstructionCost
1354 getCFInstrCost(unsigned Opcode,
1355 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1356 const Instruction *I = nullptr) const;
1357
1358 /// \returns The expected cost of compare and select instructions. If there
1359 /// is an existing instruction that holds Opcode, it may be passed in the
1360 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1361 /// is using a compare with the specified predicate as condition. When vector
1362 /// types are passed, \p VecPred must be used for all lanes.
1363 InstructionCost
1364 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1365 CmpInst::Predicate VecPred,
1366 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1367 const Instruction *I = nullptr) const;
1368
1369 /// \return The expected cost of vector Insert and Extract.
1370 /// Use -1 to indicate that there is no information on the index value.
1371 /// This is used when the instruction is not available; a typical use
1372 /// case is to provision the cost of vectorization/scalarization in
1373 /// vectorizer passes.
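  ///
  /// For example, the estimated cost of extracting lane 0 from a vector of
  /// type 'VecTy' before any extractelement instruction exists can be queried
  /// as (a minimal sketch):
  /// \code
  ///   InstructionCost ExtractCost = TTI.getVectorInstrCost(
  ///       Instruction::ExtractElement, VecTy, TTI::TCK_RecipThroughput,
  ///       /*Index=*/0);
  /// \endcode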
1374 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1375 TTI::TargetCostKind CostKind,
1376 unsigned Index = -1, Value *Op0 = nullptr,
1377 Value *Op1 = nullptr) const;
1378
1379 /// \return The expected cost of vector Insert and Extract.
  /// This is used when the instruction is available, and the implementation
  /// asserts 'I' is not nullptr.
1382 ///
1383 /// A typical suitable use case is cost estimation when vector instruction
1384 /// exists (e.g., from basic blocks during transformation).
1385 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1386 TTI::TargetCostKind CostKind,
1387 unsigned Index = -1) const;
1388
1389 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1390 /// \p ReplicationFactor times.
1391 ///
1392 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1393 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1394 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1395 int VF,
1396 const APInt &DemandedDstElts,
1397 TTI::TargetCostKind CostKind);
1398
1399 /// \return The cost of Load and Store instructions.
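  ///
  /// For example, the cost of a 16-byte-aligned vector load from address
  /// space 0 can be queried as (a minimal sketch; 'VecTy' is assumed to be
  /// available):
  /// \code
  ///   InstructionCost LoadCost = TTI.getMemoryOpCost(
  ///       Instruction::Load, VecTy, Align(16), /*AddressSpace=*/0);
  /// \endcode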
1400 InstructionCost
1401 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1402 unsigned AddressSpace,
1403 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                  OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1405 const Instruction *I = nullptr) const;
1406
1407 /// \return The cost of VP Load and Store instructions.
1408 InstructionCost
1409 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1410 unsigned AddressSpace,
1411 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1412 const Instruction *I = nullptr) const;
1413
1414 /// \return The cost of masked Load and Store instructions.
1415 InstructionCost getMaskedMemoryOpCost(
1416 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1417 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1418
1419 /// \return The cost of Gather or Scatter operation
1420 /// \p Opcode - is a type of memory access Load or Store
1421 /// \p DataTy - a vector type of the data to be loaded or stored
1422 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1423 /// \p VariableMask - true when the memory access is predicated with a mask
1424 /// that is not a compile-time constant
1425 /// \p Alignment - alignment of single element
1426 /// \p I - the optional original context instruction, if one exists, e.g. the
1427 /// load/store to transform or the call to the gather/scatter intrinsic
1428 InstructionCost getGatherScatterOpCost(
1429 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1430 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1431 const Instruction *I = nullptr) const;
1432
1433 /// \return The cost of strided memory operations.
1434 /// \p Opcode - is a type of memory access Load or Store
1435 /// \p DataTy - a vector type of the data to be loaded or stored
1436 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1437 /// \p VariableMask - true when the memory access is predicated with a mask
1438 /// that is not a compile-time constant
1439 /// \p Alignment - alignment of single element
1440 /// \p I - the optional original context instruction, if one exists, e.g. the
1441 /// load/store to transform or the call to the gather/scatter intrinsic
1442 InstructionCost getStridedMemoryOpCost(
1443 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1444 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1445 const Instruction *I = nullptr) const;
1446
1447 /// \return The cost of the interleaved memory operation.
1448 /// \p Opcode is the memory operation code
1449 /// \p VecTy is the vector type of the interleaved access.
1450 /// \p Factor is the interleave factor
1451 /// \p Indices is the indices for interleaved load members (as interleaved
1452 /// load allows gaps)
1453 /// \p Alignment is the alignment of the memory operation
1454 /// \p AddressSpace is address space of the pointer.
1455 /// \p UseMaskForCond indicates if the memory access is predicated.
1456 /// \p UseMaskForGaps indicates if gaps should be masked.
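  ///
  /// For example, the cost of a stride-2 interleaved load where both members
  /// are used (de-interleaving a wide vector into its even and odd elements)
  /// could be queried as (a minimal sketch; 'WideVecTy' is the wide vector
  /// type being loaded):
  /// \code
  ///   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
  ///       Instruction::Load, WideVecTy, /*Factor=*/2, /*Indices=*/{0, 1},
  ///       Align(16), /*AddressSpace=*/0);
  /// \endcode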
1457 InstructionCost getInterleavedMemoryOpCost(
1458 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1459 Align Alignment, unsigned AddressSpace,
1460 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1461 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1462
1463 /// A helper function to determine the type of reduction algorithm used
  /// for a given set of FastMathFlags \p FMF.
1465 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1466 return FMF && !(*FMF).allowReassoc();
1467 }
1468
1469 /// Calculate the cost of vector reduction intrinsics.
1470 ///
1471 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1472 /// value using the operation denoted by \p Opcode. The FastMathFlags
1473 /// parameter \p FMF indicates what type of reduction we are performing:
1474 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
  /// involves successively splitting a vector in half and doing the
1476 /// operation on the pair of halves until you have a scalar value. For
1477 /// example:
1478 /// (v0, v1, v2, v3)
1479 /// ((v0+v2), (v1+v3), undef, undef)
1480 /// ((v0+v2+v1+v3), undef, undef, undef)
1481 /// This is the default behaviour for integer operations, whereas for
1482 /// floating point we only do this if \p FMF indicates that
1483 /// reassociation is allowed.
1484 /// 2. Ordered. For a vector with N elements this involves performing N
1485 /// operations in lane order, starting with an initial scalar value, i.e.
1486 /// result = InitVal + v0
1487 /// result = result + v1
1488 /// result = result + v2
1489 /// result = result + v3
1490 /// This is only the case for FP operations and when reassociation is not
1491 /// allowed.
1492 ///
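  /// For example, the cost of a floating-point add reduction with and without
  /// reassociation can be compared as follows (a minimal sketch; 'VecTy' is
  /// assumed to be a floating-point VectorType*):
  /// \code
  ///   FastMathFlags Reassoc;
  ///   Reassoc.setAllowReassoc();
  ///   InstructionCost TreeWiseCost =
  ///       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, Reassoc);
  ///   InstructionCost OrderedCost = TTI.getArithmeticReductionCost(
  ///       Instruction::FAdd, VecTy, FastMathFlags());
  /// \endcode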
1493 InstructionCost getArithmeticReductionCost(
1494 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1495 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1496
1497 InstructionCost getMinMaxReductionCost(
1498 Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
1499 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1500
1501 /// Calculate the cost of an extended reduction pattern, similar to
1502 /// getArithmeticReductionCost of an Add reduction with multiply and optional
  /// extensions. This is the cost of a pattern such as:
  /// ResTy vecreduce.add(mul(A, B)).
  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1506 InstructionCost getMulAccReductionCost(
1507 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1508 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1509
1510 /// Calculate the cost of an extended reduction pattern, similar to
1511 /// getArithmeticReductionCost of a reduction with an extension.
  /// This is the cost of a pattern such as:
1513 /// ResTy vecreduce.opcode(ext(Ty A)).
1514 InstructionCost getExtendedReductionCost(
1515 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1516 FastMathFlags FMF,
1517 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1518
1519 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1520 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1521 /// 3. scalar instruction which is to be vectorized.
1522 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1523 TTI::TargetCostKind CostKind) const;
1524
1525 /// \returns The cost of Call instructions.
1526 InstructionCost getCallInstrCost(
1527 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1528 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1529
1530 /// \returns The number of pieces into which the provided type must be
1531 /// split during legalization. Zero is returned when the answer is unknown.
1532 unsigned getNumberOfParts(Type *Tp) const;
1533
1534 /// \returns The cost of the address computation. For most targets this can be
1535 /// merged into the instruction indexing mode. Some targets might want to
1536 /// distinguish between address computation for memory operations on vector
1537 /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the ScalarEvolution object which
  /// is used to get the step value of \p Ptr in the case of a constant stride.
1540 /// The 'Ptr' parameter holds SCEV of the access pointer.
1541 InstructionCost getAddressComputationCost(Type *Ty,
1542 ScalarEvolution *SE = nullptr,
1543 const SCEV *Ptr = nullptr) const;
1544
1545 /// \returns The cost, if any, of keeping values of the given types alive
1546 /// over a callsite.
1547 ///
1548 /// Some types may require the use of register classes that do not have
1549 /// any callee-saved registers, so would require a spill and fill.
1550 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1551
1552 /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may read from
  /// or write to memory, its volatility, and the pointer. Info is undefined
1555 /// if false is returned.
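  ///
  /// For example, a pass that wants to treat a target load intrinsic like a
  /// plain load might use this as follows (a minimal sketch; 'II' is assumed
  /// to be an IntrinsicInst* and treatAsLoadFrom is a hypothetical caller-side
  /// helper):
  /// \code
  ///   MemIntrinsicInfo Info;
  ///   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem &&
  ///       !Info.WriteMem && Info.isUnordered())
  ///     treatAsLoadFrom(Info.PtrVal);
  /// \endcode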
1556 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1557
1558 /// \returns The maximum element size, in bytes, for an element
1559 /// unordered-atomic memory intrinsic.
1560 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1561
1562 /// \returns A value which is the result of the given memory intrinsic. New
1563 /// instructions may be created to extract the result from the given intrinsic
1564 /// memory operation. Returns nullptr if the target cannot create a result
1565 /// from the given intrinsic.
1566 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1567 Type *ExpectedType) const;
1568
1569 /// \returns The type to use in a loop expansion of a memcpy call.
1570 Type *getMemcpyLoopLoweringType(
1571 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1572 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1573 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1574
1575 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1576 /// \param RemainingBytes The number of bytes to copy.
1577 ///
1578 /// Calculates the operand types to use when copying \p RemainingBytes of
1579 /// memory, where source and destination alignments are \p SrcAlign and
1580 /// \p DestAlign respectively.
1581 void getMemcpyLoopResidualLoweringType(
1582 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1583 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1584 unsigned SrcAlign, unsigned DestAlign,
1585 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1586
1587 /// \returns True if the two functions have compatible attributes for inlining
1588 /// purposes.
1589 bool areInlineCompatible(const Function *Caller,
1590 const Function *Callee) const;
1591
1592 /// Returns a penalty for invoking call \p Call in \p F.
1593 /// For example, if a function F calls a function G, which in turn calls
1594 /// function H, then getInlineCallPenalty(F, H()) would return the
1595 /// penalty of calling H from F, e.g. after inlining G into F.
1596 /// \p DefaultCallPenalty is passed to give a default penalty that
1597 /// the target can amend or override.
1598 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1599 unsigned DefaultCallPenalty) const;
1600
1601 /// \returns True if the caller and callee agree on how \p Types will be
  /// passed to or returned from the callee.
1604 /// \param Types List of types to check.
1605 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1606 const ArrayRef<Type *> &Types) const;
1607
1608 /// The type of load/store indexing.
1609 enum MemIndexedMode {
1610 MIM_Unindexed, ///< No indexing.
1611 MIM_PreInc, ///< Pre-incrementing.
1612 MIM_PreDec, ///< Pre-decrementing.
1613 MIM_PostInc, ///< Post-incrementing.
1614 MIM_PostDec ///< Post-decrementing.
1615 };
1616
1617 /// \returns True if the specified indexed load for the given type is legal.
1618 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1619
1620 /// \returns True if the specified indexed store for the given type is legal.
1621 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1622
1623 /// \returns The bitwidth of the largest vector type that should be used to
1624 /// load/store in the given address space.
1625 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1626
1627 /// \returns True if the load instruction is legal to vectorize.
1628 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1629
1630 /// \returns True if the store instruction is legal to vectorize.
1631 bool isLegalToVectorizeStore(StoreInst *SI) const;
1632
1633 /// \returns True if it is legal to vectorize the given load chain.
1634 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1635 unsigned AddrSpace) const;
1636
1637 /// \returns True if it is legal to vectorize the given store chain.
1638 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1639 unsigned AddrSpace) const;
1640
1641 /// \returns True if it is legal to vectorize the given reduction kind.
1642 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1643 ElementCount VF) const;
1644
1645 /// \returns True if the given type is supported for scalable vectors
1646 bool isElementTypeLegalForScalableVector(Type *Ty) const;
1647
  /// \returns The new vector factor value if the target doesn't support loads
  /// of size \p ChainSizeInBytes or has a better vector factor.
1650 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1651 unsigned ChainSizeInBytes,
1652 VectorType *VecTy) const;
1653
  /// \returns The new vector factor value if the target doesn't support stores
  /// of size \p ChainSizeInBytes or has a better vector factor.
1656 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1657 unsigned ChainSizeInBytes,
1658 VectorType *VecTy) const;
1659
1660 /// Flags describing the kind of vector reduction.
1661 struct ReductionFlags {
1662 ReductionFlags() = default;
1663 bool IsMaxOp =
        false; ///< If the op is a min/max kind, true if it's a max operation.
1665 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1666 bool NoNaN =
1667 false; ///< If op is an fp min/max, whether NaNs may be present.
1668 };
1669
  /// \returns True if the target prefers reductions to be kept in the loop.
1671 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1672 ReductionFlags Flags) const;
1673
  /// \returns True if the target prefers the reduction select to be kept in
  /// the loop when tail folding, i.e.
1676 /// loop:
1677 /// p = phi (0, s)
1678 /// a = add (p, x)
1679 /// s = select (mask, a, p)
1680 /// vecreduce.add(s)
1681 ///
1682 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1683 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1684 /// by the target, this can lead to cleaner code generation.
1685 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1686 ReductionFlags Flags) const;
1687
1688 /// Return true if the loop vectorizer should consider vectorizing an
1689 /// otherwise scalar epilogue loop.
1690 bool preferEpilogueVectorization() const;
1691
1692 /// \returns True if the target wants to expand the given reduction intrinsic
1693 /// into a shuffle sequence.
1694 bool shouldExpandReduction(const IntrinsicInst *II) const;
1695
1696 /// \returns the size cost of rematerializing a GlobalValue address relative
1697 /// to a stack reload.
1698 unsigned getGISelRematGlobalCost() const;
1699
1700 /// \returns the lower bound of a trip count to decide on vectorization
1701 /// while tail-folding.
1702 unsigned getMinTripCountTailFoldingThreshold() const;
1703
1704 /// \returns True if the target supports scalable vectors.
1705 bool supportsScalableVectors() const;
1706
1707 /// \return true when scalable vectorization is preferred.
1708 bool enableScalableVectorization() const;
1709
1710 /// \name Vector Predication Information
1711 /// @{
  /// Whether the target supports the %evl parameter of VP intrinsics
  /// efficiently in hardware, for the given opcode and type/alignment (see
  /// LLVM Language Reference - "Vector Predication Intrinsics").
1715 /// Use of %evl is discouraged when that is not the case.
1716 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1717 Align Alignment) const;
1718
1719 struct VPLegalization {
1720 enum VPTransform {
1721 // keep the predicating parameter
1722 Legal = 0,
1723 // where legal, discard the predicate parameter
1724 Discard = 1,
1725 // transform into something else that is also predicating
1726 Convert = 2
1727 };
1728
1729 // How to transform the EVL parameter.
1730 // Legal: keep the EVL parameter as it is.
1731 // Discard: Ignore the EVL parameter where it is safe to do so.
1732 // Convert: Fold the EVL into the mask parameter.
1733 VPTransform EVLParamStrategy;
1734
1735 // How to transform the operator.
1736 // Legal: The target supports this operator.
1737 // Convert: Convert this to a non-VP operation.
1738 // The 'Discard' strategy is invalid.
1739 VPTransform OpStrategy;
1740
1741 bool shouldDoNothing() const {
1742 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1743 }
1744 VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1745 : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1746 };
1747
1748 /// \returns How the target needs this vector-predicated operation to be
1749 /// transformed.
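  ///
  /// For example, an expansion pass might decide what to do with a VP
  /// intrinsic as follows (a minimal sketch; 'VPI' is assumed to be a
  /// VPIntrinsic reference):
  /// \code
  ///   TTI::VPLegalization VPLeg = TTI.getVPLegalizationStrategy(VPI);
  ///   if (VPLeg.shouldDoNothing())
  ///     return; // the target handles this VP operation natively
  /// \endcode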
1750 VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1751 /// @}
1752
1753 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1754 /// state.
1755 ///
1756 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1757 /// node containing a jump table in a format suitable for the target, so it
1758 /// needs to know what format of jump table it can legally use.
1759 ///
1760 /// For non-Arm targets, this function isn't used. It defaults to returning
1761 /// false, but it shouldn't matter what it returns anyway.
1762 bool hasArmWideBranch(bool Thumb) const;
1763
1764 /// \return The maximum number of function arguments the target supports.
1765 unsigned getMaxNumArgs() const;
1766
1767 /// @}
1768
1769private:
1770 /// The abstract base class used to type erase specific TTI
1771 /// implementations.
1772 class Concept;
1773
1774 /// The template model for the base class which wraps a concrete
1775 /// implementation in a type erased interface.
1776 template <typename T> class Model;
1777
1778 std::unique_ptr<Concept> TTIImpl;
1779};
1780
1781class TargetTransformInfo::Concept {
1782public:
1783 virtual ~Concept() = 0;
1784 virtual const DataLayout &getDataLayout() const = 0;
1785 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1786 ArrayRef<const Value *> Operands,
1787 Type *AccessType,
1788 TTI::TargetCostKind CostKind) = 0;
1789 virtual InstructionCost
1790 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
1791 const TTI::PointersChainInfo &Info, Type *AccessTy,
1792 TTI::TargetCostKind CostKind) = 0;
1793 virtual unsigned getInliningThresholdMultiplier() const = 0;
1794 virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
1795 virtual unsigned
1796 getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
1797 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1798 virtual int getInlinerVectorBonusPercent() const = 0;
1799 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1800 const AllocaInst *AI) const = 0;
1801 virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1802 virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const = 0;
1803 virtual unsigned
1804 getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1805 ProfileSummaryInfo *PSI,
1806 BlockFrequencyInfo *BFI) = 0;
1807 virtual InstructionCost getInstructionCost(const User *U,
1808 ArrayRef<const Value *> Operands,
1809 TargetCostKind CostKind) = 0;
1810 virtual BranchProbability getPredictableBranchThreshold() = 0;
1811 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1812 virtual bool isSourceOfDivergence(const Value *V) = 0;
1813 virtual bool isAlwaysUniform(const Value *V) = 0;
1814 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1815 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1816 virtual unsigned getFlatAddressSpace() = 0;
1817 virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1818 Intrinsic::ID IID) const = 0;
1819 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1820 virtual bool
1821 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1822 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1823 virtual bool isSingleThreaded() const = 0;
1824 virtual std::pair<const Value *, unsigned>
1825 getPredicatedAddrSpace(const Value *V) const = 0;
1826 virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1827 Value *OldV,
1828 Value *NewV) const = 0;
1829 virtual bool isLoweredToCall(const Function *F) = 0;
1830 virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1831 UnrollingPreferences &UP,
1832 OptimizationRemarkEmitter *ORE) = 0;
1833 virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1834 PeelingPreferences &PP) = 0;
1835 virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1836 AssumptionCache &AC,
1837 TargetLibraryInfo *LibInfo,
1838 HardwareLoopInfo &HWLoopInfo) = 0;
1839 virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0;
1840 virtual TailFoldingStyle
1841 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1842 virtual std::optional<Instruction *> instCombineIntrinsic(
1843 InstCombiner &IC, IntrinsicInst &II) = 0;
1844 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1845 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1846 KnownBits & Known, bool &KnownBitsComputed) = 0;
1847 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1848 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1849 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1850 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1851 SimplifyAndSetOp) = 0;
1852 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1853 virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
1854 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1855 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1856 int64_t BaseOffset, bool HasBaseReg,
1857 int64_t Scale, unsigned AddrSpace,
1858 Instruction *I,
1859 int64_t ScalableOffset) = 0;
1860 virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1861 const TargetTransformInfo::LSRCost &C2) = 0;
1862 virtual bool isNumRegsMajorCostOfLSR() = 0;
1863 virtual bool shouldFoldTerminatingConditionAfterLSR() const = 0;
1864 virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1865 virtual bool canMacroFuseCmp() = 0;
1866 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1867 LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1868 TargetLibraryInfo *LibInfo) = 0;
1869 virtual AddressingModeKind
1870 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1871 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1872 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1873 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1874 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1875 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1876 ElementCount NumElements) const = 0;
1877 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1878 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1879 virtual bool forceScalarizeMaskedGather(VectorType *DataType,
1880 Align Alignment) = 0;
1881 virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
1882 Align Alignment) = 0;
1883 virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) = 0;
1884 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) = 0;
1885 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0;
1886 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1887 unsigned Opcode1,
1888 const SmallBitVector &OpcodeMask) const = 0;
1889 virtual bool enableOrderedReductions() = 0;
1890 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1891 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1892 virtual bool prefersVectorizedAddressing() = 0;
1893 virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1894 int64_t BaseOffset,
1895 bool HasBaseReg, int64_t Scale,
1896 unsigned AddrSpace) = 0;
1897 virtual bool LSRWithInstrQueries() = 0;
1898 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1899 virtual bool isProfitableToHoist(Instruction *I) = 0;
1900 virtual bool useAA() = 0;
1901 virtual bool isTypeLegal(Type *Ty) = 0;
1902 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1903 virtual bool shouldBuildLookupTables() = 0;
1904 virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1905 virtual bool shouldBuildRelLookupTables() = 0;
1906 virtual bool useColdCCForColdCall(Function &F) = 0;
1907 virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
1908 const APInt &DemandedElts,
1909 bool Insert, bool Extract,
1910 TargetCostKind CostKind) = 0;
1911 virtual InstructionCost
1912 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1913 ArrayRef<Type *> Tys,
1914 TargetCostKind CostKind) = 0;
1915 virtual bool supportsEfficientVectorElementLoadStore() = 0;
1916 virtual bool supportsTailCalls() = 0;
1917 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1918 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1919 virtual MemCmpExpansionOptions
1920 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1921 virtual bool enableSelectOptimize() = 0;
1922 virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) = 0;
1923 virtual bool enableInterleavedAccessVectorization() = 0;
1924 virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1925 virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1926 virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1927 unsigned BitWidth,
1928 unsigned AddressSpace,
1929 Align Alignment,
1930 unsigned *Fast) = 0;
1931 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1932 virtual bool haveFastSqrt(Type *Ty) = 0;
1933 virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
1934 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1935 virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1936 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1937 const APInt &Imm, Type *Ty) = 0;
1938 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1939 TargetCostKind CostKind) = 0;
1940 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1941 const APInt &Imm, Type *Ty,
1942 TargetCostKind CostKind,
1943 Instruction *Inst = nullptr) = 0;
1944 virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1945 const APInt &Imm, Type *Ty,
1946 TargetCostKind CostKind) = 0;
1947 virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
1948 const Function &Fn) const = 0;
1949 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1950 virtual unsigned getRegisterClassForType(bool Vector,
1951 Type *Ty = nullptr) const = 0;
1952 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1953 virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1954 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1955 virtual std::optional<unsigned> getMaxVScale() const = 0;
1956 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1957 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1958 virtual bool
1959 shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
1960 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1961 bool IsScalable) const = 0;
1962 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1963 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1964 Type *ScalarValTy) const = 0;
1965 virtual bool shouldConsiderAddressTypePromotion(
1966 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1967 virtual unsigned getCacheLineSize() const = 0;
1968 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1969 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1970 const = 0;
1971 virtual std::optional<unsigned> getMinPageSize() const = 0;
1972
1973 /// \return How much before a load we should place the prefetch
1974 /// instruction. This is currently measured in number of
1975 /// instructions.
1976 virtual unsigned getPrefetchDistance() const = 0;
1977
1978 /// \return Some HW prefetchers can handle accesses up to a certain
1979 /// constant stride. This is the minimum stride in bytes where it
1980 /// makes sense to start adding SW prefetches. The default is 1,
1981 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1982 /// even below the HW prefetcher limit, and the arguments provided are
1983 /// meant to serve as a basis for deciding this for a particular loop.
1984 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1985 unsigned NumStridedMemAccesses,
1986 unsigned NumPrefetches,
1987 bool HasCall) const = 0;
1988
1989 /// \return The maximum number of iterations to prefetch ahead. If
1990 /// the required number of iterations is more than this number, no
1991 /// prefetching is performed.
1992 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1993
1994 /// \return True if prefetching should also be done for writes.
1995 virtual bool enableWritePrefetching() const = 0;
1996
  /// \return True if the target wants to issue a prefetch in address space
  /// \p AS.
1998 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1999
2000 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
2001 virtual InstructionCost getArithmeticInstrCost(
2002 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2003 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2004 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
2005 virtual InstructionCost getAltInstrCost(
2006 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2007 const SmallBitVector &OpcodeMask,
2008 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;
2009
2010 virtual InstructionCost
2011 getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
2012 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
2013 ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
2014 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
2015 Type *Src, CastContextHint CCH,
2016 TTI::TargetCostKind CostKind,
2017 const Instruction *I) = 0;
2018 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2019 VectorType *VecTy,
2020 unsigned Index) = 0;
2021 virtual InstructionCost getCFInstrCost(unsigned Opcode,
2022 TTI::TargetCostKind CostKind,
2023 const Instruction *I = nullptr) = 0;
2024 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
2025 Type *CondTy,
2026 CmpInst::Predicate VecPred,
2027 TTI::TargetCostKind CostKind,
2028 const Instruction *I) = 0;
2029 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2030 TTI::TargetCostKind CostKind,
2031 unsigned Index, Value *Op0,
2032 Value *Op1) = 0;
2033 virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2034 TTI::TargetCostKind CostKind,
2035 unsigned Index) = 0;
2036
2037 virtual InstructionCost
2038 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2039 const APInt &DemandedDstElts,
2040 TTI::TargetCostKind CostKind) = 0;
2041
2042 virtual InstructionCost
2043 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2044 unsigned AddressSpace, TTI::TargetCostKind CostKind,
2045 OperandValueInfo OpInfo, const Instruction *I) = 0;
2046 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2047 Align Alignment,
2048 unsigned AddressSpace,
2049 TTI::TargetCostKind CostKind,
2050 const Instruction *I) = 0;
2051 virtual InstructionCost
2052 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2053 unsigned AddressSpace,
2054 TTI::TargetCostKind CostKind) = 0;
2055 virtual InstructionCost
2056 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2057 bool VariableMask, Align Alignment,
2058 TTI::TargetCostKind CostKind,
2059 const Instruction *I = nullptr) = 0;
2060 virtual InstructionCost
2061 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2062 bool VariableMask, Align Alignment,
2063 TTI::TargetCostKind CostKind,
2064 const Instruction *I = nullptr) = 0;
2065
2066 virtual InstructionCost getInterleavedMemoryOpCost(
2067 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2068 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2069 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2070 virtual InstructionCost
2071 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2072 std::optional<FastMathFlags> FMF,
2073 TTI::TargetCostKind CostKind) = 0;
2074 virtual InstructionCost
2075 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2076 TTI::TargetCostKind CostKind) = 0;
2077 virtual InstructionCost getExtendedReductionCost(
2078 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2079 FastMathFlags FMF,
2080 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
2081 virtual InstructionCost getMulAccReductionCost(
2082 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2083 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
2084 virtual InstructionCost
2085 getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2086 TTI::TargetCostKind CostKind) = 0;
2087 virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2088 ArrayRef<Type *> Tys,
2089 TTI::TargetCostKind CostKind) = 0;
2090 virtual unsigned getNumberOfParts(Type *Tp) = 0;
2091 virtual InstructionCost
2092 getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
2093 virtual InstructionCost
2094 getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
2095 virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2096 MemIntrinsicInfo &Info) = 0;
2097 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2098 virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2099 Type *ExpectedType) = 0;
2100 virtual Type *getMemcpyLoopLoweringType(
2101 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2102 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2103 std::optional<uint32_t> AtomicElementSize) const = 0;
2104
2105 virtual void getMemcpyLoopResidualLoweringType(
2106 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2107 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2108 unsigned SrcAlign, unsigned DestAlign,
2109 std::optional<uint32_t> AtomicCpySize) const = 0;
2110 virtual bool areInlineCompatible(const Function *Caller,
2111 const Function *Callee) const = 0;
2112 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2113 unsigned DefaultCallPenalty) const = 0;
2114 virtual bool areTypesABICompatible(const Function *Caller,
2115 const Function *Callee,
2116 const ArrayRef<Type *> &Types) const = 0;
2117 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2118 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2119 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2120 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2121 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2122 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2123 Align Alignment,
2124 unsigned AddrSpace) const = 0;
2125 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2126 Align Alignment,
2127 unsigned AddrSpace) const = 0;
2128 virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2129 ElementCount VF) const = 0;
2130 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2131 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2132 unsigned ChainSizeInBytes,
2133 VectorType *VecTy) const = 0;
2134 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2135 unsigned ChainSizeInBytes,
2136 VectorType *VecTy) const = 0;
2137 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2138 ReductionFlags) const = 0;
2139 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2140 ReductionFlags) const = 0;
2141 virtual bool preferEpilogueVectorization() const = 0;
2142
2143 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2144 virtual unsigned getGISelRematGlobalCost() const = 0;
2145 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2146 virtual bool enableScalableVectorization() const = 0;
2147 virtual bool supportsScalableVectors() const = 0;
2148 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2149 Align Alignment) const = 0;
2150 virtual VPLegalization
2151 getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
2152 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2153 virtual unsigned getMaxNumArgs() const = 0;
2154};
2155
2156template <typename T>
2157class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2158 T Impl;
2159
2160public:
2161 Model(T Impl) : Impl(std::move(Impl)) {}
2162 ~Model() override = default;
2163
2164 const DataLayout &getDataLayout() const override {
2165 return Impl.getDataLayout();
2166 }
2167
2168 InstructionCost
2169 getGEPCost(Type *PointeeType, const Value *Ptr,
2170 ArrayRef<const Value *> Operands, Type *AccessType,
2171 TargetTransformInfo::TargetCostKind CostKind) override {
2172 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2173 }
2174 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2175 const Value *Base,
2176 const PointersChainInfo &Info,
2177 Type *AccessTy,
2178 TargetCostKind CostKind) override {
2179 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2180 }
2181 unsigned getInliningThresholdMultiplier() const override {
2182 return Impl.getInliningThresholdMultiplier();
2183 }
2184 unsigned adjustInliningThreshold(const CallBase *CB) override {
2185 return Impl.adjustInliningThreshold(CB);
2186 }
2187 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2188 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2189 }
2190 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2191 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2192 }
2193 int getInlinerVectorBonusPercent() const override {
2194 return Impl.getInlinerVectorBonusPercent();
2195 }
2196 unsigned getCallerAllocaCost(const CallBase *CB,
2197 const AllocaInst *AI) const override {
2198 return Impl.getCallerAllocaCost(CB, AI);
2199 }
2200 InstructionCost getMemcpyCost(const Instruction *I) override {
2201 return Impl.getMemcpyCost(I);
2202 }
2203
2204 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2205 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2206 }
2207
2208 InstructionCost getInstructionCost(const User *U,
2209 ArrayRef<const Value *> Operands,
2210 TargetCostKind CostKind) override {
2211 return Impl.getInstructionCost(U, Operands, CostKind);
2212 }
2213 BranchProbability getPredictableBranchThreshold() override {
2214 return Impl.getPredictableBranchThreshold();
2215 }
2216 bool hasBranchDivergence(const Function *F = nullptr) override {
2217 return Impl.hasBranchDivergence(F);
2218 }
2219 bool isSourceOfDivergence(const Value *V) override {
2220 return Impl.isSourceOfDivergence(V);
2221 }
2222
2223 bool isAlwaysUniform(const Value *V) override {
2224 return Impl.isAlwaysUniform(V);
2225 }
2226
2227 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2228 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2229 }
2230
2231 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2232 return Impl.addrspacesMayAlias(AS0, AS1);
2233 }
2234
2235 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2236
2237 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2238 Intrinsic::ID IID) const override {
2239 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2240 }
2241
2242 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2243 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2244 }
2245
2246 bool
2247 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2248 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2249 }
2250
2251 unsigned getAssumedAddrSpace(const Value *V) const override {
2252 return Impl.getAssumedAddrSpace(V);
2253 }
2254
2255 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2256
2257 std::pair<const Value *, unsigned>
2258 getPredicatedAddrSpace(const Value *V) const override {
2259 return Impl.getPredicatedAddrSpace(V);
2260 }
2261
2262 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2263 Value *NewV) const override {
2264 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2265 }
2266
2267 bool isLoweredToCall(const Function *F) override {
2268 return Impl.isLoweredToCall(F);
2269 }
2270 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2271 UnrollingPreferences &UP,
2272 OptimizationRemarkEmitter *ORE) override {
2273 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2274 }
2275 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2276 PeelingPreferences &PP) override {
2277 return Impl.getPeelingPreferences(L, SE, PP);
2278 }
2279 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2280 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2281 HardwareLoopInfo &HWLoopInfo) override {
2282 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2283 }
2284 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2285 return Impl.preferPredicateOverEpilogue(TFI);
2286 }
2287 TailFoldingStyle
2288 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2289 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2290 }
2291 std::optional<Instruction *>
2292 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2293 return Impl.instCombineIntrinsic(IC, II);
2294 }
2295 std::optional<Value *>
2296 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2297 APInt DemandedMask, KnownBits &Known,
2298 bool &KnownBitsComputed) override {
2299 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2300 KnownBitsComputed);
2301 }
2302 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2303 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2304 APInt &UndefElts2, APInt &UndefElts3,
2305 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2306 SimplifyAndSetOp) override {
2307 return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalAddScalableImmediate(int64_t Imm) override {
    return Impl.isLegalAddScalableImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I, int64_t ScalableOffset) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I, ScalableOffset);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool shouldFoldTerminatingConditionAfterLSR() const override {
    return Impl.shouldFoldTerminatingConditionAfterLSR();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  AddressingModeKind
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedCompressStore(DataType, Alignment);
  }
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
  }
  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalStridedLoadStore(DataType, Alignment);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                     AddrSpace);
  }
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TargetCostKind CostKind) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                         CostKind);
  }
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  }

  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }

  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
    return Impl.shouldTreatInstructionLikeSelect(I);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }

  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) override {
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const override {
    return Impl.preferToKeepConstantsAttached(Inst, Fn);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(
      TargetTransformInfo::RegisterKind K) const override {
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }

  std::optional<unsigned> getMinPageSize() const override {
    return Impl.getMinPageSize();
  }

  /// Return the preferred prefetch distance in terms of instructions.
  ///
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }

  /// Return the minimum stride necessary to trigger software
  /// prefetching.
  ///
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }

  /// Return the maximum prefetch distance in terms of loop
  /// iterations.
  ///
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }

  /// \return True if prefetching should also be done for writes.
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }

  /// \return true if the target wants to issue a prefetch in address space \p AS.
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                       Args, CxtI);
  }
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const override {
    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
  }

  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args,
                                 const Instruction *CxtI) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
                               CxtI);
  }
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) override {
    return Impl.getVectorInstrCost(I, Val, CostKind, Index);
  }
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) override {
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                          DemandedDstElts, CostKind);
  }
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
  }
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, I);
  }
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
  }
  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
  }
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) override {
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  }
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, FastMathFlags FMF,
                           TTI::TargetCostKind CostKind) override {
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
                                         CostKind);
  }
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TTI::TargetCostKind CostKind) override {
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) override {
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) override {
    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
                                          AtomicElementSize);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override {
    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }

  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }

  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }

  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }

  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }

  VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }

  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }

  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
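  ///
  /// A minimal sketch of such a callback, for illustration only (real targets
  /// normally hand out this analysis via TargetMachine::getTargetIRAnalysis()
  /// rather than writing the lambda by hand):
  /// \code
  ///   TargetIRAnalysis TIRA([](const Function &F) {
  ///     // Fall back to the conservative, DataLayout-only TTI result.
  ///     return TargetTransformInfo(F.getParent()->getDataLayout());
  ///   });
  /// \endcode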
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  std::optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// We must provide a default constructor for the pass, but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // namespace llvm

#endif