1 | //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This pass exposes codegen information to IR-level passes. Every |
10 | /// transformation that uses codegen information is broken into three parts: |
11 | /// 1. The IR-level analysis pass. |
12 | /// 2. The IR-level transformation interface which provides the needed |
13 | /// information. |
14 | /// 3. Codegen-level implementation which uses target-specific hooks. |
15 | /// |
16 | /// This file defines #2, which is the interface that IR-level transformations |
17 | /// use for querying the codegen. |
18 | /// |
19 | //===----------------------------------------------------------------------===// |
20 | |
21 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
22 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
23 | |
24 | #include "llvm/ADT/APInt.h" |
25 | #include "llvm/ADT/ArrayRef.h" |
26 | #include "llvm/Analysis/IVDescriptors.h" |
27 | #include "llvm/IR/FMF.h" |
28 | #include "llvm/IR/InstrTypes.h" |
29 | #include "llvm/IR/PassManager.h" |
30 | #include "llvm/Pass.h" |
31 | #include "llvm/Support/AtomicOrdering.h" |
32 | #include "llvm/Support/BranchProbability.h" |
33 | #include "llvm/Support/Compiler.h" |
34 | #include "llvm/Support/InstructionCost.h" |
35 | #include <functional> |
36 | #include <optional> |
37 | #include <utility> |
38 | |
39 | namespace llvm { |
40 | |
41 | namespace Intrinsic { |
42 | typedef unsigned ID; |
43 | } |
44 | |
45 | class AllocaInst; |
46 | class AssumptionCache; |
47 | class BlockFrequencyInfo; |
48 | class DominatorTree; |
49 | class BranchInst; |
50 | class Function; |
51 | class GlobalValue; |
52 | class InstCombiner; |
53 | class OptimizationRemarkEmitter; |
54 | class InterleavedAccessInfo; |
55 | class IntrinsicInst; |
56 | class LoadInst; |
57 | class Loop; |
58 | class LoopInfo; |
59 | class LoopVectorizationLegality; |
60 | class ProfileSummaryInfo; |
61 | class RecurrenceDescriptor; |
62 | class SCEV; |
63 | class ScalarEvolution; |
64 | class SmallBitVector; |
65 | class StoreInst; |
66 | class SwitchInst; |
67 | class TargetLibraryInfo; |
68 | class Type; |
69 | class VPIntrinsic; |
70 | struct KnownBits; |
71 | |
72 | /// Information about a load/store intrinsic defined by the target. |
73 | struct MemIntrinsicInfo { |
74 | /// This is the pointer that the intrinsic is loading from or storing to. |
75 | /// If this is non-null, then analysis/optimization passes can assume that |
76 | /// this intrinsic is functionally equivalent to a load/store from this |
77 | /// pointer. |
78 | Value *PtrVal = nullptr; |
79 | |
80 | // Ordering for atomic operations. |
81 | AtomicOrdering Ordering = AtomicOrdering::NotAtomic; |
82 | |
83 | // Same Id is set by the target for corresponding load/store intrinsics. |
84 | unsigned short MatchingId = 0; |
85 | |
86 | bool ReadMem = false; |
87 | bool WriteMem = false; |
88 | bool IsVolatile = false; |
89 | |
90 | bool isUnordered() const { |
91 | return (Ordering == AtomicOrdering::NotAtomic || |
92 | Ordering == AtomicOrdering::Unordered) && |
93 | !IsVolatile; |
94 | } |
95 | }; |
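
// Illustrative sketch (not part of the upstream header): a target's TTI
// implementation typically fills this struct when asked to describe one of
// its own load-like intrinsics. The intrinsic and its operand layout below
// are hypothetical.
//
//   MemIntrinsicInfo Info;
//   Info.PtrVal = II->getArgOperand(0); // address the intrinsic reads from
//   Info.ReadMem = true;                // behaves like a load
//   Info.WriteMem = false;
//   Info.IsVolatile = false;
//   Info.MatchingId = 1;                // pairs with the matching store form
//   bool CSEable = Info.isUnordered();  // plain loads: unordered, non-volatile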
96 | |
97 | /// Attributes of a target dependent hardware loop. |
98 | struct HardwareLoopInfo { |
99 | HardwareLoopInfo() = delete; |
100 | LLVM_ABI HardwareLoopInfo(Loop *L); |
101 | Loop *L = nullptr; |
102 | BasicBlock *ExitBlock = nullptr; |
103 | BranchInst *ExitBranch = nullptr; |
104 | const SCEV *ExitCount = nullptr; |
105 | IntegerType *CountType = nullptr; |
106 | Value *LoopDecrement = nullptr; // Decrement the loop counter by this |
107 | // value in every iteration. |
108 | bool IsNestingLegal = false; // Can a hardware loop be a parent to |
109 | // another hardware loop? |
110 | bool CounterInReg = false; // Should loop counter be updated in |
111 | // the loop via a phi? |
112 | bool PerformEntryTest = false; // Generate the intrinsic which also performs |
113 | // icmp ne zero on the loop counter value and |
114 | // produces an i1 to guard the loop entry. |
115 | LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, |
116 | DominatorTree &DT, |
117 | bool ForceNestedLoop = false, |
118 | bool ForceHardwareLoopPHI = false); |
119 | LLVM_ABI bool canAnalyze(LoopInfo &LI); |
120 | }; |
121 | |
122 | class IntrinsicCostAttributes { |
123 | const IntrinsicInst *II = nullptr; |
124 | Type *RetTy = nullptr; |
125 | Intrinsic::ID IID; |
126 | SmallVector<Type *, 4> ParamTys; |
127 | SmallVector<const Value *, 4> Arguments; |
128 | FastMathFlags FMF; |
129 | // If ScalarizationCost is invalid (the default), the cost of scalarizing |
130 | // the arguments and the return value will be computed based on types. |
131 | InstructionCost ScalarizationCost = InstructionCost::getInvalid(); |
132 | TargetLibraryInfo const *LibInfo = nullptr; |
133 | |
134 | public: |
135 | LLVM_ABI IntrinsicCostAttributes( |
136 | Intrinsic::ID Id, const CallBase &CI, |
137 | InstructionCost ScalarCost = InstructionCost::getInvalid(), |
138 | bool TypeBasedOnly = false, TargetLibraryInfo const *LibInfo = nullptr); |
139 | |
140 | LLVM_ABI IntrinsicCostAttributes( |
141 | Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, |
142 | FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, |
143 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
144 | |
145 | LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, |
146 | ArrayRef<const Value *> Args); |
147 | |
148 | LLVM_ABI IntrinsicCostAttributes( |
149 | Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, |
150 | ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), |
151 | const IntrinsicInst *I = nullptr, |
152 | InstructionCost ScalarCost = InstructionCost::getInvalid(), |
153 | TargetLibraryInfo const *LibInfo = nullptr); |
154 | |
155 | Intrinsic::ID getID() const { return IID; } |
156 | const IntrinsicInst *getInst() const { return II; } |
157 | Type *getReturnType() const { return RetTy; } |
158 | FastMathFlags getFlags() const { return FMF; } |
159 | InstructionCost getScalarizationCost() const { return ScalarizationCost; } |
160 | const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } |
161 | const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } |
162 | const TargetLibraryInfo *getLibInfo() const { return LibInfo; } |
163 | |
164 | bool isTypeBasedOnly() const { |
165 | return Arguments.empty(); |
166 | } |
167 | |
168 | bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } |
169 | }; |
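
// Illustrative sketch (not part of the upstream header): clients usually wrap
// the intrinsic they want priced in IntrinsicCostAttributes and hand it to the
// intrinsic cost query (getIntrinsicInstrCost in the full interface). `Ctx` is
// an assumed LLVMContext.
//
//   // Price a fabs over <4 x float>, by type only (no argument values).
//   Type *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
//   IntrinsicCostAttributes Attrs(Intrinsic::fabs, VecTy, {VecTy});
//   assert(Attrs.isTypeBasedOnly() && "no argument values were supplied");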
170 | |
171 | enum class TailFoldingStyle { |
172 | /// Don't use tail folding |
173 | None, |
174 | /// Use predicate only to mask operations on data in the loop. |
175 | /// When the VL is not known to be a power-of-2, this method requires a |
176 | /// runtime overflow check for the i + VL increment in the loop, because it |
177 | /// compares the scalar induction variable against the trip count rounded up |
178 | /// to a multiple of VL, and that rounded-up value may overflow. When the VL |
179 | /// is a power-of-2, both the increment and the rounded-up trip count wrap to |
180 | /// 0, so no runtime check is needed: the loop exits when the induction |
181 | /// variable equals the rounded-up trip count, both being 0. |
182 | Data, |
183 | /// Same as Data, but avoids using the get.active.lane.mask intrinsic to |
184 | /// calculate the mask and instead implements this with a |
185 | /// splat/stepvector/cmp. |
186 | /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the |
187 | /// active.lane.mask intrinsic when it is not natively supported? |
188 | DataWithoutLaneMask, |
189 | /// Use predicate to control both data and control flow. |
190 | /// This method always requires a runtime overflow check for the i + VL |
191 | /// increment inside the loop, because it uses the result directly in the |
192 | /// active.lane.mask to calculate the mask for the next iteration. If the |
193 | /// increment overflows, the mask is no longer correct. |
194 | DataAndControlFlow, |
195 | /// Use predicate to control both data and control flow, but modify |
196 | /// the trip count so that a runtime overflow check can be avoided |
197 | /// and such that the scalar epilogue loop can always be removed. |
198 | DataAndControlFlowWithoutRuntimeCheck, |
199 | /// Use predicated EVL instructions for tail-folding. |
200 | /// Indicates that VP intrinsics should be used. |
201 | DataWithEVL, |
202 | }; |
203 | |
204 | struct TailFoldingInfo { |
205 | TargetLibraryInfo *TLI; |
206 | LoopVectorizationLegality *LVL; |
207 | InterleavedAccessInfo *IAI; |
208 | TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, |
209 | InterleavedAccessInfo *IAI) |
210 | : TLI(TLI), LVL(LVL), IAI(IAI) {} |
211 | }; |
212 | |
213 | class TargetTransformInfo; |
214 | typedef TargetTransformInfo TTI; |
215 | class TargetTransformInfoImplBase; |
216 | |
217 | /// This pass provides access to the codegen interfaces that are needed |
218 | /// for IR-level transformations. |
219 | class TargetTransformInfo { |
220 | public: |
221 | enum PartialReductionExtendKind { PR_None, PR_SignExtend, PR_ZeroExtend }; |
222 | |
223 | /// Get the kind of extension that an instruction represents. |
224 | LLVM_ABI static PartialReductionExtendKind |
225 | getPartialReductionExtendKind(Instruction *I); |
226 | |
227 | /// Construct a TTI object using a type implementing the \c Concept |
228 | /// API below. |
229 | /// |
230 | /// This is used by targets to construct a TTI wrapping their target-specific |
231 | /// implementation that encodes appropriate costs for their target. |
232 | LLVM_ABI explicit TargetTransformInfo( |
233 | std::unique_ptr<const TargetTransformInfoImplBase> Impl); |
234 | |
235 | /// Construct a baseline TTI object using a minimal implementation of |
236 | /// the \c Concept API below. |
237 | /// |
238 | /// The TTI implementation will reflect the information in the DataLayout |
239 | /// provided if non-null. |
240 | LLVM_ABI explicit TargetTransformInfo(const DataLayout &DL); |
241 | |
242 | // Provide move semantics. |
243 | LLVM_ABI TargetTransformInfo(TargetTransformInfo &&Arg); |
244 | LLVM_ABI TargetTransformInfo &operator=(TargetTransformInfo &&RHS); |
245 | |
246 | // We need to define the destructor out-of-line to define our sub-classes |
247 | // out-of-line. |
248 | LLVM_ABI ~TargetTransformInfo(); |
249 | |
250 | /// Handle the invalidation of this information. |
251 | /// |
252 | /// When used as a result of \c TargetIRAnalysis this method will be called |
253 | /// when the function this was computed for changes. When it returns false, |
254 | /// the information is preserved across those changes. |
255 | bool invalidate(Function &, const PreservedAnalyses &, |
256 | FunctionAnalysisManager::Invalidator &) { |
257 | // FIXME: We should probably in some way ensure that the subtarget |
258 | // information for a function hasn't changed. |
259 | return false; |
260 | } |
261 | |
262 | /// \name Generic Target Information |
263 | /// @{ |
264 | |
265 | /// The kind of cost model. |
266 | /// |
267 | /// There are several different cost models that can be customized by the |
268 | /// target. The normalization of each cost model may be target specific. |
269 | /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as |
270 | /// those derived from MCSchedModel::LoopMicroOpBufferSize etc. |
271 | enum TargetCostKind { |
272 | TCK_RecipThroughput, ///< Reciprocal throughput. |
273 | TCK_Latency, ///< The latency of instruction. |
274 | TCK_CodeSize, ///< Instruction code size. |
275 | TCK_SizeAndLatency ///< The weighted sum of size and latency. |
276 | }; |
277 | |
278 | /// Underlying constants for 'cost' values in this interface. |
279 | /// |
280 | /// Many APIs in this interface return a cost. This enum defines the |
281 | /// fundamental values that should be used to interpret (and produce) those |
282 | /// costs. The costs are returned as an int rather than a member of this |
283 | /// enumeration because it is expected that the cost of one IR instruction |
284 | /// may have a multiplicative factor to it or otherwise won't fit directly |
285 | /// into the enum. Moreover, it is common to sum or average costs which works |
286 | /// better as simple integral values. Thus this enum only provides constants. |
287 | /// Also note that the returned costs are signed integers to make it natural |
288 | /// to add, subtract, and test with zero (a common boundary condition). It is |
289 | /// not expected that 2^32 is a realistic cost to be modeling at any point. |
290 | /// |
291 | /// Note that these costs should usually reflect the intersection of code-size |
292 | /// cost and execution cost. A free instruction is typically one that folds |
293 | /// into another instruction. For example, reg-to-reg moves can often be |
294 | /// skipped by renaming the registers in the CPU, but they still are encoded |
295 | /// and thus wouldn't be considered 'free' here. |
296 | enum TargetCostConstants { |
297 | TCC_Free = 0, ///< Expected to fold away in lowering. |
298 | TCC_Basic = 1, ///< The cost of a typical 'add' instruction. |
299 | TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. |
300 | }; |
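
// Illustrative sketch (not part of the upstream header): a size-conscious pass
// might combine a cost kind with these constants roughly as follows, using the
// getInstructionCost query declared further down. `I` and `TTI` are assumed to
// be in scope.
//
//   InstructionCost Cost =
//       TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
//   bool CheapEnoughToDuplicate =
//       Cost.isValid() && Cost <= TargetTransformInfo::TCC_Basic;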
301 | |
302 | /// Estimate the cost of a GEP operation when lowered. |
303 | /// |
304 | /// \p PointeeType is the source element type of the GEP. |
305 | /// \p Ptr is the base pointer operand. |
306 | /// \p Operands is the list of indices following the base pointer. |
307 | /// |
308 | /// \p AccessType is a hint as to what type of memory might be accessed by |
309 | /// users of the GEP. getGEPCost will use it to determine if the GEP can be |
310 | /// folded into the addressing mode of a load/store. If AccessType is null, |
311 | /// then the resulting target type based off of PointeeType will be used as an |
312 | /// approximation. |
313 | LLVM_ABI InstructionCost |
314 | getGEPCost(Type *PointeeType, const Value *Ptr, |
315 | ArrayRef<const Value *> Operands, Type *AccessType = nullptr, |
316 | TargetCostKind CostKind = TCK_SizeAndLatency) const; |
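
// Illustrative sketch (not part of the upstream header): deciding whether a
// GEP is expected to fold into its users' addressing modes. `GEP` and `TTI`
// are assumed to be in scope.
//
//   SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
//   InstructionCost Cost =
//       TTI.getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
//                      Indices);
//   bool FoldsAway = (Cost == TargetTransformInfo::TCC_Free);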
317 | |
318 | /// Describe known properties for a set of pointers. |
319 | struct PointersChainInfo { |
320 | /// All the GEPs in a set have same base address. |
321 | unsigned IsSameBaseAddress : 1; |
322 | /// These properties are only valid if IsSameBaseAddress is set. |
323 | /// True if all pointers are separated by a unit stride. |
324 | unsigned IsUnitStride : 1; |
325 | /// True if the distance between any two neighbouring pointers is known. |
326 | unsigned IsKnownStride : 1; |
327 | unsigned Reserved : 29; |
328 | |
329 | bool isSameBase() const { return IsSameBaseAddress; } |
330 | bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; } |
331 | bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; } |
332 | |
333 | static PointersChainInfo getUnitStride() { |
334 | return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1, |
335 | /*IsKnownStride=*/1, /*Reserved=*/0}; |
336 | } |
337 | static PointersChainInfo getKnownStride() { |
338 | return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0, |
339 | /*IsKnownStride=*/1, /*Reserved=*/0}; |
340 | } |
341 | static PointersChainInfo getUnknownStride() { |
342 | return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0, |
343 | /*IsKnownStride=*/0, /*Reserved=*/0}; |
344 | } |
345 | }; |
346 | static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?"); |
347 | |
348 | /// Estimate the cost, when lowered, of the pointer operations for a chain of |
349 | /// pointers (typically the pointer operands of loads or stores in one block). |
350 | /// \p AccessTy is the type of the loads/stores that will ultimately use the |
351 | /// \p Ptrs. |
352 | LLVM_ABI InstructionCost getPointersChainCost( |
353 | ArrayRef<const Value *> Ptrs, const Value *Base, |
354 | const PointersChainInfo &Info, Type *AccessTy, |
355 | TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
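
// Illustrative sketch (not part of the upstream header): costing the pointer
// operands of a run of consecutive loads that share a base pointer. `Ptrs`,
// `Base`, `EltTy` and `TTI` are assumed to be in scope.
//
//   InstructionCost PtrCost = TTI.getPointersChainCost(
//       Ptrs, Base, TargetTransformInfo::PointersChainInfo::getUnitStride(),
//       EltTy);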
356 | |
357 | /// \returns A value by which our inlining threshold should be multiplied. |
358 | /// This is primarily used to bump up the inlining threshold wholesale on |
359 | /// targets where calls are unusually expensive. |
360 | /// |
361 | /// TODO: This is a rather blunt instrument. Perhaps altering the costs of |
362 | /// individual classes of instructions would be better. |
363 | LLVM_ABI unsigned getInliningThresholdMultiplier() const; |
364 | |
365 | LLVM_ABI unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const; |
366 | LLVM_ABI unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const; |
367 | |
368 | /// \returns The bonus of inlining the last call to a static function. |
369 | LLVM_ABI int getInliningLastCallToStaticBonus() const; |
370 | |
371 | /// \returns A value to be added to the inlining threshold. |
372 | LLVM_ABI unsigned adjustInliningThreshold(const CallBase *CB) const; |
373 | |
374 | /// \returns The cost of having an Alloca in the caller if not inlined, to be |
375 | /// added to the threshold |
376 | LLVM_ABI unsigned getCallerAllocaCost(const CallBase *CB, |
377 | const AllocaInst *AI) const; |
378 | |
379 | /// \returns Vector bonus in percent. |
380 | /// |
381 | /// Vector bonuses: We want to more aggressively inline vector-dense kernels |
382 | /// and apply this bonus based on the percentage of vector instructions. A |
383 | /// bonus is applied if the vector instructions exceed 50% and half that |
384 | /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat |
385 | /// arbitrary and evolved over time by accident as much as because they are |
386 | /// principled bonuses. |
387 | /// FIXME: It would be nice to base the bonus values on something more |
388 | /// scientific. A target may have no bonus on vector instructions. |
389 | LLVM_ABI int getInlinerVectorBonusPercent() const; |
390 | |
391 | /// \return the expected cost of a memcpy, which could e.g. depend on the |
392 | /// source/destination type and alignment and the number of bytes copied. |
393 | LLVM_ABI InstructionCost getMemcpyCost(const Instruction *I) const; |
394 | |
395 | /// Returns the maximum memset / memcpy size in bytes that still makes it |
396 | /// profitable to inline the call. |
397 | LLVM_ABI uint64_t getMaxMemIntrinsicInlineSizeThreshold() const; |
398 | |
399 | /// \return The estimated number of case clusters when lowering \p 'SI'. |
400 | /// \p JTSize Set a jump table size only when \p SI is suitable for a jump |
401 | /// table. |
402 | LLVM_ABI unsigned |
403 | getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, |
404 | ProfileSummaryInfo *PSI, |
405 | BlockFrequencyInfo *BFI) const; |
406 | |
407 | /// Estimate the cost of a given IR user when lowered. |
408 | /// |
409 | /// This can estimate the cost of either a ConstantExpr or Instruction when |
410 | /// lowered. |
411 | /// |
412 | /// \p Operands is a list of operands which can be a result of transformations |
413 | /// of the current operands. The number of operands on the list must be |
414 | /// equal to the number of operands the IR user currently has, and their |
415 | /// order on the list must match the order of the user's current |
416 | /// operands. |
417 | /// |
418 | /// The returned cost is defined in terms of \c TargetCostConstants, see its |
419 | /// comments for a detailed explanation of the cost values. |
420 | LLVM_ABI InstructionCost getInstructionCost(const User *U, |
421 | ArrayRef<const Value *> Operands, |
422 | TargetCostKind CostKind) const; |
423 | |
424 | /// This is a helper function which calls the three-argument |
425 | /// getInstructionCost with \p Operands which are the current operands U has. |
426 | InstructionCost getInstructionCost(const User *U, |
427 | TargetCostKind CostKind) const { |
428 | SmallVector<const Value *, 4> Operands(U->operand_values()); |
429 | return getInstructionCost(U, Operands, CostKind); |
430 | } |
431 | |
432 | /// If a branch or a select condition is skewed in one direction by more than |
433 | /// this factor, it is very likely to be predicted correctly. |
434 | LLVM_ABI BranchProbability getPredictableBranchThreshold() const; |
435 | |
436 | /// Returns the estimated penalty, in latency, of a branch misprediction. |
437 | /// This indicates how aggressively the target wants unpredictable branches |
438 | /// to be eliminated. A zero return value means extra optimization applied |
439 | /// to them should be minimal. |
440 | LLVM_ABI InstructionCost getBranchMispredictPenalty() const; |
441 | |
442 | /// Return true if branch divergence exists. |
443 | /// |
444 | /// Branch divergence has a significantly negative impact on GPU performance |
445 | /// when threads in the same wavefront take different paths due to conditional |
446 | /// branches. |
447 | /// |
448 | /// If \p F is passed, provides a context function. If \p F is known to only |
449 | /// execute in a single threaded environment, the target may choose to skip |
450 | /// uniformity analysis and assume all values are uniform. |
451 | LLVM_ABI bool hasBranchDivergence(const Function *F = nullptr) const; |
452 | |
453 | /// Returns whether V is a source of divergence. |
454 | /// |
455 | /// This function provides the target-dependent information for |
456 | /// the target-independent UniformityAnalysis. |
457 | LLVM_ABI bool isSourceOfDivergence(const Value *V) const; |
458 | |
459 | /// Returns true for the target-specific set of operations |
460 | /// that produce a uniform result even when |
461 | /// given non-uniform arguments. |
462 | LLVM_ABI bool isAlwaysUniform(const Value *V) const; |
463 | |
464 | /// Query the target whether the specified address space cast from FromAS to |
465 | /// ToAS is valid. |
466 | LLVM_ABI bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; |
467 | |
468 | /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address. |
469 | LLVM_ABI bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const; |
470 | |
471 | /// Returns the address space ID for a target's 'flat' address space. Note |
472 | /// this is not necessarily the same as addrspace(0), which LLVM sometimes |
473 | /// refers to as the generic address space. The flat address space is a |
474 | /// generic address space that can be used access multiple segments of memory |
475 | /// with different address spaces. Access of a memory location through a |
476 | /// pointer with this address space is expected to be legal but slower |
477 | /// compared to the same memory location accessed through a pointer with a |
478 | /// different address space. |
479 | // |
480 | /// This is for targets with different pointer representations which can |
481 | /// be converted with the addrspacecast instruction. If a pointer is converted |
482 | /// to this address space, optimizations should attempt to replace the access |
483 | /// with the source address space. |
484 | /// |
485 | /// \returns ~0u if the target does not have such a flat address space to |
486 | /// optimize away. |
487 | LLVM_ABI unsigned getFlatAddressSpace() const; |
488 | |
489 | /// Return any intrinsic address operand indexes which may be rewritten if |
490 | /// they use a flat address space pointer. |
491 | /// |
492 | /// \returns true if the intrinsic was handled. |
493 | LLVM_ABI bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
494 | Intrinsic::ID IID) const; |
495 | |
496 | LLVM_ABI bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; |
497 | |
498 | /// Return true if globals in this address space can have initializers other |
499 | /// than `undef`. |
500 | LLVM_ABI bool |
501 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; |
502 | |
503 | LLVM_ABI unsigned getAssumedAddrSpace(const Value *V) const; |
504 | |
505 | LLVM_ABI bool isSingleThreaded() const; |
506 | |
507 | LLVM_ABI std::pair<const Value *, unsigned> |
508 | getPredicatedAddrSpace(const Value *V) const; |
509 | |
510 | /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p |
511 | /// NewV, which has a different address space. This should happen for every |
512 | /// operand index that collectFlatAddressOperands returned for the intrinsic. |
513 | /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the |
514 | /// new value (which may be the original \p II with modified operands). |
515 | LLVM_ABI Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, |
516 | Value *OldV, |
517 | Value *NewV) const; |
518 | |
519 | /// Test whether calls to a function lower to actual program function |
520 | /// calls. |
521 | /// |
522 | /// The idea is to test whether the program is likely to require a 'call' |
523 | /// instruction or equivalent in order to call the given function. |
524 | /// |
525 | /// FIXME: It's not clear that this is a good or useful query API. Clients |
526 | /// should probably move to simpler cost metrics using the above. |
527 | /// Alternatively, we could split the cost interface into distinct code-size |
528 | /// and execution-speed costs. This would allow modelling the core of this |
529 | /// query more accurately as a call is a single small instruction, but |
530 | /// incurs significant execution cost. |
531 | LLVM_ABI bool isLoweredToCall(const Function *F) const; |
532 | |
533 | struct LSRCost { |
534 | /// TODO: Some of these could be merged. Also, a lexical ordering |
535 | /// isn't always optimal. |
536 | unsigned Insns; |
537 | unsigned NumRegs; |
538 | unsigned AddRecCost; |
539 | unsigned NumIVMuls; |
540 | unsigned NumBaseAdds; |
541 | unsigned ImmCost; |
542 | unsigned SetupCost; |
543 | unsigned ScaleCost; |
544 | }; |
545 | |
546 | /// Parameters that control the generic loop unrolling transformation. |
547 | struct UnrollingPreferences { |
548 | /// The cost threshold for the unrolled loop. Should be relative to the |
549 | /// getInstructionCost values returned by this API, and the expectation is |
550 | /// that the unrolled loop's instructions when run through that interface |
551 | /// should not exceed this cost. However, this is only an estimate. Also, |
552 | /// specific loops may be unrolled even with a cost above this threshold if |
553 | /// deemed profitable. Set this to UINT_MAX to disable the loop body cost |
554 | /// restriction. |
555 | unsigned Threshold; |
556 | /// If complete unrolling will reduce the cost of the loop, we will boost |
557 | /// the Threshold by a certain percent to allow more aggressive complete |
558 | /// unrolling. This value provides the maximum boost percentage that we |
559 | /// can apply to Threshold (The value should be no less than 100). |
560 | /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, |
561 | /// MaxPercentThresholdBoost / 100) |
562 | /// E.g. if complete unrolling reduces the loop execution time by 50% |
563 | /// then we boost the threshold by the factor of 2x. If unrolling is not |
564 | /// expected to reduce the running time, then we do not increase the |
565 | /// threshold. |
566 | unsigned MaxPercentThresholdBoost; |
567 | /// The cost threshold for the unrolled loop when optimizing for size (set |
568 | /// to UINT_MAX to disable). |
569 | unsigned OptSizeThreshold; |
570 | /// The cost threshold for the unrolled loop, like Threshold, but used |
571 | /// for partial/runtime unrolling (set to UINT_MAX to disable). |
572 | unsigned PartialThreshold; |
573 | /// The cost threshold for the unrolled loop when optimizing for size, like |
574 | /// OptSizeThreshold, but used for partial/runtime unrolling (set to |
575 | /// UINT_MAX to disable). |
576 | unsigned PartialOptSizeThreshold; |
577 | /// A forced unrolling factor (the number of concatenated bodies of the |
578 | /// original loop in the unrolled loop body). When set to 0, the unrolling |
579 | /// transformation will select an unrolling factor based on the current cost |
580 | /// threshold and other factors. |
581 | unsigned Count; |
582 | /// Default unroll count for loops with run-time trip count. |
583 | unsigned DefaultUnrollRuntimeCount; |
584 | // Set the maximum unrolling factor. The unrolling factor may be selected |
585 | // using the appropriate cost threshold, but may not exceed this number |
586 | // (set to UINT_MAX to disable). This does not apply in cases where the |
587 | // loop is being fully unrolled. |
588 | unsigned MaxCount; |
589 | /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound |
590 | /// to be overridden by a target gives more flexibility in certain cases. |
591 | /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8. |
592 | unsigned MaxUpperBound; |
593 | /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but |
594 | /// applies even if full unrolling is selected. This allows a target to fall |
595 | /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. |
596 | unsigned FullUnrollMaxCount; |
597 | // Represents number of instructions optimized when "back edge" |
598 | // becomes "fall through" in unrolled loop. |
599 | // For now we count a conditional branch on a backedge and a comparison |
600 | // feeding it. |
601 | unsigned BEInsns; |
602 | /// Allow partial unrolling (unrolling of loops to expand the size of the |
603 | /// loop body, not only to eliminate small constant-trip-count loops). |
604 | bool Partial; |
605 | /// Allow runtime unrolling (unrolling of loops to expand the size of the |
606 | /// loop body even when the number of loop iterations is not known at |
607 | /// compile time). |
608 | bool Runtime; |
609 | /// Allow generation of a loop remainder (extra iterations after unroll). |
610 | bool AllowRemainder; |
611 | /// Allow emitting expensive instructions (such as divisions) when computing |
612 | /// the trip count of a loop for runtime unrolling. |
613 | bool AllowExpensiveTripCount; |
614 | /// Apply loop unroll on any kind of loop |
615 | /// (mainly to loops that fail runtime unrolling). |
616 | bool Force; |
617 | /// Allow using trip count upper bound to unroll loops. |
618 | bool UpperBound; |
619 | /// Allow unrolling of all the iterations of the runtime loop remainder. |
620 | bool UnrollRemainder; |
621 | /// Allow unroll and jam. Used to enable unroll and jam for the target. |
622 | bool UnrollAndJam; |
623 | /// Threshold for unroll and jam, for inner loop size. The 'Threshold' |
624 | /// value above is used during unroll and jam for the outer loop size. |
625 | /// This value is used in the same manner to limit the size of the inner |
626 | /// loop. |
627 | unsigned UnrollAndJamInnerLoopThreshold; |
628 | /// Don't allow loop unrolling to simulate more than this number of |
629 | /// iterations when checking full unroll profitability |
630 | unsigned MaxIterationsCountToAnalyze; |
631 | /// Don't disable runtime unroll for the loops which were vectorized. |
632 | bool UnrollVectorizedLoop = false; |
633 | /// Don't allow runtime unrolling if expanding the trip count takes more |
634 | /// than SCEVExpansionBudget. |
635 | unsigned SCEVExpansionBudget; |
636 | /// Allow runtime unrolling multi-exit loops. Should only be set if the |
637 | /// target determined that multi-exit unrolling is profitable for the loop. |
638 | /// Fall back to the generic logic to determine whether multi-exit unrolling |
639 | /// is profitable if set to false. |
640 | bool RuntimeUnrollMultiExit; |
641 | }; |
642 | |
643 | /// Get target-customized preferences for the generic loop unrolling |
644 | /// transformation. The caller will initialize UP with the current |
645 | /// target-independent defaults. |
646 | LLVM_ABI void getUnrollingPreferences(Loop *L, ScalarEvolution &, |
647 | UnrollingPreferences &UP, |
648 | OptimizationRemarkEmitter *ORE) const; |
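
// Illustrative sketch (not part of the upstream header): roughly how a target
// implementation adjusts the defaults it is handed (the class name and the
// chosen values are made up).
//
//   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
//                                           TTI::UnrollingPreferences &UP,
//                                           OptimizationRemarkEmitter *ORE) const {
//     UP.Partial = true;         // allow partial unrolling
//     UP.Runtime = true;         // and runtime unrolling
//     UP.MaxCount = 4;           // but never by more than a factor of 4
//     UP.PartialThreshold = 150; // with a slightly larger size budget
//   }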
649 | |
650 | /// Query the target whether it would be profitable to convert the given loop |
651 | /// into a hardware loop. |
652 | LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
653 | AssumptionCache &AC, |
654 | TargetLibraryInfo *LibInfo, |
655 | HardwareLoopInfo &HWLoopInfo) const; |
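
// Illustrative sketch (not part of the upstream header): the usual client
// pattern pairs HardwareLoopInfo (defined above) with this query. `L`, `SE`,
// `LI`, `DT`, `AC`, `LibInfo` and `TTI` are assumed to be in scope.
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (HWLoopInfo.canAnalyze(LI) &&
//       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT)) {
//     // Safe to emit the target's hardware-loop intrinsics for this loop.
//   }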
656 | |
657 | /// Query the target for the minimum vectorization factor at which epilogue |
658 | /// vectorization should be considered. |
659 | LLVM_ABI unsigned getEpilogueVectorizationMinVF() const; |
660 | |
661 | /// Query the target whether it would be preferred to create a predicated |
662 | /// vector loop, which can avoid the need to emit a scalar epilogue loop. |
663 | LLVM_ABI bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const; |
664 | |
665 | /// Query the target what the preferred style of tail folding is. |
666 | /// \param IVUpdateMayOverflow Tells whether it is known if the IV update |
667 | /// may (or will never) overflow for the suggested VF/UF in the given loop. |
668 | /// Targets can use this information to select a more optimal tail folding |
669 | /// style. The value conservatively defaults to true, such that no assumptions |
670 | /// are made on overflow. |
671 | LLVM_ABI TailFoldingStyle |
672 | getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const; |
673 | |
674 | /// Parameters that control the loop peeling transformation. |
675 | struct PeelingPreferences { |
676 | /// A forced peeling factor (the number of bodies of the original loop |
677 | /// that should be peeled off before the loop body). When set to 0, a |
678 | /// peeling factor is chosen based on profile information and other factors. |
679 | unsigned PeelCount; |
680 | /// Allow peeling off loop iterations. |
681 | bool AllowPeeling; |
682 | /// Allow peeling off loop iterations for loop nests. |
683 | bool AllowLoopNestsPeeling; |
684 | /// Allow peeling based on profile. Used to enable peeling off all |
685 | /// iterations based on the provided profile. |
686 | /// If the value is true, the peeling cost model can decide to peel only |
687 | /// some iterations, in which case it will set this to false. |
688 | bool PeelProfiledIterations; |
689 | |
690 | /// Peel off the last PeelCount loop iterations. |
691 | bool PeelLast; |
692 | }; |
693 | |
694 | /// Get target-customized preferences for the generic loop peeling |
695 | /// transformation. The caller will initialize \p PP with the current |
696 | /// target-independent defaults with information from \p L and \p SE. |
697 | LLVM_ABI void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
698 | PeelingPreferences &PP) const; |
699 | |
700 | /// Targets can implement their own combinations for target-specific |
701 | /// intrinsics. This function will be called from the InstCombine pass every |
702 | /// time a target-specific intrinsic is encountered. |
703 | /// |
704 | /// \returns std::nullopt to not do anything target specific, or a value |
705 | /// that will be returned from the InstCombiner. It is also possible to stop |
706 | /// further processing of the intrinsic by returning nullptr. |
707 | LLVM_ABI std::optional<Instruction *> |
708 | instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const; |
709 | /// Can be used to implement target-specific instruction combining. |
710 | /// \see instCombineIntrinsic |
711 | LLVM_ABI std::optional<Value *> |
712 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
713 | APInt DemandedMask, KnownBits &Known, |
714 | bool &KnownBitsComputed) const; |
715 | /// Can be used to implement target-specific instruction combining. |
716 | /// \see instCombineIntrinsic |
717 | LLVM_ABI std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
718 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
719 | APInt &UndefElts2, APInt &UndefElts3, |
720 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
721 | SimplifyAndSetOp) const; |
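
// Illustrative sketch (not part of the upstream header): the general shape of
// a target's instCombineIntrinsic hook. The intrinsic and the fold are made
// up; only the return-value contract matches the documentation above.
//
//   std::optional<Instruction *>
//   MyTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
//     if (II.getIntrinsicID() == Intrinsic::my_target_identity)
//       // Fold the hypothetical my.target.identity(x) --> x.
//       return IC.replaceInstUsesWith(II, II.getArgOperand(0));
//     return std::nullopt; // No target-specific combine; use generic handling.
//   }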
722 | /// @} |
723 | |
724 | /// \name Scalar Target Information |
725 | /// @{ |
726 | |
727 | /// Flags indicating the kind of support for population count. |
728 | /// |
729 | /// Compared to the SW implementation, HW support is supposed to |
730 | /// significantly boost the performance when the population is dense, and it |
731 | /// may or may not degrade performance if the population is sparse. A HW |
732 | /// support is considered as "Fast" if it can outperform, or is on a par |
733 | /// with, SW implementation when the population is sparse; otherwise, it is |
734 | /// considered as "Slow". |
735 | enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; |
736 | |
737 | /// Return true if the specified immediate is a legal add immediate, that |
738 | /// is the target has add instructions which can add a register with the |
739 | /// immediate without having to materialize the immediate into a register. |
740 | LLVM_ABI bool isLegalAddImmediate(int64_t Imm) const; |
741 | |
742 | /// Return true if adding the specified scalable immediate is legal, that is |
743 | /// the target has add instructions which can add a register with the |
744 | /// immediate (multiplied by vscale) without having to materialize the |
745 | /// immediate into a register. |
746 | LLVM_ABI bool isLegalAddScalableImmediate(int64_t Imm) const; |
747 | |
748 | /// Return true if the specified immediate is a legal icmp immediate, |
749 | /// that is the target has icmp instructions which can compare a register |
750 | /// against the immediate without having to materialize the immediate into a |
751 | /// register. |
752 | LLVM_ABI bool isLegalICmpImmediate(int64_t Imm) const; |
753 | |
754 | /// Return true if the addressing mode represented by AM is legal for |
755 | /// this target, for a load/store of the specified type. |
756 | /// The type may be VoidTy, in which case only return true if the addressing |
757 | /// mode is legal for a load/store of any legal type. |
758 | /// If target returns true in LSRWithInstrQueries(), I may be valid. |
759 | /// \param ScalableOffset represents a quantity of bytes multiplied by vscale, |
760 | /// an invariant value known only at runtime. Most targets should not accept |
761 | /// a scalable offset. |
762 | /// |
763 | /// TODO: Handle pre/postinc as well. |
764 | LLVM_ABI bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, |
765 | int64_t BaseOffset, bool HasBaseReg, |
766 | int64_t Scale, unsigned AddrSpace = 0, |
767 | Instruction *I = nullptr, |
768 | int64_t ScalableOffset = 0) const; |
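
// Illustrative sketch (not part of the upstream header): checking whether a
// base-register + constant-offset access of an i32 would fold into a
// load/store on this target. `Ctx` and `TTI` are assumed to be in scope.
//
//   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
//                                          /*BaseGV=*/nullptr,
//                                          /*BaseOffset=*/16,
//                                          /*HasBaseReg=*/true,
//                                          /*Scale=*/0);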
769 | |
770 | /// Return true if LSR cost of C1 is lower than C2. |
771 | LLVM_ABI bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
772 | const TargetTransformInfo::LSRCost &C2) const; |
773 | |
774 | /// Return true if LSR major cost is number of registers. Targets which |
775 | /// implement their own isLSRCostLess and unset number of registers as major |
776 | /// cost should return false, otherwise return true. |
777 | LLVM_ABI bool isNumRegsMajorCostOfLSR() const; |
778 | |
779 | /// Return true if LSR should drop a found solution if it's calculated to be |
780 | /// less profitable than the baseline. |
781 | LLVM_ABI bool shouldDropLSRSolutionIfLessProfitable() const; |
782 | |
783 | /// \returns true if LSR should not optimize a chain that includes \p I. |
784 | LLVM_ABI bool isProfitableLSRChainElement(Instruction *I) const; |
785 | |
786 | /// Return true if the target can fuse a compare and branch. |
787 | /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost |
788 | /// calculation for the instructions in a loop. |
789 | LLVM_ABI bool canMacroFuseCmp() const; |
790 | |
791 | /// Return true if the target can save a compare for loop count, for example |
792 | /// hardware loop saves a compare. |
793 | LLVM_ABI bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, |
794 | LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, |
795 | TargetLibraryInfo *LibInfo) const; |
796 | |
797 | enum AddressingModeKind { |
798 | AMK_PreIndexed, |
799 | AMK_PostIndexed, |
800 | AMK_None |
801 | }; |
802 | |
803 | /// Return the preferred addressing mode LSR should make efforts to generate. |
804 | LLVM_ABI AddressingModeKind |
805 | getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const; |
806 | |
807 | /// Return true if the target supports masked store. |
808 | LLVM_ABI bool isLegalMaskedStore(Type *DataType, Align Alignment, |
809 | unsigned AddressSpace) const; |
810 | /// Return true if the target supports masked load. |
811 | LLVM_ABI bool isLegalMaskedLoad(Type *DataType, Align Alignment, |
812 | unsigned AddressSpace) const; |
813 | |
814 | /// Return true if the target supports nontemporal store. |
815 | LLVM_ABI bool isLegalNTStore(Type *DataType, Align Alignment) const; |
816 | /// Return true if the target supports nontemporal load. |
817 | LLVM_ABI bool isLegalNTLoad(Type *DataType, Align Alignment) const; |
818 | |
819 | /// \returns true if the target supports broadcasting a load to a vector of |
820 | /// type <NumElements x ElementTy>. |
821 | LLVM_ABI bool isLegalBroadcastLoad(Type *ElementTy, |
822 | ElementCount NumElements) const; |
823 | |
824 | /// Return true if the target supports masked scatter. |
825 | LLVM_ABI bool isLegalMaskedScatter(Type *DataType, Align Alignment) const; |
826 | /// Return true if the target supports masked gather. |
827 | LLVM_ABI bool isLegalMaskedGather(Type *DataType, Align Alignment) const; |
828 | /// Return true if the target forces scalarizing of llvm.masked.gather |
829 | /// intrinsics. |
830 | LLVM_ABI bool forceScalarizeMaskedGather(VectorType *Type, |
831 | Align Alignment) const; |
832 | /// Return true if the target forces scalarizing of llvm.masked.scatter |
833 | /// intrinsics. |
834 | LLVM_ABI bool forceScalarizeMaskedScatter(VectorType *Type, |
835 | Align Alignment) const; |
836 | |
837 | /// Return true if the target supports masked compress store. |
838 | LLVM_ABI bool isLegalMaskedCompressStore(Type *DataType, |
839 | Align Alignment) const; |
840 | /// Return true if the target supports masked expand load. |
841 | LLVM_ABI bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const; |
842 | |
843 | /// Return true if the target supports strided load. |
844 | LLVM_ABI bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const; |
845 | |
846 | /// Return true if the target supports interleaved access for the given vector |
847 | /// type \p VTy, interleave factor \p Factor, alignment \p Alignment and |
848 | /// address space \p AddrSpace. |
849 | LLVM_ABI bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, |
850 | Align Alignment, |
851 | unsigned AddrSpace) const; |
852 | |
853 | // Return true if the target supports masked vector histograms. |
854 | LLVM_ABI bool isLegalMaskedVectorHistogram(Type *AddrType, |
855 | Type *DataType) const; |
856 | |
857 | /// Return true if this is an alternating opcode pattern that can be lowered |
858 | /// to a single instruction on the target. In X86 this is for the addsub |
859 | /// instruction which corresponds to a Shuffle + FAdd + FSub pattern in IR. |
860 | /// This function expects two opcodes, \p Opcode0 and \p Opcode1, selected by |
861 | /// \p OpcodeMask. The mask contains one bit per lane and is a `0` when |
862 | /// \p Opcode0 is selected and a `1` when \p Opcode1 is selected. |
863 | /// \p VecTy is the vector type of the instruction to be generated. |
864 | LLVM_ABI bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, |
865 | unsigned Opcode1, |
866 | const SmallBitVector &OpcodeMask) const; |
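
// Illustrative sketch (not part of the upstream header): asking whether an
// alternating fsub/fadd pattern over <4 x float> lowers to one instruction
// (an addsub-style operation). `Ctx` and `TTI` are assumed to be in scope.
//
//   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
//   SmallBitVector OpcodeMask(4);
//   OpcodeMask.set(0);
//   OpcodeMask.set(2); // lanes 0,2 take Opcode1 (FSub); lanes 1,3 take Opcode0 (FAdd)
//   bool IsSingleInstr = TTI.isLegalAltInstr(VecTy, Instruction::FAdd,
//                                            Instruction::FSub, OpcodeMask);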
867 | |
868 | /// Return true if we should be enabling ordered reductions for the target. |
869 | LLVM_ABI bool enableOrderedReductions() const; |
870 | |
871 | /// Return true if the target has a unified operation to calculate division |
872 | /// and remainder. If so, the additional implicit multiplication and |
873 | /// subtraction required to calculate a remainder from division are free. This |
874 | /// can enable more aggressive transformations for division and remainder than |
875 | /// would typically be allowed using throughput or size cost models. |
876 | LLVM_ABI bool hasDivRemOp(Type *DataType, bool IsSigned) const; |
877 | |
878 | /// Return true if the given instruction (assumed to be a memory access |
879 | /// instruction) has a volatile variant. If that's the case then we can avoid |
880 | /// addrspacecast to generic AS for volatile loads/stores. Default |
881 | /// implementation returns false, which prevents address space inference for |
882 | /// volatile loads/stores. |
883 | LLVM_ABI bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; |
884 | |
885 | /// Return true if target doesn't mind addresses in vectors. |
886 | LLVM_ABI bool prefersVectorizedAddressing() const; |
887 | |
888 | /// Return the cost of the scaling factor used in the addressing |
889 | /// mode represented by AM for this target, for a load/store |
890 | /// of the specified type. |
891 | /// If the AM is supported, the return value must be >= 0. |
892 | /// If the AM is not supported, it returns a negative value. |
893 | /// TODO: Handle pre/postinc as well. |
894 | LLVM_ABI InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
895 | StackOffset BaseOffset, |
896 | bool HasBaseReg, int64_t Scale, |
897 | unsigned AddrSpace = 0) const; |
898 | |
899 | /// Return true if the loop strength reduce pass should make |
900 | /// Instruction* based TTI queries to isLegalAddressingMode(). This is |
901 | /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned |
902 | /// immediate offset and no index register. |
903 | LLVM_ABI bool LSRWithInstrQueries() const; |
904 | |
905 | /// Return true if it's free to truncate a value of type Ty1 to type |
906 | /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16 |
907 | /// by referencing its sub-register AX. |
908 | LLVM_ABI bool isTruncateFree(Type *Ty1, Type *Ty2) const; |
909 | |
910 | /// Return true if it is profitable to hoist instruction in the |
911 | /// then/else to before if. |
912 | LLVM_ABI bool isProfitableToHoist(Instruction *I) const; |
913 | |
914 | LLVM_ABI bool useAA() const; |
915 | |
916 | /// Return true if this type is legal. |
917 | LLVM_ABI bool isTypeLegal(Type *Ty) const; |
918 | |
919 | /// Returns the estimated number of registers required to represent \p Ty. |
920 | LLVM_ABI unsigned getRegUsageForType(Type *Ty) const; |
921 | |
922 | /// Return true if switches should be turned into lookup tables for the |
923 | /// target. |
924 | LLVM_ABI bool shouldBuildLookupTables() const; |
925 | |
926 | /// Return true if switches should be turned into lookup tables |
927 | /// containing this constant value for the target. |
928 | LLVM_ABI bool shouldBuildLookupTablesForConstant(Constant *C) const; |
929 | |
930 | /// Return true if lookup tables should be turned into relative lookup tables. |
931 | LLVM_ABI bool shouldBuildRelLookupTables() const; |
932 | |
933 | /// Return true if the input function which is cold at all call sites, |
934 | /// should use coldcc calling convention. |
935 | LLVM_ABI bool useColdCCForColdCall(Function &F) const; |
936 | |
937 | LLVM_ABI bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const; |
938 | |
939 | /// Identifies if the vector form of the intrinsic has a scalar operand. |
940 | LLVM_ABI bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, |
941 | unsigned ScalarOpdIdx) const; |
942 | |
943 | /// Identifies if the vector form of the intrinsic is overloaded on the type |
944 | /// of the operand at index \p OpdIdx, or on the return type if \p OpdIdx is |
945 | /// -1. |
946 | LLVM_ABI bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, |
947 | int OpdIdx) const; |
948 | |
949 | /// Identifies if the vector form of the intrinsic that returns a struct is |
950 | /// overloaded at the struct element index \p RetIdx. |
951 | LLVM_ABI bool |
952 | isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, |
953 | int RetIdx) const; |
954 | |
955 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
956 | /// are set if the demanded result elements need to be inserted and/or |
957 | /// extracted from vectors. The involved values may be passed in VL if |
958 | /// Insert is true. |
959 | LLVM_ABI InstructionCost getScalarizationOverhead( |
960 | VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, |
961 | TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, |
962 | ArrayRef<Value *> VL = {}) const; |
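
// Illustrative sketch (not part of the upstream header): estimating the cost
// of moving every element of a <4 x i32> between vector and scalar registers,
// as a vectorizer might when it has to scalarize an operation. `Ctx` and `TTI`
// are assumed to be in scope.
//
//   auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
//   APInt DemandedElts = APInt::getAllOnes(4);
//   InstructionCost C = TTI.getScalarizationOverhead(
//       VecTy, DemandedElts, /*Insert=*/true, /*Extract=*/true,
//       TTI::TCK_RecipThroughput);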
963 | |
964 | /// Estimate the overhead of scalarizing an instructions unique |
965 | /// non-constant operands. The (potentially vector) types to use for each of |
966 | /// argument are passes via Tys. |
967 | LLVM_ABI InstructionCost getOperandsScalarizationOverhead( |
968 | ArrayRef<const Value *> Args, ArrayRef<Type *> Tys, |
969 | TTI::TargetCostKind CostKind) const; |
970 | |
971 | /// If target has efficient vector element load/store instructions, it can |
972 | /// return true here so that insertion/extraction costs are not added to |
973 | /// the scalarization cost of a load/store. |
974 | LLVM_ABI bool supportsEfficientVectorElementLoadStore() const; |
975 | |
976 | /// If the target supports tail calls. |
977 | LLVM_ABI bool supportsTailCalls() const; |
978 | |
979 | /// If target supports tail call on \p CB |
980 | LLVM_ABI bool supportsTailCallFor(const CallBase *CB) const; |
981 | |
982 | /// Don't restrict interleaved unrolling to small loops. |
983 | LLVM_ABI bool enableAggressiveInterleaving(bool LoopHasReductions) const; |
984 | |
985 | /// Returns options for expansion of memcmp. IsZeroCmp is |
986 | /// true if this is the expansion of memcmp(p1, p2, s) == 0. |
987 | struct MemCmpExpansionOptions { |
988 | // Return true if memcmp expansion is enabled. |
989 | operator bool() const { return MaxNumLoads > 0; } |
990 | |
991 | // Maximum number of load operations. |
992 | unsigned MaxNumLoads = 0; |
993 | |
994 | // The list of available load sizes (in bytes), sorted in decreasing order. |
995 | SmallVector<unsigned, 8> LoadSizes; |
996 | |
997 | // For memcmp expansion when the memcmp result is only compared equal or |
998 | // not-equal to 0, allow up to this number of load pairs per block. As an |
999 | // example, this may allow 'memcmp(a, b, 3) == 0' in a single block: |
1000 | // a0 = load2bytes &a[0] |
1001 | // b0 = load2bytes &b[0] |
1002 | // a2 = load1byte &a[2] |
1003 | // b2 = load1byte &b[2] |
1004 | // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 |
1005 | unsigned NumLoadsPerBlock = 1; |
1006 | |
1007 | // Set to true to allow overlapping loads. For example, 7-byte compares can |
1008 | // be done with two 4-byte compares instead of 4+2+1-byte compares. This |
1009 | // requires all loads in LoadSizes to be doable in an unaligned way. |
1010 | bool AllowOverlappingLoads = false; |
1011 | |
1012 | // Sometimes, the amount of data that needs to be compared is smaller than |
1013 | // the standard register size, but it cannot be loaded with just one load |
1014 | // instruction. For example, if the size of the memory comparison is 6 |
1015 | // bytes, we can handle it more efficiently by loading all 6 bytes in a |
1016 | // single block and generating an 8-byte number, instead of generating two |
1017 | // separate blocks with conditional jumps for 4 and 2 byte loads. This |
1018 | // approach simplifies the process and produces the comparison result as |
1019 | // normal. This array lists the allowed sizes of memcmp tails that can be |
1020 | // merged into one block |
1021 | SmallVector<unsigned, 4> AllowedTailExpansions; |
1022 | }; |
1023 | LLVM_ABI MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
1024 | bool IsZeroCmp) const; |
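
// Illustrative sketch (not part of the upstream header): roughly how a target
// with fast unaligned 8/4/2/1-byte loads might answer this query (the class
// name and the chosen values are made up).
//
//   TTI::MemCmpExpansionOptions
//   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
//     TTI::MemCmpExpansionOptions Options;
//     Options.MaxNumLoads = OptSize ? 2 : 8; // non-zero enables expansion
//     Options.LoadSizes = {8, 4, 2, 1};      // in bytes, decreasing order
//     Options.AllowOverlappingLoads = true;  // e.g. 7 bytes as two 4-byte loads
//     if (IsZeroCmp)
//       Options.NumLoadsPerBlock = 2;        // equality-only: pair loads per block
//     return Options;
//   }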
1025 | |
1026 | /// Should the Select Optimization pass be enabled and run. |
1027 | LLVM_ABI bool enableSelectOptimize() const; |
1028 | |
1029 | /// Should the Select Optimization pass treat the given instruction like a |
1030 | /// select, potentially converting it to a conditional branch. This can |
1031 | /// include select-like instructions like or(zext(c), x) that can be converted |
1032 | /// to selects. |
1033 | LLVM_ABI bool shouldTreatInstructionLikeSelect(const Instruction *I) const; |
1034 | |
1035 | /// Enable matching of interleaved access groups. |
1036 | LLVM_ABI bool enableInterleavedAccessVectorization() const; |
1037 | |
1038 | /// Enable matching of interleaved access groups that contain predicated |
1039 | /// accesses or gaps and therefore vectorized using masked |
1040 | /// vector loads/stores. |
1041 | LLVM_ABI bool enableMaskedInterleavedAccessVectorization() const; |
1042 | |
1043 | /// Indicate that it is potentially unsafe to automatically vectorize |
1044 | /// floating-point operations because vector and scalar floating-point |
1045 | /// semantics may differ. For example, ARM NEON v7 SIMD math |
1046 | /// does not support IEEE-754 denormal numbers, while depending on the |
1047 | /// platform, scalar floating-point math does. |
1048 | /// This applies to floating-point math operations and calls, not memory |
1049 | /// operations, shuffles, or casts. |
1050 | LLVM_ABI bool isFPVectorizationPotentiallyUnsafe() const; |
1051 | |
1052 | /// Determine if the target supports unaligned memory accesses. |
1053 | LLVM_ABI bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
1054 | unsigned BitWidth, |
1055 | unsigned AddressSpace = 0, |
1056 | Align Alignment = Align(1), |
1057 | unsigned *Fast = nullptr) const; |
1058 | |
1059 | /// Return hardware support for population count. |
1060 | LLVM_ABI PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; |
1061 | |
1062 | /// Return true if the hardware has a fast square-root instruction. |
1063 | LLVM_ABI bool haveFastSqrt(Type *Ty) const; |
1064 | |
1065 | /// Return true if the cost of the instruction is too high to speculatively |
1066 | /// execute and should be kept behind a branch. |
1067 | /// This normally just wraps around a getInstructionCost() call, but some |
1068 | /// targets might report a low TCK_SizeAndLatency value that is incompatible |
1069 | /// with the fixed TCC_Expensive value. |
1070 | /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute(). |
1071 | LLVM_ABI bool isExpensiveToSpeculativelyExecute(const Instruction *I) const; |
1072 | |
1073 | /// Return true if it is faster to check if a floating-point value is NaN |
1074 | /// (or not-NaN) versus a comparison against a constant FP zero value. |
1075 | /// Targets should override this if materializing a 0.0 for comparison is |
1076 | /// generally as cheap as checking for ordered/unordered. |
1077 | LLVM_ABI bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; |
1078 | |
1079 | /// Return the expected cost of supporting the floating point operation |
1080 | /// of the specified type. |
1081 | LLVM_ABI InstructionCost getFPOpCost(Type *Ty) const; |
1082 | |
1083 | /// Return the expected cost of materializing for the given integer |
1084 | /// immediate of the specified type. |
1085 | LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
1086 | TargetCostKind CostKind) const; |
1087 | |
1088 | /// Return the expected cost of materialization for the given integer |
1089 | /// immediate of the specified type for a given instruction. The cost can be |
1090 | /// zero if the immediate can be folded into the specified instruction. |
1091 | LLVM_ABI InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
1092 | const APInt &Imm, Type *Ty, |
1093 | TargetCostKind CostKind, |
1094 | Instruction *Inst = nullptr) const; |
1095 | LLVM_ABI InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
1096 | const APInt &Imm, Type *Ty, |
1097 | TargetCostKind CostKind) const; |
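
// Illustrative sketch (not part of the upstream header): a constant-hoisting
// style query -- is this immediate free when used directly as the second
// operand of an 'and', or should it be materialized once and reused? `Imm`,
// `Ctx` and `TTI` are assumed to be in scope.
//
//   InstructionCost ImmCost =
//       TTI.getIntImmCostInst(Instruction::And, /*Idx=*/1, Imm,
//                             Type::getInt64Ty(Ctx),
//                             TargetTransformInfo::TCK_SizeAndLatency);
//   bool FoldsIntoAnd = (ImmCost == TargetTransformInfo::TCC_Free);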
1098 | |
1099 | /// Return the expected cost for the given integer when optimising |
1100 | /// for size. This is different than the other integer immediate cost |
1101 | /// functions in that it is subtarget agnostic. This is useful when you e.g. |
1102 | /// target one ISA such as AArch32 but smaller encodings could be possible |
1103 | /// with another such as Thumb. This return value is used as a penalty when |
1104 | /// the total costs for a constant is calculated (the bigger the cost, the |
1105 | /// more beneficial constant hoisting is). |
1106 | LLVM_ABI InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
1107 | const APInt &Imm, |
1108 | Type *Ty) const; |
1109 | |
1110 | /// It can be advantageous to detach complex constants from their uses to make |
1111 | /// their generation cheaper. This hook allows targets to report when such |
1112 | /// transformations might negatively affect the code generation of the |
1113 | /// underlying operation. The motivating example is division, where hoisting |
1114 | /// constants prevents the code generator from transforming them into |
1115 | /// combinations of simpler operations. |
1116 | LLVM_ABI bool preferToKeepConstantsAttached(const Instruction &Inst, |
1117 | const Function &Fn) const; |
1118 | |
1119 | /// @} |
1120 | |
1121 | /// \name Vector Target Information |
1122 | /// @{ |
1123 | |
1124 | /// The various kinds of shuffle patterns for vector queries. |
1125 | enum ShuffleKind { |
1126 | SK_Broadcast, ///< Broadcast element 0 to all other elements. |
1127 | SK_Reverse, ///< Reverse the order of the vector. |
1128 | SK_Select, ///< Selects elements from the corresponding lane of |
1129 | ///< either source operand. This is equivalent to a |
1130 | ///< vector select with a constant condition operand. |
1131 | SK_Transpose, ///< Transpose two vectors. |
1132 | SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. |
1133 | SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset. |
1134 | SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one |
1135 | ///< with any shuffle mask. |
1136 | SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any |
1137 | ///< shuffle mask. |
1138 | SK_Splice ///< Concatenates elements from the first input vector |
1139 | ///< with elements of the second input vector, returning |
1140 | ///< a vector of the same type as the input vectors. |
1141 | ///< Index indicates the start offset in the first input vector. |
1142 | }; |
1143 | |
1144 | /// Additional information about an operand's possible values. |
1145 | enum OperandValueKind { |
1146 | OK_AnyValue, // Operand can have any value. |
1147 | OK_UniformValue, // Operand is uniform (splat of a value). |
1148 | OK_UniformConstantValue, // Operand is uniform constant. |
1149 | OK_NonUniformConstantValue // Operand is a non-uniform constant value. |
1150 | }; |
1151 | |
1152 | /// Additional properties of an operand's values. |
1153 | enum OperandValueProperties { |
1154 | OP_None = 0, |
1155 | OP_PowerOf2 = 1, |
1156 | OP_NegatedPowerOf2 = 2, |
1157 | }; |
1158 | |
1159 | // Describe the values an operand can take. We're in the process |
1160 | // of migrating uses of OperandValueKind and OperandValueProperties |
1161 | // to use this class, and then will change the internal representation. |
1162 | struct OperandValueInfo { |
1163 | OperandValueKind Kind = OK_AnyValue; |
1164 | OperandValueProperties Properties = OP_None; |
1165 | |
1166 | bool isConstant() const { |
1167 | return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue; |
1168 | } |
1169 | bool isUniform() const { |
1170 | return Kind == OK_UniformConstantValue || Kind == OK_UniformValue; |
1171 | } |
1172 | bool isPowerOf2() const { |
1173 | return Properties == OP_PowerOf2; |
1174 | } |
1175 | bool isNegatedPowerOf2() const { |
1176 | return Properties == OP_NegatedPowerOf2; |
1177 | } |
1178 | |
1179 | OperandValueInfo getNoProps() const { |
1180 | return {Kind, OP_None}; |
1181 | } |
1182 | }; |
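| // Illustrative sketch: querying operand information for a cost model, |
| // assuming `Op` is a Value* operand (e.g. a splat of the constant 8): |
| // |
| //   TTI::OperandValueInfo Info = TTI::getOperandInfo(Op); |
| //   if (Info.isConstant() && Info.isPowerOf2()) { |
| //     // Kind == OK_UniformConstantValue, Properties == OP_PowerOf2 here. |
| //   } |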
1183 | |
1184 | /// \return the number of registers in the target-provided register class. |
1185 | LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const; |
1186 | |
1187 | /// \return true if the target supports load/store that enables fault |
1188 | /// suppression of memory operands when the source condition is false. |
1189 | LLVM_ABI bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const; |
1190 | |
1191 | /// \return the target-provided register class ID for the provided type, |
1192 | /// accounting for type promotion and other type-legalization techniques that |
1193 | /// the target might apply. However, it specifically does not account for the |
1194 | /// scalarization or splitting of vector types. Should a vector type require |
1195 | /// scalarization or splitting into multiple underlying vector registers, that |
1196 | /// type should be mapped to a register class containing no registers. |
1197 | /// Specifically, this is designed to provide a simple, high-level view of the |
1198 | /// register allocation later performed by the backend. These register classes |
1199 | /// don't necessarily map onto the register classes used by the backend. |
1200 | /// FIXME: It's not currently possible to determine how many registers |
1201 | /// are used by the provided type. |
1202 | LLVM_ABI unsigned getRegisterClassForType(bool Vector, |
1203 | Type *Ty = nullptr) const; |
1204 | |
1205 | /// \return the target-provided register class name |
1206 | LLVM_ABI const char *getRegisterClassName(unsigned ClassID) const; |
1207 | |
1208 | enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector }; |
1209 | |
1210 | /// \return The width of the largest scalar or vector register type. |
1211 | LLVM_ABI TypeSize getRegisterBitWidth(RegisterKind K) const; |
1212 | |
1213 | /// \return The width of the smallest vector register type. |
1214 | LLVM_ABI unsigned getMinVectorRegisterBitWidth() const; |
1215 | |
1216 | /// \return The maximum value of vscale if the target specifies an |
1217 | /// architectural maximum vector length, and std::nullopt otherwise. |
1218 | LLVM_ABI std::optional<unsigned> getMaxVScale() const; |
1219 | |
1220 | /// \return the value of vscale to tune the cost model for. |
1221 | LLVM_ABI std::optional<unsigned> getVScaleForTuning() const; |
1222 | |
1223 | /// \return true if vscale is known to be a power of 2 |
1224 | LLVM_ABI bool isVScaleKnownToBeAPowerOfTwo() const; |
1225 | |
1226 | /// \return True if the vectorization factor should be chosen to |
1227 | /// make the vector of the smallest element type match the size of a |
1228 | /// vector register. For wider element types, this could result in |
1229 | /// creating vectors that span multiple vector registers. |
1230 | /// If false, the vectorization factor will be chosen based on the |
1231 | /// size of the widest element type. |
1232 | /// \p K Register Kind for vectorization. |
1233 | LLVM_ABI bool |
1234 | shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; |
1235 | |
1236 | /// \return The minimum vectorization factor for types of given element |
1237 | /// bit width, or 0 if there is no minimum VF. The returned value only |
1238 | /// applies when shouldMaximizeVectorBandwidth returns true. |
1239 | /// If IsScalable is true, the returned ElementCount must be a scalable VF. |
1240 | LLVM_ABI ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; |
1241 | |
1242 | /// \return The maximum vectorization factor for types of given element |
1243 | /// bit width and opcode, or 0 if there is no maximum VF. |
1244 | /// Currently only used by the SLP vectorizer. |
1245 | LLVM_ABI unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; |
1246 | |
1247 | /// \return The minimum vectorization factor for the store instruction. Given |
1248 | /// the initial estimation of the minimum vector factor and store value type, |
1249 | /// it tries to find the lowest possible VF which still might be profitable |
1250 | /// for the vectorization. |
1251 | /// \param VF Initial estimation of the minimum vector factor. |
1252 | /// \param ScalarMemTy Scalar memory type of the store operation. |
1253 | /// \param ScalarValTy Scalar type of the stored value. |
1254 | /// Currently only used by the SLP vectorizer. |
1255 | LLVM_ABI unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, |
1256 | Type *ScalarValTy) const; |
1257 | |
1258 | /// \return True if \p I should be considered for address type promotion. |
1259 | /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is |
1260 | /// profitable without finding other extensions fed by the same input. |
1261 | LLVM_ABI bool shouldConsiderAddressTypePromotion( |
1262 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; |
1263 | |
1264 | /// \return The size of a cache line in bytes. |
1265 | LLVM_ABI unsigned getCacheLineSize() const; |
1266 | |
1267 | /// The possible cache levels |
1268 | enum class CacheLevel { |
1269 | L1D, // The L1 data cache |
1270 | L2D, // The L2 data cache |
1271 | |
1272 | // We currently do not model L3 caches, as their sizes differ widely between |
1273 | // microarchitectures. Also, we currently do not have a use for L3 cache |
1274 | // size modeling yet. |
1275 | }; |
1276 | |
1277 | /// \return The size of the cache level in bytes, if available. |
1278 | LLVM_ABI std::optional<unsigned> getCacheSize(CacheLevel Level) const; |
1279 | |
1280 | /// \return The associativity of the cache level, if available. |
1281 | LLVM_ABI std::optional<unsigned> |
1282 | getCacheAssociativity(CacheLevel Level) const; |
1283 | |
1284 | /// \return The minimum architectural page size for the target. |
1285 | LLVM_ABI std::optional<unsigned> getMinPageSize() const; |
1286 | |
1287 | /// \return How much before a load we should place the prefetch |
1288 | /// instruction. This is currently measured in number of |
1289 | /// instructions. |
1290 | LLVM_ABI unsigned getPrefetchDistance() const; |
1291 | |
1292 | /// Some HW prefetchers can handle accesses up to a certain constant stride. |
1293 | /// Sometimes prefetching is beneficial even below the HW prefetcher limit, |
1294 | /// and the arguments provided are meant to serve as a basis for deciding this |
1295 | /// for a particular loop. |
1296 | /// |
1297 | /// \param NumMemAccesses Number of memory accesses in the loop. |
1298 | /// \param NumStridedMemAccesses Number of the memory accesses that |
1299 | /// ScalarEvolution could find a known stride |
1300 | /// for. |
1301 | /// \param NumPrefetches Number of software prefetches that will be |
1302 | /// emitted as determined by the addresses |
1303 | /// involved and the cache line size. |
1304 | /// \param HasCall True if the loop contains a call. |
1305 | /// |
1306 | /// \return This is the minimum stride in bytes where it makes sense to start |
1307 | /// adding SW prefetches. The default is 1, i.e. prefetch with any |
1308 | /// stride. |
1309 | LLVM_ABI unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
1310 | unsigned NumStridedMemAccesses, |
1311 | unsigned NumPrefetches, |
1312 | bool HasCall) const; |
1313 | |
1314 | /// \return The maximum number of iterations to prefetch ahead. If |
1315 | /// the required number of iterations is more than this number, no |
1316 | /// prefetching is performed. |
1317 | LLVM_ABI unsigned getMaxPrefetchIterationsAhead() const; |
1318 | |
1319 | /// \return True if prefetching should also be done for writes. |
1320 | LLVM_ABI bool enableWritePrefetching() const; |
1321 | |
1322 | /// \return True if the target wants to issue a prefetch in address space \p AS. |
1323 | LLVM_ABI bool shouldPrefetchAddressSpace(unsigned AS) const; |
1324 | |
1325 | /// \return The cost of a partial reduction, which is a reduction from a |
1326 | /// vector to another vector with fewer elements of larger size. They are |
1327 | /// represented by the llvm.experimental.partial.reduce.add intrinsic, which |
1328 | /// takes an accumulator and a binary operation operand that itself is fed by |
1329 | /// two extends. An example of an operation that uses a partial reduction is a |
1330 | /// dot product, which reduces two vectors to another vector with 4 times |
1331 | /// fewer and 4 times larger elements. |
1332 | LLVM_ABI InstructionCost getPartialReductionCost( |
1333 | unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, |
1334 | ElementCount VF, PartialReductionExtendKind OpAExtend, |
1335 | PartialReductionExtendKind OpBExtend, |
1336 | std::optional<unsigned> BinOp = std::nullopt) const; |
1337 | |
1338 | /// \return The maximum interleave factor that any transform should try to |
1339 | /// perform for this target. This number depends on the level of parallelism |
1340 | /// and the number of execution units in the CPU. |
1341 | LLVM_ABI unsigned getMaxInterleaveFactor(ElementCount VF) const; |
1342 | |
1343 | /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. |
1344 | LLVM_ABI static OperandValueInfo getOperandInfo(const Value *V); |
1345 | |
1346 | /// This is an approximation of reciprocal throughput of a math/logic op. |
1347 | /// A higher cost indicates less expected throughput. |
1348 | /// From Agner Fog's guides, reciprocal throughput is "the average number of |
1349 | /// clock cycles per instruction when the instructions are not part of a |
1350 | /// limiting dependency chain." |
1351 | /// Therefore, costs should be scaled to account for multiple execution units |
1352 | /// on the target that can process this type of instruction. For example, if |
1353 | /// there are 5 scalar integer units and 2 vector integer units that can |
1354 | /// calculate an 'add' in a single cycle, this model should indicate that the |
1355 | /// cost of the vector add instruction is 2.5 times the cost of the scalar |
1356 | /// add instruction. |
1357 | /// \p Args is an optional argument which holds the instruction operands |
1358 | /// values so the TTI can analyze those values searching for special |
1359 | /// cases or optimizations based on those values. |
1360 | /// \p CxtI is the optional original context instruction, if one exists, to |
1361 | /// provide even more information. |
1362 | /// \p TLibInfo is used to search for platform specific vector library |
1363 | /// functions for instructions that might be converted to calls (e.g. frem). |
1364 | LLVM_ABI InstructionCost getArithmeticInstrCost( |
1365 | unsigned Opcode, Type *Ty, |
1366 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1367 | TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None}, |
1368 | TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None}, |
1369 | ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr, |
1370 | const TargetLibraryInfo *TLibInfo = nullptr) const; |
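| // A minimal usage sketch (illustrative only; assumes `TTI` is a |
| // TargetTransformInfo reference and `Ctx` an LLVMContext): |
| // |
| //   auto *V4I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 4); |
| //   InstructionCost VecAdd = |
| //       TTI.getArithmeticInstrCost(Instruction::Add, V4I32); |
| //   InstructionCost ScalAdd = |
| //       TTI.getArithmeticInstrCost(Instruction::Add, Type::getInt32Ty(Ctx)); |
| //   // Under TCK_RecipThroughput and the 5-scalar/2-vector-unit example |
| //   // above, VecAdd would be roughly 2.5x ScalAdd. |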
1371 | |
1372 | /// Returns the cost estimation for an alternating opcode pattern that can be |
1373 | /// lowered to a single instruction on the target. On X86 this is for the |
1374 | /// addsub instruction, which corresponds to a Shuffle + FAdd + FSub pattern in |
1375 | /// IR. This function expects two opcodes, \p Opcode0 and \p Opcode1, selected |
1376 | /// by \p OpcodeMask. The mask contains one bit per lane and is `0` when |
1377 | /// \p Opcode0 is selected and `1` when \p Opcode1 is selected. |
1378 | /// \p VecTy is the vector type of the instruction to be generated. |
1379 | LLVM_ABI InstructionCost getAltInstrCost( |
1380 | VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, |
1381 | const SmallBitVector &OpcodeMask, |
1382 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1383 | |
1384 | /// \return The cost of a shuffle instruction of kind Kind and of type Tp. |
1385 | /// The exact mask may be passed as Mask, or else the array will be empty. |
1386 | /// The index and subtype parameters are used by the subvector insertion and |
1387 | /// extraction shuffle kinds to show the insert/extract point and the type of |
1388 | /// the subvector being inserted/extracted. The operands of the shuffle can be |
1389 | /// passed through \p Args, which helps improve the cost estimation in some |
1390 | /// cases, like in broadcast loads. |
1391 | /// NOTE: For subvector extractions Tp represents the source type. |
1392 | LLVM_ABI InstructionCost getShuffleCost( |
1393 | ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = {}, |
1394 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0, |
1395 | VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = {}, |
1396 | const Instruction *CxtI = nullptr) const; |
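| // Sketch: cost of broadcasting lane 0 of a <4 x float> (illustrative only; |
| // assumes `TTI` and `Ctx` as above): |
| // |
| //   auto *V4F32 = FixedVectorType::get(Type::getFloatTy(Ctx), 4); |
| //   InstructionCost C = |
| //       TTI.getShuffleCost(TTI::SK_Broadcast, V4F32, {0, 0, 0, 0}); |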
1397 | |
1398 | /// Represents a hint about the context in which a cast is used. |
1399 | /// |
1400 | /// For zext/sext, the context of the cast is the operand, which must be a |
1401 | /// load of some kind. For trunc, the context of the cast is the single |
1402 | /// user of the instruction, which must be a store of some kind. |
1403 | /// |
1404 | /// This enum allows the vectorizer to give getCastInstrCost an idea of the |
1405 | /// type of cast it's dealing with, as not every cast is equal. For instance, |
1406 | /// the zext of a load may be free, but the zext of an interleaving load can |
/// be (very) expensive! |
1408 | /// |
1409 | /// See \c getCastContextHint to compute a CastContextHint from a cast |
1410 | /// Instruction*. Callers can use it if they don't need to override the |
1411 | /// context and just want it to be calculated from the instruction. |
1412 | /// |
1413 | /// FIXME: This handles the types of load/store that the vectorizer can |
1414 | /// produce, which are the cases where the context instruction is most |
1415 | /// likely to be incorrect. There are other situations where that can happen |
1416 | /// too, which might be handled here but in the long run a more general |
1417 | /// solution of costing multiple instructions at the same time may be better. |
1418 | enum class CastContextHint : uint8_t { |
1419 | None, ///< The cast is not used with a load/store of any kind. |
1420 | Normal, ///< The cast is used with a normal load/store. |
1421 | Masked, ///< The cast is used with a masked load/store. |
1422 | GatherScatter, ///< The cast is used with a gather/scatter. |
1423 | Interleave, ///< The cast is used with an interleaved load/store. |
1424 | Reversed, ///< The cast is used with a reversed load/store. |
1425 | }; |
1426 | |
1427 | /// Calculates a CastContextHint from \p I. |
1428 | /// This should be used by callers of getCastInstrCost if they wish to |
1429 | /// determine the context from some instruction. |
1430 | /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr, |
1431 | /// or if it's another type of cast. |
1432 | LLVM_ABI static CastContextHint getCastContextHint(const Instruction *I); |
1433 | |
1434 | /// \return The expected cost of cast instructions, such as bitcast, trunc, |
1435 | /// zext, etc. If there is an existing instruction that holds Opcode, it |
1436 | /// may be passed in the 'I' parameter. |
1437 | LLVM_ABI InstructionCost getCastInstrCost( |
1438 | unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, |
1439 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1440 | const Instruction *I = nullptr) const; |
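| // Sketch combining getCastContextHint with getCastInstrCost, assuming `Ext` |
| // is an existing zext instruction whose operand is a load (illustrative): |
| // |
| //   TTI::CastContextHint CCH = TTI::getCastContextHint(Ext); |
| //   InstructionCost C = TTI.getCastInstrCost( |
| //       Instruction::ZExt, Ext->getType(), Ext->getOperand(0)->getType(), |
| //       CCH, TTI::TCK_RecipThroughput, Ext); |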
1441 | |
1442 | /// \return The expected cost of a sign- or zero-extended vector extract. Use |
1443 | /// Index = -1 to indicate that there is no information about the index value. |
1444 | LLVM_ABI InstructionCost |
1445 | getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, |
1446 | unsigned Index, TTI::TargetCostKind CostKind) const; |
1447 | |
1448 | /// \return The expected cost of control-flow related instructions such as |
1449 | /// Phi, Ret, Br, Switch. |
1450 | LLVM_ABI InstructionCost getCFInstrCost( |
1451 | unsigned Opcode, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1452 | const Instruction *I = nullptr) const; |
1453 | |
1454 | /// \returns The expected cost of compare and select instructions. If there |
1455 | /// is an existing instruction that holds Opcode, it may be passed in the |
1456 | /// 'I' parameter. The \p VecPred parameter can be used to indicate the select |
1457 | /// is using a compare with the specified predicate as condition. When vector |
1458 | /// types are passed, \p VecPred must be used for all lanes. For a |
1459 | /// comparison, the two operands are the natural values. For a select, the |
1460 | /// two operands are the *value* operands, not the condition operand. |
1461 | LLVM_ABI InstructionCost getCmpSelInstrCost( |
1462 | unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, |
1463 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1464 | OperandValueInfo Op1Info = {OK_AnyValue, OP_None}, |
1465 | OperandValueInfo Op2Info = {OK_AnyValue, OP_None}, |
1466 | const Instruction *I = nullptr) const; |
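| // Sketch: costing a vector compare and the select it feeds (illustrative; |
| // a select that is not based on a compare is shown with BAD_ICMP_PREDICATE): |
| // |
| //   auto *V4I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 4); |
| //   auto *V4I1 = FixedVectorType::get(Type::getInt1Ty(Ctx), 4); |
| //   InstructionCost Cmp = TTI.getCmpSelInstrCost( |
| //       Instruction::ICmp, V4I32, V4I1, CmpInst::ICMP_EQ); |
| //   InstructionCost Sel = TTI.getCmpSelInstrCost( |
| //       Instruction::Select, V4I32, V4I1, CmpInst::BAD_ICMP_PREDICATE); |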
1467 | |
1468 | /// \return The expected cost of vector Insert and Extract. |
1469 | /// Use -1 to indicate that there is no information on the index value. |
1470 | /// This is used when the instruction is not available; a typical use |
1471 | /// case is to provision the cost of vectorization/scalarization in |
1472 | /// vectorizer passes. |
1473 | LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
1474 | TTI::TargetCostKind CostKind, |
1475 | unsigned Index = -1, |
1476 | const Value *Op0 = nullptr, |
1477 | const Value *Op1 = nullptr) const; |
1478 | |
1479 | /// \return The expected cost of vector Insert and Extract. |
1480 | /// Use -1 to indicate that there is no information on the index value. |
1481 | /// This is used when the instruction is not available; a typical use |
1482 | /// case is to provision the cost of vectorization/scalarization in |
1483 | /// vectorizer passes. |
1484 | /// \param ScalarUserAndIdx encodes the information about extracts from a |
1485 | /// vector, with 'Scalar' being the value being extracted, 'User' being the |
1486 | /// user of the extract (nullptr if the user is not known before |
1487 | /// vectorization), and 'Idx' being the extract lane. |
1488 | LLVM_ABI InstructionCost getVectorInstrCost( |
1489 | unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, |
1490 | Value *Scalar, |
1491 | ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const; |
1492 | |
1493 | /// \return The expected cost of vector Insert and Extract. |
1494 | /// This is used when the instruction is available; the implementation |
1495 | /// asserts that 'I' is not nullptr. |
1496 | /// |
1497 | /// A typical suitable use case is cost estimation when vector instruction |
1498 | /// exists (e.g., from basic blocks during transformation). |
1499 | LLVM_ABI InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, |
1500 | TTI::TargetCostKind CostKind, |
1501 | unsigned Index = -1) const; |
1502 | |
1503 | /// \return The expected cost of aggregate inserts and extracts. This is |
1504 | /// used when the instruction is not available; a typical use case is to |
1505 | /// provision the cost of vectorization/scalarization in vectorizer passes. |
1506 | LLVM_ABI InstructionCost getInsertExtractValueCost( |
1507 | unsigned Opcode, TTI::TargetCostKind CostKind) const; |
1508 | |
1509 | /// \return The cost of replication shuffle of \p VF elements typed \p EltTy |
1510 | /// \p ReplicationFactor times. |
1511 | /// |
1512 | /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: |
1513 | /// <0,0,0,1,1,1,2,2,2,3,3,3> |
1514 | LLVM_ABI InstructionCost getReplicationShuffleCost( |
1515 | Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, |
1516 | TTI::TargetCostKind CostKind) const; |
1517 | |
1518 | /// \return The cost of Load and Store instructions. |
1519 | LLVM_ABI InstructionCost getMemoryOpCost( |
1520 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, |
1521 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1522 | OperandValueInfo OpdInfo = {OK_AnyValue, OP_None}, |
1523 | const Instruction *I = nullptr) const; |
1524 | |
1525 | /// \return The cost of VP Load and Store instructions. |
1526 | LLVM_ABI InstructionCost getVPMemoryOpCost( |
1527 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, |
1528 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1529 | const Instruction *I = nullptr) const; |
1530 | |
1531 | /// \return The cost of masked Load and Store instructions. |
1532 | LLVM_ABI InstructionCost getMaskedMemoryOpCost( |
1533 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, |
1534 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1535 | |
1536 | /// \return The cost of Gather or Scatter operation |
1537 | /// \p Opcode - is a type of memory access Load or Store |
1538 | /// \p DataTy - a vector type of the data to be loaded or stored |
1539 | /// \p Ptr - pointer [or vector of pointers] - address[es] in memory |
1540 | /// \p VariableMask - true when the memory access is predicated with a mask |
1541 | /// that is not a compile-time constant |
1542 | /// \p Alignment - alignment of single element |
1543 | /// \p I - the optional original context instruction, if one exists, e.g. the |
1544 | /// load/store to transform or the call to the gather/scatter intrinsic |
1545 | LLVM_ABI InstructionCost getGatherScatterOpCost( |
1546 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
1547 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1548 | const Instruction *I = nullptr) const; |
1549 | |
1550 | /// \return The cost of Expand Load or Compress Store operation |
1551 | /// \p Opcode - is a type of memory access Load or Store |
1552 | /// \p DataTy - a vector type of the data to be loaded or stored |
1553 | /// \p VariableMask - true when the memory access is predicated with a mask |
1554 | /// that is not a compile-time constant |
1555 | /// \p Alignment - alignment of single element |
1556 | /// \p I - the optional original context instruction, if one exists, e.g. the |
1557 | /// load/store to transform or the call to the gather/scatter intrinsic |
1558 | LLVM_ABI InstructionCost getExpandCompressMemoryOpCost( |
1559 | unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, |
1560 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1561 | const Instruction *I = nullptr) const; |
1562 | |
1563 | /// \return The cost of strided memory operations. |
1564 | /// \p Opcode - is a type of memory access Load or Store |
1565 | /// \p DataTy - a vector type of the data to be loaded or stored |
1566 | /// \p Ptr - pointer [or vector of pointers] - address[es] in memory |
1567 | /// \p VariableMask - true when the memory access is predicated with a mask |
1568 | /// that is not a compile-time constant |
1569 | /// \p Alignment - alignment of single element |
1570 | /// \p I - the optional original context instruction, if one exists, e.g. the |
1571 | /// load/store to transform or the call to the gather/scatter intrinsic |
1572 | LLVM_ABI InstructionCost getStridedMemoryOpCost( |
1573 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
1574 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1575 | const Instruction *I = nullptr) const; |
1576 | |
1577 | /// \return The cost of the interleaved memory operation. |
1578 | /// \p Opcode is the memory operation code |
1579 | /// \p VecTy is the vector type of the interleaved access. |
1580 | /// \p Factor is the interleave factor |
1581 | /// \p Indices is the indices for interleaved load members (as interleaved |
1582 | /// load allows gaps) |
1583 | /// \p Alignment is the alignment of the memory operation |
1584 | /// \p AddressSpace is address space of the pointer. |
1585 | /// \p UseMaskForCond indicates if the memory access is predicated. |
1586 | /// \p UseMaskForGaps indicates if gaps should be masked. |
1587 | LLVM_ABI InstructionCost getInterleavedMemoryOpCost( |
1588 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1589 | Align Alignment, unsigned AddressSpace, |
1590 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1591 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const; |
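| // Sketch: an interleave group with Factor = 2 loaded as one wide <8 x i32>, |
| // where both members (indices 0 and 1) are used; names are illustrative: |
| // |
| //   auto *WideTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 8); |
| //   InstructionCost C = TTI.getInterleavedMemoryOpCost( |
| //       Instruction::Load, WideTy, /*Factor=*/2, /*Indices=*/{0, 1}, |
| //       Align(16), /*AddressSpace=*/0); |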
1592 | |
1593 | /// A helper function to determine the type of reduction algorithm used |
1594 | /// for a given \p Opcode and set of FastMathFlags \p FMF. |
1595 | static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) { |
1596 | return FMF && !(*FMF).allowReassoc(); |
1597 | } |
1598 | |
1599 | /// Calculate the cost of vector reduction intrinsics. |
1600 | /// |
1601 | /// This is the cost of reducing the vector value of type \p Ty to a scalar |
1602 | /// value using the operation denoted by \p Opcode. The FastMathFlags |
1603 | /// parameter \p FMF indicates what type of reduction we are performing: |
1604 | /// 1. Tree-wise. This is the typical 'fast' reduction performed that |
1605 | /// involves successively splitting a vector into half and doing the |
1606 | /// operation on the pair of halves until you have a scalar value. For |
1607 | /// example: |
1608 | /// (v0, v1, v2, v3) |
1609 | /// ((v0+v2), (v1+v3), undef, undef) |
1610 | /// ((v0+v2+v1+v3), undef, undef, undef) |
1611 | /// This is the default behaviour for integer operations, whereas for |
1612 | /// floating point we only do this if \p FMF indicates that |
1613 | /// reassociation is allowed. |
1614 | /// 2. Ordered. For a vector with N elements this involves performing N |
1615 | /// operations in lane order, starting with an initial scalar value, i.e. |
1616 | /// result = InitVal + v0 |
1617 | /// result = result + v1 |
1618 | /// result = result + v2 |
1619 | /// result = result + v3 |
1620 | /// This is only the case for FP operations and when reassociation is not |
1621 | /// allowed. |
1622 | /// |
1623 | LLVM_ABI InstructionCost getArithmeticReductionCost( |
1624 | unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF, |
1625 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
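| // Sketch: the same fadd reduction costed tree-wise (reassociation allowed) |
| // and ordered (strict FP); names are illustrative: |
| // |
| //   auto *V4F32 = FixedVectorType::get(Type::getFloatTy(Ctx), 4); |
| //   FastMathFlags Reassoc; |
| //   Reassoc.setAllowReassoc(); |
| //   InstructionCost Tree = |
| //       TTI.getArithmeticReductionCost(Instruction::FAdd, V4F32, Reassoc); |
| //   InstructionCost Ordered = TTI.getArithmeticReductionCost( |
| //       Instruction::FAdd, V4F32, FastMathFlags()); |
| //   // requiresOrderedReduction(FastMathFlags()) is true, so `Ordered` |
| //   // models the lane-by-lane sequence described above. |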
1626 | |
1627 | LLVM_ABI InstructionCost getMinMaxReductionCost( |
1628 | Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(), |
1629 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1630 | |
1631 | /// Calculate the cost of an extended reduction pattern, similar to |
1632 | /// getArithmeticReductionCost of an Add reduction with multiply and optional |
1633 | /// extensions. This is the cost of an expression such as: |
1634 | /// ResTy vecreduce.add(mul(A, B)). |
1635 | /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). |
1636 | LLVM_ABI InstructionCost getMulAccReductionCost( |
1637 | bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1638 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1639 | |
1640 | /// Calculate the cost of an extended reduction pattern, similar to |
1641 | /// getArithmeticReductionCost of a reduction with an extension. |
1642 | /// This is the cost of an expression such as: |
1643 | /// ResTy vecreduce.opcode(ext(Ty A)). |
1644 | LLVM_ABI InstructionCost getExtendedReductionCost( |
1645 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1646 | std::optional<FastMathFlags> FMF, |
1647 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1648 | |
1649 | /// \returns The cost of Intrinsic instructions. Analyses the real arguments. |
1650 | /// Three cases are handled: 1. scalar instruction 2. vector instruction |
1651 | /// 3. scalar instruction which is to be vectorized. |
1652 | LLVM_ABI InstructionCost getIntrinsicInstrCost( |
1653 | const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const; |
1654 | |
1655 | /// \returns The cost of Call instructions. |
1656 | LLVM_ABI InstructionCost getCallInstrCost( |
1657 | Function *F, Type *RetTy, ArrayRef<Type *> Tys, |
1658 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; |
1659 | |
1660 | /// \returns The number of pieces into which the provided type must be |
1661 | /// split during legalization. Zero is returned when the answer is unknown. |
1662 | LLVM_ABI unsigned getNumberOfParts(Type *Tp) const; |
1663 | |
1664 | /// \returns The cost of the address computation. For most targets this can be |
1665 | /// merged into the instruction indexing mode. Some targets might want to |
1666 | /// distinguish between address computation for memory operations on vector |
1667 | /// types and scalar types. Such targets should override this function. |
1668 | /// The 'SE' parameter holds a pointer to the scalar evolution object, which |
1669 | /// is used to get the step value of 'Ptr' in the case of a constant stride. |
1670 | /// The 'Ptr' parameter holds the SCEV of the access pointer. |
1671 | LLVM_ABI InstructionCost getAddressComputationCost( |
1672 | Type *Ty, ScalarEvolution *SE = nullptr, const SCEV *Ptr = nullptr) const; |
1673 | |
1674 | /// \returns The cost, if any, of keeping values of the given types alive |
1675 | /// over a callsite. |
1676 | /// |
1677 | /// Some types may require the use of register classes that do not have |
1678 | /// any callee-saved registers, so would require a spill and fill. |
1679 | LLVM_ABI InstructionCost |
1680 | getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; |
1681 | |
1682 | /// \returns True if the intrinsic is a supported memory intrinsic. Info |
1683 | /// will contain additional information - whether the intrinsic may read |
1684 | /// or write memory, its volatility, and the pointer. Info is undefined |
1685 | /// if false is returned. |
1686 | LLVM_ABI bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
1687 | MemIntrinsicInfo &Info) const; |
1688 | |
1689 | /// \returns The maximum element size, in bytes, for an element |
1690 | /// unordered-atomic memory intrinsic. |
1691 | LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const; |
1692 | |
1693 | /// \returns A value which is the result of the given memory intrinsic. New |
1694 | /// instructions may be created to extract the result from the given intrinsic |
1695 | /// memory operation. Returns nullptr if the target cannot create a result |
1696 | /// from the given intrinsic. |
1697 | LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1698 | Type *ExpectedType) const; |
1699 | |
1700 | /// \returns The type to use in a loop expansion of a memcpy call. |
1701 | LLVM_ABI Type *getMemcpyLoopLoweringType( |
1702 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, |
1703 | unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, |
1704 | std::optional<uint32_t> AtomicElementSize = std::nullopt) const; |
1705 | |
1706 | /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. |
1707 | /// \param RemainingBytes The number of bytes to copy. |
1708 | /// |
1709 | /// Calculates the operand types to use when copying \p RemainingBytes of |
1710 | /// memory, where source and destination alignments are \p SrcAlign and |
1711 | /// \p DestAlign respectively. |
1712 | LLVM_ABI void getMemcpyLoopResidualLoweringType( |
1713 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
1714 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1715 | Align SrcAlign, Align DestAlign, |
1716 | std::optional<uint32_t> AtomicCpySize = std::nullopt) const; |
1717 | |
1718 | /// \returns True if the two functions have compatible attributes for inlining |
1719 | /// purposes. |
1720 | LLVM_ABI bool areInlineCompatible(const Function *Caller, |
1721 | const Function *Callee) const; |
1722 | |
1723 | /// Returns a penalty for invoking call \p Call in \p F. |
1724 | /// For example, if a function F calls a function G, which in turn calls |
1725 | /// function H, then getInlineCallPenalty(F, H()) would return the |
1726 | /// penalty of calling H from F, e.g. after inlining G into F. |
1727 | /// \p DefaultCallPenalty is passed to give a default penalty that |
1728 | /// the target can amend or override. |
1729 | LLVM_ABI unsigned getInlineCallPenalty(const Function *F, |
1730 | const CallBase &Call, |
1731 | unsigned DefaultCallPenalty) const; |
1732 | |
1733 | /// \returns True if the caller and callee agree on how \p Types will be |
1734 | /// passed to or returned from the callee. |
1736 | /// \param Types List of types to check. |
1737 | LLVM_ABI bool areTypesABICompatible(const Function *Caller, |
1738 | const Function *Callee, |
1739 | const ArrayRef<Type *> &Types) const; |
1740 | |
1741 | /// The type of load/store indexing. |
1742 | enum MemIndexedMode { |
1743 | MIM_Unindexed, ///< No indexing. |
1744 | MIM_PreInc, ///< Pre-incrementing. |
1745 | MIM_PreDec, ///< Pre-decrementing. |
1746 | MIM_PostInc, ///< Post-incrementing. |
1747 | MIM_PostDec ///< Post-decrementing. |
1748 | }; |
1749 | |
1750 | /// \returns True if the specified indexed load for the given type is legal. |
1751 | LLVM_ABI bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1752 | |
1753 | /// \returns True if the specified indexed store for the given type is legal. |
1754 | LLVM_ABI bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1755 | |
1756 | /// \returns The bitwidth of the largest vector type that should be used to |
1757 | /// load/store in the given address space. |
1758 | LLVM_ABI unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; |
1759 | |
1760 | /// \returns True if the load instruction is legal to vectorize. |
1761 | LLVM_ABI bool isLegalToVectorizeLoad(LoadInst *LI) const; |
1762 | |
1763 | /// \returns True if the store instruction is legal to vectorize. |
1764 | LLVM_ABI bool isLegalToVectorizeStore(StoreInst *SI) const; |
1765 | |
1766 | /// \returns True if it is legal to vectorize the given load chain. |
1767 | LLVM_ABI bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
1768 | Align Alignment, |
1769 | unsigned AddrSpace) const; |
1770 | |
1771 | /// \returns True if it is legal to vectorize the given store chain. |
1772 | LLVM_ABI bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
1773 | Align Alignment, |
1774 | unsigned AddrSpace) const; |
1775 | |
1776 | /// \returns True if it is legal to vectorize the given reduction kind. |
1777 | LLVM_ABI bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
1778 | ElementCount VF) const; |
1779 | |
1780 | /// \returns True if the given type is supported for scalable vectors |
1781 | LLVM_ABI bool isElementTypeLegalForScalableVector(Type *Ty) const; |
1782 | |
1783 | /// \returns The new vector factor value if the target doesn't support \p |
1784 | /// ChainSizeInBytes loads or has a better vector factor. |
1785 | LLVM_ABI unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1786 | unsigned ChainSizeInBytes, |
1787 | VectorType *VecTy) const; |
1788 | |
1789 | /// \returns The new vector factor value if the target doesn't support \p |
1790 | /// ChainSizeInBytes stores or has a better vector factor. |
1791 | LLVM_ABI unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1792 | unsigned ChainSizeInBytes, |
1793 | VectorType *VecTy) const; |
1794 | |
1795 | /// \returns True if the target prefers fixed-width vectorization if the |
1796 | /// loop vectorizer's cost-model assigns an equal cost to the fixed and |
1797 | /// scalable version of the vectorized loop. |
1798 | LLVM_ABI bool preferFixedOverScalableIfEqualCost() const; |
1799 | |
1800 | /// \returns True if the target prefers the SLP vectorizer to use alternate |
1801 | /// opcode vectorization, false otherwise. |
1802 | LLVM_ABI bool preferAlternateOpcodeVectorization() const; |
1803 | |
1804 | /// \returns True if the target prefers reductions of \p Kind to be performed |
1805 | /// in the loop. |
1806 | LLVM_ABI bool preferInLoopReduction(RecurKind Kind, Type *Ty) const; |
1807 | |
1808 | /// \returns True if the target prefers the select of a reduction to be kept |
1809 | /// in the loop when tail folding, i.e. |
1810 | /// loop: |
1811 | /// p = phi (0, s) |
1812 | /// a = add (p, x) |
1813 | /// s = select (mask, a, p) |
1814 | /// vecreduce.add(s) |
1815 | /// |
1816 | /// As opposed to the normal scheme of p = phi (0, a) which allows the select |
1817 | /// to be pulled out of the loop. If the select(.., add, ..) can be predicated |
1818 | /// by the target, this can lead to cleaner code generation. |
1819 | LLVM_ABI bool preferPredicatedReductionSelect() const; |
1820 | |
1821 | /// Return true if the loop vectorizer should consider vectorizing an |
1822 | /// otherwise scalar epilogue loop. |
1823 | LLVM_ABI bool preferEpilogueVectorization() const; |
1824 | |
1825 | /// \returns True if the target wants to expand the given reduction intrinsic |
1826 | /// into a shuffle sequence. |
1827 | LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const; |
1828 | |
1829 | enum struct ReductionShuffle { SplitHalf, Pairwise }; |
1830 | |
1831 | /// \returns The shuffle sequence pattern used to expand the given reduction |
1832 | /// intrinsic. |
1833 | LLVM_ABI ReductionShuffle |
1834 | getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const; |
1835 | |
1836 | /// \returns the size cost of rematerializing a GlobalValue address relative |
1837 | /// to a stack reload. |
1838 | LLVM_ABI unsigned getGISelRematGlobalCost() const; |
1839 | |
1840 | /// \returns the lower bound of a trip count to decide on vectorization |
1841 | /// while tail-folding. |
1842 | LLVM_ABI unsigned getMinTripCountTailFoldingThreshold() const; |
1843 | |
1844 | /// \returns True if the target supports scalable vectors. |
1845 | LLVM_ABI bool supportsScalableVectors() const; |
1846 | |
1847 | /// \return true when scalable vectorization is preferred. |
1848 | LLVM_ABI bool enableScalableVectorization() const; |
1849 | |
1850 | /// \name Vector Predication Information |
1851 | /// @{ |
1852 | /// Whether the target supports the %evl parameter of VP intrinsics efficiently |
1853 | /// in hardware, for the given opcode and type/alignment. (see LLVM Language |
1854 | /// Reference - "Vector Predication Intrinsics"). |
1855 | /// Use of %evl is discouraged when that is not the case. |
1856 | LLVM_ABI bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
1857 | Align Alignment) const; |
1858 | |
1859 | /// Return true if sinking I's operands to the same basic block as I is |
1860 | /// profitable, e.g. because the operands can be folded into a target |
1861 | /// instruction during instruction selection. After calling the function |
1862 | /// \p Ops contains the Uses to sink ordered by dominance (dominating users |
1863 | /// come first). |
1864 | LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, |
1865 | SmallVectorImpl<Use *> &Ops) const; |
1866 | |
1867 | /// Return true if it's significantly cheaper to shift a vector by a uniform |
1868 | /// scalar than by an amount which will vary across each lane. On x86 before |
1869 | /// AVX2 for example, there is a "psllw" instruction for the former case, but |
1870 | /// no simple instruction for a general "a << b" operation on vectors. |
1871 | /// This should also apply to lowering for vector funnel shifts (rotates). |
1872 | LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const; |
1873 | |
1874 | struct VPLegalization { |
1875 | enum VPTransform { |
1876 | // keep the predicating parameter |
1877 | Legal = 0, |
1878 | // where legal, discard the predicate parameter |
1879 | Discard = 1, |
1880 | // transform into something else that is also predicating |
1881 | Convert = 2 |
1882 | }; |
1883 | |
1884 | // How to transform the EVL parameter. |
1885 | // Legal: keep the EVL parameter as it is. |
1886 | // Discard: Ignore the EVL parameter where it is safe to do so. |
1887 | // Convert: Fold the EVL into the mask parameter. |
1888 | VPTransform EVLParamStrategy; |
1889 | |
1890 | // How to transform the operator. |
1891 | // Legal: The target supports this operator. |
1892 | // Convert: Convert this to a non-VP operation. |
1893 | // The 'Discard' strategy is invalid. |
1894 | VPTransform OpStrategy; |
1895 | |
1896 | bool shouldDoNothing() const { |
1897 | return (EVLParamStrategy == Legal) && (OpStrategy == Legal); |
1898 | } |
1899 | VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) |
1900 | : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} |
1901 | }; |
1902 | |
1903 | /// \returns How the target needs this vector-predicated operation to be |
1904 | /// transformed. |
1905 | LLVM_ABI VPLegalization |
1906 | getVPLegalizationStrategy(const VPIntrinsic &PI) const; |
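| // Sketch, assuming `VPI` is a VPIntrinsic encountered by an expansion pass: |
| // |
| //   TTI::VPLegalization VPL = TTI.getVPLegalizationStrategy(VPI); |
| //   if (VPL.shouldDoNothing()) |
| //     return; // The target handles this VP op and its EVL natively. |
| //   if (VPL.EVLParamStrategy == TTI::VPLegalization::Convert) { |
| //     // Fold the EVL operand into the mask before lowering the operation. |
| //   } |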
1907 | /// @} |
1908 | |
1909 | /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb |
1910 | /// state. |
1911 | /// |
1912 | /// Used by the LowerTypeTests pass, which constructs an IR inline assembler |
1913 | /// node containing a jump table in a format suitable for the target, so it |
1914 | /// needs to know what format of jump table it can legally use. |
1915 | /// |
1916 | /// For non-Arm targets, this function isn't used. It defaults to returning |
1917 | /// false, but it shouldn't matter what it returns anyway. |
1918 | LLVM_ABI bool hasArmWideBranch(bool Thumb) const; |
1919 | |
1920 | /// Returns a bitmask constructed from the target-features or fmv-features |
1921 | /// metadata of a function. |
1922 | LLVM_ABI uint64_t getFeatureMask(const Function &F) const; |
1923 | |
1924 | /// Returns true if this is an instance of a function with multiple versions. |
1925 | LLVM_ABI bool isMultiversionedFunction(const Function &F) const; |
1926 | |
1927 | /// \return The maximum number of function arguments the target supports. |
1928 | LLVM_ABI unsigned getMaxNumArgs() const; |
1929 | |
1930 | /// \return For an array of the given Size, return the alignment boundary to |
1931 | /// pad to. The default is no padding. |
1932 | LLVM_ABI unsigned getNumBytesToPadGlobalArray(unsigned Size, |
1933 | Type *ArrayType) const; |
1934 | |
1935 | /// @} |
1936 | |
1937 | /// Collect kernel launch bounds for \p F into \p LB. |
1938 | LLVM_ABI void collectKernelLaunchBounds( |
1939 | const Function &F, |
1940 | SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const; |
1941 | |
1942 | private: |
1943 | std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl; |
1944 | }; |
1945 | |
1946 | /// Analysis pass providing the \c TargetTransformInfo. |
1947 | /// |
1948 | /// The core idea of the TargetIRAnalysis is to expose an interface through |
1949 | /// which LLVM targets can analyze and provide information about the middle |
1950 | /// end's target-independent IR. This supports use cases such as target-aware |
1951 | /// cost modeling of IR constructs. |
1952 | /// |
1953 | /// This is a function analysis because much of the cost modeling for targets |
1954 | /// is done in a subtarget specific way and LLVM supports compiling different |
1955 | /// functions targeting different subtargets in order to support runtime |
1956 | /// dispatch according to the observed subtarget. |
1957 | class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { |
1958 | public: |
1959 | typedef TargetTransformInfo Result; |
1960 | |
1961 | /// Default construct a target IR analysis. |
1962 | /// |
1963 | /// This will use the module's datalayout to construct a baseline |
1964 | /// conservative TTI result. |
1965 | LLVM_ABI TargetIRAnalysis(); |
1966 | |
1967 | /// Construct an IR analysis pass around a target-provided callback. |
1968 | /// |
1969 | /// The callback will be called with a particular function for which the TTI |
1970 | /// is needed and must return a TTI object for that function. |
1971 | LLVM_ABI |
1972 | TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); |
1973 | |
1974 | // Value semantics. We spell out the constructors for MSVC. |
1975 | TargetIRAnalysis(const TargetIRAnalysis &Arg) |
1976 | : TTICallback(Arg.TTICallback) {} |
1977 | TargetIRAnalysis(TargetIRAnalysis &&Arg) |
1978 | : TTICallback(std::move(Arg.TTICallback)) {} |
1979 | TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { |
1980 | TTICallback = RHS.TTICallback; |
1981 | return *this; |
1982 | } |
1983 | TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { |
1984 | TTICallback = std::move(RHS.TTICallback); |
1985 | return *this; |
1986 | } |
1987 | |
1988 | LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &); |
1989 | |
1990 | private: |
1991 | friend AnalysisInfoMixin<TargetIRAnalysis>; |
1992 | LLVM_ABI static AnalysisKey Key; |
1993 | |
1994 | /// The callback used to produce a result. |
1995 | /// |
1996 | /// We use a completely opaque callback so that targets can provide whatever |
1997 | /// mechanism they desire for constructing the TTI for a given function. |
1998 | /// |
1999 | /// FIXME: Should we really use std::function? It's relatively inefficient. |
2000 | /// It might be possible to arrange for even stateful callbacks to outlive |
2001 | /// the analysis and thus use a function_ref which would be lighter weight. |
2002 | /// This may also be less error prone as the callback is likely to reference |
2003 | /// the external TargetMachine, and that reference needs to never dangle. |
2004 | std::function<Result(const Function &)> TTICallback; |
2005 | |
2006 | /// Helper function used as the callback in the default constructor. |
2007 | static Result getDefaultTTI(const Function &F); |
2008 | }; |
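| // Sketch: obtaining TargetTransformInfo inside a new-pass-manager function |
| // pass; `MyPass` is a hypothetical pass used only for illustration: |
| // |
| //   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) { |
| //     const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); |
| //     if (TTI.haveFastSqrt(Type::getDoubleTy(F.getContext()))) { |
| //       // ... take a sqrt-based fast path ... |
| //     } |
| //     return PreservedAnalyses::all(); |
| //   } |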
2009 | |
2010 | /// Wrapper pass for TargetTransformInfo. |
2011 | /// |
2012 | /// This pass can be constructed from a TTI object which it stores internally |
2013 | /// and is queried by passes. |
2014 | class LLVM_ABI TargetTransformInfoWrapperPass : public ImmutablePass { |
2015 | TargetIRAnalysis TIRA; |
2016 | std::optional<TargetTransformInfo> TTI; |
2017 | |
2018 | virtual void anchor(); |
2019 | |
2020 | public: |
2021 | static char ID; |
2022 | |
2023 | /// We must provide a default constructor for the pass but it should |
2024 | /// never be used. |
2025 | /// |
2026 | /// Use the constructor below or call one of the creation routines. |
2027 | TargetTransformInfoWrapperPass(); |
2028 | |
2029 | explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
2030 | |
2031 | TargetTransformInfo &getTTI(const Function &F); |
2032 | }; |
2033 | |
2034 | /// Create an analysis pass wrapper around a TTI object. |
2035 | /// |
2036 | /// This analysis pass just holds the TTI instance and makes it available to |
2037 | /// clients. |
2038 | LLVM_ABI ImmutablePass * |
2039 | createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
2040 | |
2041 | } // namespace llvm |
2042 | |
2043 | #endif |
2044 |