1 | //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file describes how to lower LLVM code to machine code. This has three |
11 | /// main components: |
12 | /// |
13 | /// 1. Which ValueTypes are natively supported by the target. |
14 | /// 2. Which operations are supported for supported ValueTypes. |
15 | /// 3. Cost thresholds for alternative implementations of certain operations. |
16 | /// |
17 | /// In addition it has a few other components, like information about FP |
18 | /// immediates. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_CODEGEN_TARGETLOWERING_H |
23 | #define LLVM_CODEGEN_TARGETLOWERING_H |
24 | |
25 | #include "llvm/ADT/APInt.h" |
26 | #include "llvm/ADT/ArrayRef.h" |
27 | #include "llvm/ADT/DenseMap.h" |
28 | #include "llvm/ADT/SmallVector.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/CodeGen/DAGCombine.h" |
31 | #include "llvm/CodeGen/ISDOpcodes.h" |
32 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
35 | #include "llvm/CodeGen/SelectionDAG.h" |
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
37 | #include "llvm/CodeGen/TargetCallingConv.h" |
38 | #include "llvm/CodeGen/ValueTypes.h" |
39 | #include "llvm/CodeGenTypes/MachineValueType.h" |
40 | #include "llvm/IR/Attributes.h" |
41 | #include "llvm/IR/CallingConv.h" |
42 | #include "llvm/IR/DataLayout.h" |
43 | #include "llvm/IR/DerivedTypes.h" |
44 | #include "llvm/IR/Function.h" |
45 | #include "llvm/IR/InlineAsm.h" |
46 | #include "llvm/IR/Instruction.h" |
47 | #include "llvm/IR/Instructions.h" |
48 | #include "llvm/IR/RuntimeLibcalls.h" |
49 | #include "llvm/IR/Type.h" |
50 | #include "llvm/Support/Alignment.h" |
51 | #include "llvm/Support/AtomicOrdering.h" |
52 | #include "llvm/Support/Casting.h" |
53 | #include "llvm/Support/Compiler.h" |
54 | #include "llvm/Support/ErrorHandling.h" |
55 | #include "llvm/Support/KnownFPClass.h" |
56 | #include <algorithm> |
57 | #include <cassert> |
58 | #include <climits> |
59 | #include <cstdint> |
60 | #include <iterator> |
61 | #include <map> |
62 | #include <string> |
63 | #include <utility> |
64 | #include <vector> |
65 | |
66 | namespace llvm { |
67 | |
68 | class AssumptionCache; |
69 | class CCState; |
70 | class CCValAssign; |
71 | enum class ComplexDeinterleavingOperation; |
72 | enum class ComplexDeinterleavingRotation; |
73 | class Constant; |
74 | class FastISel; |
75 | class FunctionLoweringInfo; |
76 | class GlobalValue; |
77 | class Loop; |
78 | class GISelValueTracking; |
79 | class IntrinsicInst; |
80 | class IRBuilderBase; |
81 | struct KnownBits; |
82 | class LLVMContext; |
83 | class MachineBasicBlock; |
84 | class MachineFunction; |
85 | class MachineInstr; |
86 | class MachineJumpTableInfo; |
87 | class MachineLoop; |
88 | class MachineRegisterInfo; |
89 | class MCContext; |
90 | class MCExpr; |
91 | class Module; |
92 | class ProfileSummaryInfo; |
93 | class TargetLibraryInfo; |
94 | class TargetMachine; |
95 | class TargetRegisterClass; |
96 | class TargetRegisterInfo; |
97 | class TargetTransformInfo; |
98 | class Value; |
99 | class VPIntrinsic; |
100 | |
101 | namespace Sched { |
102 | |
103 | enum Preference : uint8_t { |
104 | None, // No preference |
105 | Source, // Follow source order. |
106 | RegPressure, // Scheduling for lowest register pressure. |
107 | Hybrid, // Scheduling for both latency and register pressure. |
108 | ILP, // Scheduling for ILP in low register pressure mode. |
109 | VLIW, // Scheduling for VLIW targets. |
110 | Fast, // Fast suboptimal list scheduling |
111 | Linearize, // Linearize DAG, no scheduling |
112 | Last = Linearize // Marker for the last Sched::Preference |
113 | }; |
114 | |
115 | } // end namespace Sched |
116 | |
117 | // MemOp models a memory operation, either memset or memcpy/memmove. |
118 | struct MemOp { |
119 | private: |
120 | // Shared |
121 | uint64_t Size; |
122 | bool DstAlignCanChange; // true if destination alignment can satisfy any |
123 | // constraint. |
124 | Align DstAlign; // Specified alignment of the memory operation. |
125 | |
126 | bool AllowOverlap; |
127 | // memset only |
128 | bool IsMemset; // If set, this memory operation is a memset. |
129 | bool ZeroMemset; // If set clears out memory with zeros. |
130 | // memcpy only |
131 | bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register |
132 | // constant so it does not need to be loaded. |
133 | Align SrcAlign; // Inferred alignment of the source or default value if the |
134 | // memory operation does not need to load the value. |
135 | public: |
136 | static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
137 | Align SrcAlign, bool IsVolatile, |
138 | bool MemcpyStrSrc = false) { |
139 | MemOp Op; |
140 | Op.Size = Size; |
141 | Op.DstAlignCanChange = DstAlignCanChange; |
142 | Op.DstAlign = DstAlign; |
143 | Op.AllowOverlap = !IsVolatile; |
144 | Op.IsMemset = false; |
145 | Op.ZeroMemset = false; |
146 | Op.MemcpyStrSrc = MemcpyStrSrc; |
147 | Op.SrcAlign = SrcAlign; |
148 | return Op; |
149 | } |
150 | |
151 | static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
152 | bool IsZeroMemset, bool IsVolatile) { |
153 | MemOp Op; |
154 | Op.Size = Size; |
155 | Op.DstAlignCanChange = DstAlignCanChange; |
156 | Op.DstAlign = DstAlign; |
157 | Op.AllowOverlap = !IsVolatile; |
158 | Op.IsMemset = true; |
159 | Op.ZeroMemset = IsZeroMemset; |
160 | Op.MemcpyStrSrc = false; |
161 | return Op; |
162 | } |
163 | |
164 | uint64_t size() const { return Size; } |
165 | Align getDstAlign() const { |
166 | assert(!DstAlignCanChange); |
167 | return DstAlign; |
168 | } |
169 | bool isFixedDstAlign() const { return !DstAlignCanChange; } |
170 | bool allowOverlap() const { return AllowOverlap; } |
171 | bool isMemset() const { return IsMemset; } |
172 | bool isMemcpy() const { return !IsMemset; } |
173 | bool isMemcpyWithFixedDstAlign() const { |
174 | return isMemcpy() && !DstAlignCanChange; |
175 | } |
176 | bool isZeroMemset() const { return isMemset() && ZeroMemset; } |
177 | bool isMemcpyStrSrc() const { |
178 | assert(isMemcpy() && "Must be a memcpy"); |
179 | return MemcpyStrSrc; |
180 | } |
181 | Align getSrcAlign() const { |
182 | assert(isMemcpy() && "Must be a memcpy"); |
183 | return SrcAlign; |
184 | } |
185 | bool isSrcAligned(Align AlignCheck) const { |
186 | return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value()); |
187 | } |
188 | bool isDstAligned(Align AlignCheck) const { |
189 | return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value()); |
190 | } |
191 | bool isAligned(Align AlignCheck) const { |
192 | return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck); |
193 | } |
194 | }; |
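// Example (illustrative): constructing and querying a MemOp that describes a
// 16-byte memcpy with a fixed 8-byte destination alignment and a 4-byte
// source alignment.
//
//   MemOp Op = MemOp::Copy(/*Size=*/16, /*DstAlignCanChange=*/false,
//                          /*DstAlign=*/Align(8), /*SrcAlign=*/Align(4),
//                          /*IsVolatile=*/false);
//   assert(Op.isMemcpyWithFixedDstAlign() && Op.isAligned(Align(4)));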
195 | |
196 | /// This base class for TargetLowering contains the SelectionDAG-independent |
197 | /// parts that can be used from the rest of CodeGen. |
198 | class LLVM_ABI TargetLoweringBase { |
199 | public: |
200 | /// This enum indicates whether operations are valid for a target, and if not, |
201 | /// what action should be used to make them valid. |
202 | enum LegalizeAction : uint8_t { |
203 | Legal, // The target natively supports this operation. |
204 | Promote, // This operation should be executed in a larger type. |
205 | Expand, // Try to expand this to other ops, otherwise use a libcall. |
206 | LibCall, // Don't try to expand this to other ops, always use a libcall. |
207 | Custom // Use the LowerOperation hook to implement custom lowering. |
208 | }; |
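// Example (illustrative): a target's TargetLowering constructor typically
// records these choices with setOperationAction() (declared further down in
// this class), e.g.:
//   setOperationAction(ISD::SDIV, MVT::i32, Expand);      // build from other ops
//   setOperationAction(ISD::FSIN, MVT::f64, LibCall);     // always call the library
//   setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); // handled in LowerOperation()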
209 | |
210 | /// This enum indicates whether a type is legal for a target, and if not, |
211 | /// what action should be used to make it valid. |
212 | enum LegalizeTypeAction : uint8_t { |
213 | TypeLegal, // The target natively supports this type. |
214 | TypePromoteInteger, // Replace this integer with a larger one. |
215 | TypeExpandInteger, // Split this integer into two of half the size. |
216 | TypeSoftenFloat, // Convert this float to a same size integer type. |
217 | TypeExpandFloat, // Split this float into two of half the size. |
218 | TypeScalarizeVector, // Replace this one-element vector with its element. |
219 | TypeSplitVector, // Split this vector into two of half the size. |
220 | TypeWidenVector, // This vector should be widened into a larger vector. |
221 | TypePromoteFloat, // Replace this float with a larger one. |
222 | TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic. |
223 | TypeScalarizeScalableVector, // This action is explicitly left unimplemented. |
224 | // While it is theoretically possible to |
225 | // legalize operations on scalable types with a |
226 | // loop that handles the vscale * #lanes of the |
227 | // vector, this is non-trivial at SelectionDAG |
228 | // level and these types are better to be |
229 | // widened or promoted. |
230 | }; |
231 | |
232 | /// LegalizeKind holds the legalization kind that needs to happen to EVT |
233 | /// in order to type-legalize it. |
234 | using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; |
235 | |
236 | /// Enum that describes how the target represents true/false values. |
237 | enum BooleanContent { |
238 | UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. |
239 | ZeroOrOneBooleanContent, // All bits zero except for bit 0. |
240 | ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. |
241 | }; |
242 | |
243 | /// Enum that describes what type of support for selects the target has. |
244 | enum SelectSupportKind { |
245 | ScalarValSelect, // The target supports scalar selects (ex: cmov). |
246 | ScalarCondVectorVal, // The target supports selects with a scalar condition |
247 | // and vector values (ex: cmov). |
248 | VectorMaskSelect // The target supports vector selects with a vector |
249 | // mask (ex: x86 blends). |
250 | }; |
251 | |
252 | /// Enum that specifies what an atomic load/AtomicRMWInst is expanded |
253 | /// to, if at all. Exists because different targets have different levels of |
254 | /// support for these atomic instructions, and also have different options |
255 | /// w.r.t. what they should expand to. |
256 | enum class AtomicExpansionKind { |
257 | None, // Don't expand the instruction. |
258 | CastToInteger, // Cast the atomic instruction to another type, e.g. from |
259 | // floating-point to integer type. |
260 | LLSC, // Expand the instruction into loadlinked/storeconditional; used |
261 | // by ARM/AArch64. |
262 | LLOnly, // Expand the (load) instruction into just a load-linked, which has |
263 | // greater atomic guarantees than a normal load. |
264 | CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. |
265 | MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. |
266 | BitTestIntrinsic, // Use a target-specific intrinsic for special bit |
267 | // operations; used by X86. |
268 | CmpArithIntrinsic,// Use a target-specific intrinsic for special compare |
269 | // operations; used by X86. |
270 | Expand, // Generic expansion in terms of other atomic operations. |
271 | |
272 | // Rewrite to a non-atomic form for use in a known non-preemptible |
273 | // environment. |
274 | NotAtomic |
275 | }; |
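// Example (illustrative): an LL/SC architecture might map most atomicrmw
// instructions onto an LL/SC loop in a hook such as shouldExpandAtomicRMWInIR()
// (declared further down in the full header), falling back to cmpxchg for
// operations it cannot encode directly:
//   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override {
//     return RMW->isFloatingPointOperation() ? AtomicExpansionKind::CmpXChg
//                                            : AtomicExpansionKind::LLSC;
//   }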
276 | |
277 | /// Enum that specifies when a multiplication should be expanded. |
278 | enum class MulExpansionKind { |
279 | Always, // Always expand the instruction. |
280 | OnlyLegalOrCustom, // Only expand when the resulting instructions are legal |
281 | // or custom. |
282 | }; |
283 | |
284 | /// Enum that specifies when a float negation is beneficial. |
285 | enum class NegatibleCost { |
286 | Cheaper = 0, // Negated expression is cheaper. |
287 | Neutral = 1, // Negated expression has the same cost. |
288 | Expensive = 2 // Negated expression is more expensive. |
289 | }; |
290 | |
291 | /// Enum of different potentially desirable ways to fold (and/or (setcc ...), |
292 | /// (setcc ...)). |
293 | enum AndOrSETCCFoldKind : uint8_t { |
294 | None = 0, // No fold is preferable. |
295 | AddAnd = 1, // Fold with `Add` op and `And` op is preferable. |
296 | NotAnd = 2, // Fold with `Not` op and `And` op is preferable. |
297 | ABS = 4, // Fold with `llvm.abs` op is preferable. |
298 | }; |
299 | |
300 | class ArgListEntry { |
301 | public: |
302 | Value *Val = nullptr; |
303 | SDValue Node = SDValue(); |
304 | Type *Ty = nullptr; |
305 | bool IsSExt : 1; |
306 | bool IsZExt : 1; |
307 | bool IsNoExt : 1; |
308 | bool IsInReg : 1; |
309 | bool IsSRet : 1; |
310 | bool IsNest : 1; |
311 | bool IsByVal : 1; |
312 | bool IsByRef : 1; |
313 | bool IsInAlloca : 1; |
314 | bool IsPreallocated : 1; |
315 | bool IsReturned : 1; |
316 | bool IsSwiftSelf : 1; |
317 | bool IsSwiftAsync : 1; |
318 | bool IsSwiftError : 1; |
319 | bool IsCFGuardTarget : 1; |
320 | MaybeAlign Alignment = std::nullopt; |
321 | Type *IndirectType = nullptr; |
322 | |
323 | ArgListEntry() |
324 | : IsSExt(false), IsZExt(false), IsNoExt(false), IsInReg(false), |
325 | IsSRet(false), IsNest(false), IsByVal(false), IsByRef(false), |
326 | IsInAlloca(false), IsPreallocated(false), IsReturned(false), |
327 | IsSwiftSelf(false), IsSwiftAsync(false), IsSwiftError(false), |
328 | IsCFGuardTarget(false) {} |
329 | |
330 | LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx); |
331 | }; |
332 | using ArgListTy = std::vector<ArgListEntry>; |
333 | |
334 | static ISD::NodeType getExtendForContent(BooleanContent Content) { |
335 | switch (Content) { |
336 | case UndefinedBooleanContent: |
337 | // Extend by adding rubbish bits. |
338 | return ISD::ANY_EXTEND; |
339 | case ZeroOrOneBooleanContent: |
340 | // Extend by adding zero bits. |
341 | return ISD::ZERO_EXTEND; |
342 | case ZeroOrNegativeOneBooleanContent: |
343 | // Extend by copying the sign bit. |
344 | return ISD::SIGN_EXTEND; |
345 | } |
346 | llvm_unreachable("Invalid content kind"); |
347 | } |
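// Example (illustrative): the mapping above is what allows a boolean produced
// under a given BooleanContent to be widened safely:
//   getExtendForContent(ZeroOrOneBooleanContent)         == ISD::ZERO_EXTEND
//   getExtendForContent(ZeroOrNegativeOneBooleanContent) == ISD::SIGN_EXTEND
// so an all-ones (-1) "true" value stays all-ones in the wider type.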
348 | |
349 | explicit TargetLoweringBase(const TargetMachine &TM); |
350 | TargetLoweringBase(const TargetLoweringBase &) = delete; |
351 | TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; |
352 | virtual ~TargetLoweringBase(); |
353 | |
354 | /// Return true if the target supports strict float operations. |
355 | bool isStrictFPEnabled() const { |
356 | return IsStrictFPEnabled; |
357 | } |
358 | |
359 | protected: |
360 | /// Initialize all of the actions to default values. |
361 | void initActions(); |
362 | |
363 | public: |
364 | const TargetMachine &getTargetMachine() const { return TM; } |
365 | |
366 | virtual bool useSoftFloat() const { return false; } |
367 | |
368 | /// Return the pointer type for the given address space; defaults to the |
369 | /// pointer type from the data layout. |
370 | /// FIXME: The default needs to be removed once all the code is updated. |
371 | virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { |
372 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
373 | } |
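// Example (illustrative): with a DataLayout of "p:64:64-p1:32:32" the default
// implementation returns
//   getPointerTy(DL)    == MVT::i64   // address space 0
//   getPointerTy(DL, 1) == MVT::i32   // address space 1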
374 | |
375 | /// Return the in-memory pointer type for the given address space; defaults |
376 | /// to the pointer type from the data layout. |
377 | /// FIXME: The default needs to be removed once all the code is updated. |
378 | virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { |
379 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
380 | } |
381 | |
382 | /// Return the type for frame index, which is determined by |
383 | /// the alloca address space specified through the data layout. |
384 | MVT getFrameIndexTy(const DataLayout &DL) const { |
385 | return getPointerTy(DL, DL.getAllocaAddrSpace()); |
386 | } |
387 | |
388 | /// Return the type for code pointers, which is determined by the program |
389 | /// address space specified through the data layout. |
390 | MVT getProgramPointerTy(const DataLayout &DL) const { |
391 | return getPointerTy(DL, DL.getProgramAddressSpace()); |
392 | } |
393 | |
394 | /// Return the type for operands of fence. |
395 | /// TODO: Let fence operands be of i32 type and remove this. |
396 | virtual MVT getFenceOperandTy(const DataLayout &DL) const { |
397 | return getPointerTy(DL); |
398 | } |
399 | |
400 | /// Return the type to use for a scalar shift opcode, given the shifted amount |
401 | /// type. Targets should return a legal type if the input type is legal. |
402 | /// Targets can return a type that is too small if the input type is illegal. |
403 | virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; |
404 | |
405 | /// Returns the type for the shift amount of a shift opcode. For vectors, |
406 | /// returns the input type. For scalars, calls getScalarShiftAmountTy. |
407 | /// If getScalarShiftAmountTy type cannot represent all possible shift |
408 | /// amounts, returns MVT::i32. |
409 | EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; |
410 | |
411 | /// Return the preferred type to use for a shift opcode, given the shifted |
412 | /// amount type is \p ShiftValueTy. |
413 | LLVM_READONLY |
414 | virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const { |
415 | return ShiftValueTy; |
416 | } |
417 | |
418 | /// Returns the type to be used for the index operand of vector operations. |
419 | /// By default we assume it will have the same size as an address space 0 |
420 | /// pointer. |
421 | virtual unsigned getVectorIdxWidth(const DataLayout &DL) const { |
422 | return DL.getPointerSizeInBits(0); |
423 | } |
424 | |
425 | /// Returns the type to be used for the index operand of: |
426 | /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, |
427 | /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR |
428 | MVT getVectorIdxTy(const DataLayout &DL) const { |
429 | return MVT::getIntegerVT(getVectorIdxWidth(DL)); |
430 | } |
431 | |
432 | /// Returns the type to be used for the index operand of: |
433 | /// G_INSERT_VECTOR_ELT, G_EXTRACT_VECTOR_ELT, |
434 | /// G_INSERT_SUBVECTOR, and G_EXTRACT_SUBVECTOR |
435 | LLT getVectorIdxLLT(const DataLayout &DL) const { |
436 | return LLT::scalar(getVectorIdxWidth(DL)); |
437 | } |
438 | |
439 | /// Returns the type to be used for the EVL/AVL operand of VP nodes: |
440 | /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type, |
441 | /// and must be at least as large as i32. The EVL is implicitly zero-extended |
442 | /// to any larger type. |
443 | virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; } |
444 | |
445 | /// This callback is used to inspect load/store instructions and add |
446 | /// target-specific MachineMemOperand flags to them. The default |
447 | /// implementation does nothing. |
448 | virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const { |
449 | return MachineMemOperand::MONone; |
450 | } |
451 | |
452 | /// This callback is used to inspect load/store SDNode. |
453 | /// The default implementation does nothing. |
454 | virtual MachineMemOperand::Flags |
455 | getTargetMMOFlags(const MemSDNode &Node) const { |
456 | return MachineMemOperand::MONone; |
457 | } |
458 | |
459 | MachineMemOperand::Flags |
460 | getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL, |
461 | AssumptionCache *AC = nullptr, |
462 | const TargetLibraryInfo *LibInfo = nullptr) const; |
463 | MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, |
464 | const DataLayout &DL) const; |
465 | MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI, |
466 | const DataLayout &DL) const; |
467 | |
468 | virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { |
469 | return true; |
470 | } |
471 | |
472 | /// Return true if the @llvm.experimental.vector.partial.reduce.* intrinsic |
473 | /// should be expanded using generic code in SelectionDAGBuilder. |
474 | virtual bool |
475 | shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const { |
476 | return true; |
477 | } |
478 | |
479 | /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded |
480 | /// using generic code in SelectionDAGBuilder. |
481 | virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const { |
482 | return true; |
483 | } |
484 | |
485 | virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF, |
486 | bool IsScalable) const { |
487 | return true; |
488 | } |
489 | |
490 | /// Return true if the @llvm.experimental.cttz.elts intrinsic should be |
491 | /// expanded using generic code in SelectionDAGBuilder. |
492 | virtual bool shouldExpandCttzElements(EVT VT) const { return true; } |
493 | |
494 | /// Return the minimum number of bits required to hold the maximum possible |
495 | /// number of trailing zero vector elements. |
496 | unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, |
497 | bool ZeroIsPoison, |
498 | const ConstantRange *VScaleRange) const; |
499 | |
500 | /// Return true if the @llvm.experimental.vector.match intrinsic should be |
501 | /// expanded for vector type `VT' and search size `SearchSize' using generic |
502 | /// code in SelectionDAGBuilder. |
503 | virtual bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const { |
504 | return true; |
505 | } |
506 | |
507 | // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to |
508 | // vecreduce(op(x, y)) for the reduction opcode RedOpc. |
509 | virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const { |
510 | return true; |
511 | } |
512 | |
513 | /// Return true if it is profitable to convert a select of FP constants into |
514 | /// a constant pool load whose address depends on the select condition. The |
515 | /// parameter may be used to differentiate a select with FP compare from |
516 | /// integer compare. |
517 | virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { |
518 | return true; |
519 | } |
520 | |
521 | /// Return true if multiple condition registers are available. |
522 | bool hasMultipleConditionRegisters() const { |
523 | return HasMultipleConditionRegisters; |
524 | } |
525 | |
526 | /// Return true if the target has BitExtract instructions. |
527 | bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } |
528 | |
529 | /// Return the preferred vector type legalization action. |
530 | virtual TargetLoweringBase::LegalizeTypeAction |
531 | getPreferredVectorAction(MVT VT) const { |
532 | // The default action for one element vectors is to scalarize |
533 | if (VT.getVectorElementCount().isScalar()) |
534 | return TypeScalarizeVector; |
535 | // The default action for an odd-width vector is to widen. |
536 | if (!VT.isPow2VectorType()) |
537 | return TypeWidenVector; |
538 | // The default action for other vectors is to promote |
539 | return TypePromoteInteger; |
540 | } |
541 | |
542 | // Return true if the half type should be promoted using soft promotion rules |
543 | // where each operation is promoted to f32 individually, then converted to |
544 | // fp16. The default behavior is to promote chains of operations, keeping |
545 | // intermediate results in f32 precision and range. |
546 | virtual bool softPromoteHalfType() const { return false; } |
547 | |
548 | // Return true if, for soft-promoted half, the half type should be passed |
549 | // to and returned from functions as f32. The default behavior is to |
550 | // pass as i16. If soft-promoted half is not used, this function is ignored |
551 | // and values are always passed and returned as f32. |
552 | virtual bool useFPRegsForHalfType() const { return false; } |
553 | |
554 | // There are two general methods for expanding a BUILD_VECTOR node: |
555 | // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle |
556 | // them together. |
557 | // 2. Build the vector on the stack and then load it. |
558 | // If this function returns true, then method (1) will be used, subject to |
559 | // the constraint that all of the necessary shuffles are legal (as determined |
560 | // by isShuffleMaskLegal). If this function returns false, then method (2) is |
561 | // always used. The vector type, and the number of defined values, are |
562 | // provided. |
563 | virtual bool |
564 | shouldExpandBuildVectorWithShuffles(EVT /* VT */, |
565 | unsigned DefinedValues) const { |
566 | return DefinedValues < 3; |
567 | } |
568 | |
569 | /// Return true if integer divide is usually cheaper than a sequence of |
570 | /// several shifts, adds, and multiplies for this target. |
571 | /// The definition of "cheaper" may depend on whether we're optimizing |
572 | /// for speed or for size. |
573 | virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } |
574 | |
575 | /// Return true if the target can handle a standalone remainder operation. |
576 | virtual bool hasStandaloneRem(EVT VT) const { |
577 | return true; |
578 | } |
579 | |
580 | /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). |
581 | virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { |
582 | // Default behavior is to replace SQRT(X) with X*RSQRT(X). |
583 | return false; |
584 | } |
585 | |
586 | /// Reciprocal estimate status values used by the functions below. |
587 | enum ReciprocalEstimate : int { |
588 | Unspecified = -1, |
589 | Disabled = 0, |
590 | Enabled = 1 |
591 | }; |
592 | |
593 | /// Return a ReciprocalEstimate enum value for a square root of the given type |
594 | /// based on the function's attributes. If the operation is not overridden by |
595 | /// the function's attributes, "Unspecified" is returned and target defaults |
596 | /// are expected to be used for instruction selection. |
597 | int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; |
598 | |
599 | /// Return a ReciprocalEstimate enum value for a division of the given type |
600 | /// based on the function's attributes. If the operation is not overridden by |
601 | /// the function's attributes, "Unspecified" is returned and target defaults |
602 | /// are expected to be used for instruction selection. |
603 | int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; |
604 | |
605 | /// Return the refinement step count for a square root of the given type based |
606 | /// on the function's attributes. If the operation is not overridden by |
607 | /// the function's attributes, "Unspecified" is returned and target defaults |
608 | /// are expected to be used for instruction selection. |
609 | int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; |
610 | |
611 | /// Return the refinement step count for a division of the given type based |
612 | /// on the function's attributes. If the operation is not overridden by |
613 | /// the function's attributes, "Unspecified" is returned and target defaults |
614 | /// are expected to be used for instruction selection. |
615 | int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; |
616 | |
617 | /// Returns true if target has indicated at least one type should be bypassed. |
618 | bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } |
619 | |
620 | /// Returns map of slow types for division or remainder with corresponding |
621 | /// fast types |
622 | const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { |
623 | return BypassSlowDivWidths; |
624 | } |
625 | |
626 | /// Return true only if vscale must be a power of two. |
627 | virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; } |
628 | |
629 | /// Return true if Flow Control is an expensive operation that should be |
630 | /// avoided. |
631 | bool isJumpExpensive() const { return JumpIsExpensive; } |
632 | |
633 | // Costs parameters used by |
634 | // SelectionDAGBuilder::shouldKeepJumpConditionsTogether. |
635 | // shouldKeepJumpConditionsTogether will use these parameter value to |
636 | // determine if two conditions in the form `br (and/or cond1, cond2)` should |
637 | // be split into two branches or left as one. |
638 | // |
639 | // BaseCost is the cost threshold (in latency). If the estimated latency of |
640 | // computing both `cond1` and `cond2` is below the cost of just computing |
641 | // `cond1` + BaseCost, the two conditions will be kept together. Otherwise |
642 | // they will be split. |
643 | // |
644 | // LikelyBias increases BaseCost if branch probability info indicates that it |
645 | // is likely that both `cond1` and `cond2` will be computed. |
646 | // |
647 | // UnlikelyBias decreases BaseCost if branch probability info indicates that |
648 | // it is unlikely that both `cond1` and `cond2` will be computed. |
649 | // |
650 | // Set any field to -1 to make it ignored (setting BaseCost to -1 results in |
651 | // `shouldKeepJumpConditionsTogether` always returning false). |
652 | struct CondMergingParams { |
653 | int BaseCost; |
654 | int LikelyBias; |
655 | int UnlikelyBias; |
656 | }; |
657 | // Return params for deciding if we should keep two branch conditions merged |
658 | // or split them into two separate branches. |
659 | // Arg0: The binary op joining the two conditions (and/or). |
660 | // Arg1: The first condition (cond1) |
661 | // Arg2: The second condition (cond2) |
662 | virtual CondMergingParams |
663 | getJumpConditionMergingParams(Instruction::BinaryOps, const Value *, |
664 | const Value *) const { |
665 | // -1 will always result in splitting. |
666 | return {-1, -1, -1}; |
667 | } |
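// Example (illustrative): a target that wants `br (and c1, c2)` kept as a
// single branch when both conditions are cheap might override this as:
//   CondMergingParams
//   getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *,
//                                 const Value *) const override {
//     if (Opc == Instruction::And)
//       return {/*BaseCost=*/2, /*LikelyBias=*/1, /*UnlikelyBias=*/-1};
//     return {-1, -1, -1}; // always split `or` conditions
//   }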
668 | |
669 | /// Return true if selects are only cheaper than branches if the branch is |
670 | /// unlikely to be predicted right. |
671 | bool isPredictableSelectExpensive() const { |
672 | return PredictableSelectIsExpensive; |
673 | } |
674 | |
675 | virtual bool fallBackToDAGISel(const Instruction &Inst) const { |
676 | return false; |
677 | } |
678 | |
679 | /// Return true if the following transform is beneficial: |
680 | /// fold (conv (load x)) -> (load (conv*)x) |
681 | /// On architectures that don't natively support some vector loads |
682 | /// efficiently, casting the load to a smaller vector of larger types and |
683 | /// loading is more efficient; however, this can be undone by optimizations |
684 | /// in the DAG combiner. |
685 | virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
686 | const SelectionDAG &DAG, |
687 | const MachineMemOperand &MMO) const; |
688 | |
689 | /// Return true if the following transform is beneficial: |
690 | /// (store (y (conv x)), y*)) -> (store x, (x*)) |
691 | virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, |
692 | const SelectionDAG &DAG, |
693 | const MachineMemOperand &MMO) const { |
694 | // Default to the same logic as loads. |
695 | return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO); |
696 | } |
697 | |
698 | /// Return true if it is expected to be cheaper to do a store of vector |
699 | /// constant with the given size and type for the address space than to |
700 | /// store the individual scalar element constants. |
701 | virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, |
702 | unsigned NumElem, |
703 | unsigned AddrSpace) const { |
704 | return IsZero; |
705 | } |
706 | |
707 | /// Allow store merging for the specified type after legalization in addition |
708 | /// to before legalization. This may transform stores that do not exist |
709 | /// earlier (for example, stores created from intrinsics). |
710 | virtual bool mergeStoresAfterLegalization(EVT MemVT) const { |
711 | return true; |
712 | } |
713 | |
714 | /// Returns true if it's reasonable to merge stores to MemVT size. |
715 | virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, |
716 | const MachineFunction &MF) const { |
717 | return true; |
718 | } |
719 | |
720 | /// Return true if it is cheap to speculate a call to intrinsic cttz. |
721 | virtual bool isCheapToSpeculateCttz(Type *Ty) const { |
722 | return false; |
723 | } |
724 | |
725 | /// Return true if it is cheap to speculate a call to intrinsic ctlz. |
726 | virtual bool isCheapToSpeculateCtlz(Type *Ty) const { |
727 | return false; |
728 | } |
729 | |
730 | /// Return true if ctlz instruction is fast. |
731 | virtual bool isCtlzFast() const { |
732 | return false; |
733 | } |
734 | |
735 | /// Return true if ctpop instruction is fast. |
736 | virtual bool isCtpopFast(EVT VT) const { |
737 | return isOperationLegal(ISD::CTPOP, VT); |
738 | } |
739 | |
740 | /// Return the maximum number of "x & (x - 1)" operations that can be done |
741 | /// instead of deferring to a custom CTPOP. |
742 | virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { |
743 | return 1; |
744 | } |
745 | |
746 | /// Return true if the instruction generated for an equality comparison is |
747 | /// folded with the instruction generated for a signed comparison. |
748 | virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } |
749 | |
750 | /// Return true if the heuristic to prefer icmp eq zero should be used in code |
751 | /// gen prepare. |
752 | virtual bool preferZeroCompareBranch() const { return false; } |
753 | |
754 | /// Return true if it is cheaper to split the store of a merged int val |
755 | /// from a pair of smaller values into multiple stores. |
756 | virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { |
757 | return false; |
758 | } |
759 | |
760 | /// Return true if the target supports combining a |
761 | /// chain like: |
762 | /// \code |
763 | /// %andResult = and %val1, #mask |
764 | /// %icmpResult = icmp %andResult, 0 |
765 | /// \endcode |
766 | /// into a single machine instruction of a form like: |
767 | /// \code |
768 | /// cc = test %register, #mask |
769 | /// \endcode |
770 | virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { |
771 | return false; |
772 | } |
773 | |
774 | /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes. |
775 | virtual bool |
776 | areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, |
777 | const MemSDNode &NodeY) const { |
778 | return true; |
779 | } |
780 | |
781 | /// Use bitwise logic to make pairs of compares more efficient. For example: |
782 | /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 |
783 | /// This should be true when it takes more than one instruction to lower |
784 | /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on |
785 | /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. |
786 | virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { |
787 | return false; |
788 | } |
789 | |
790 | /// Return the preferred operand type if the target has a quick way to compare |
791 | /// integer values of the given size. Assume that any legal integer type can |
792 | /// be compared efficiently. Targets may override this to allow illegal wide |
793 | /// types to return a vector type if there is support to compare that type. |
794 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { |
795 | MVT VT = MVT::getIntegerVT(NumBits); |
796 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; |
797 | } |
798 | |
799 | /// Return true if the target should transform: |
800 | /// (X & Y) == Y ---> (~X & Y) == 0 |
801 | /// (X & Y) != Y ---> (~X & Y) != 0 |
802 | /// |
803 | /// This may be profitable if the target has a bitwise and-not operation that |
804 | /// sets comparison flags. A target may want to limit the transformation based |
805 | /// on the type of Y or if Y is a constant. |
806 | /// |
807 | /// Note that the transform will not occur if Y is known to be a power-of-2 |
808 | /// because a mask and compare of a single bit can be handled by inverting the |
809 | /// predicate, for example: |
810 | /// (X & 8) == 8 ---> (X & 8) != 0 |
811 | virtual bool hasAndNotCompare(SDValue Y) const { |
812 | return false; |
813 | } |
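// Example (illustrative): a target with a flag-setting and-not instruction
// (an ANDN/BICS-style op) might enable the transform for scalar integers only:
//   bool hasAndNotCompare(SDValue Y) const override {
//     EVT VT = Y.getValueType();
//     return VT.isScalarInteger() && VT.getSizeInBits() >= 32;
//   }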
814 | |
815 | /// Return true if the target has a bitwise and-not operation: |
816 | /// X = ~A & B |
817 | /// This can be used to simplify select or other instructions. |
818 | virtual bool hasAndNot(SDValue X) const { |
819 | // If the target has the more complex version of this operation, assume that |
820 | // it has this operation too. |
821 | return hasAndNotCompare(X); |
822 | } |
823 | |
824 | /// Return true if the target has a bit-test instruction: |
825 | /// (X & (1 << Y)) ==/!= 0 |
826 | /// This knowledge can be used to prevent breaking the pattern, |
827 | /// or creating it if it could be recognized. |
828 | virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } |
829 | |
830 | /// There are two ways to clear extreme bits (either low or high): |
831 | /// Mask: x & (-1 << y) (the instcombine canonical form) |
832 | /// Shifts: x >> y << y |
833 | /// Return true if the variant with 2 variable shifts is preferred. |
834 | /// Return false if there is no preference. |
835 | virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const { |
836 | // By default, let's assume that no one prefers shifts. |
837 | return false; |
838 | } |
839 | |
840 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
841 | /// This is usually true on most targets. But some targets, like Thumb1, |
842 | /// have immediate shift instructions, but no immediate "and" instruction; |
843 | /// this makes the fold unprofitable. |
844 | virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
845 | CombineLevel Level) const { |
846 | return true; |
847 | } |
848 | |
849 | /// Should we transform the IR-optimal check for whether the given truncation |
850 | /// down into KeptBits would be truncating or not: |
851 | /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) |
852 | /// Into its more traditional form: |
853 | /// ((%x << C) a>> C) dstcond %x |
854 | /// Return true if we should transform. |
855 | /// Return false if there is no preference. |
856 | virtual bool shouldTransformSignedTruncationCheck(EVT XVT, |
857 | unsigned KeptBits) const { |
858 | // By default, let's assume that no one prefers shifts. |
859 | return false; |
860 | } |
861 | |
862 | /// Given the pattern |
863 | /// (X & (C l>>/<< Y)) ==/!= 0 |
864 | /// return true if it should be transformed into: |
865 | /// ((X <</l>> Y) & C) ==/!= 0 |
866 | /// WARNING: if 'X' is a constant, the fold may deadlock! |
867 | /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() |
868 | /// here because it can end up being not linked in. |
869 | virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
870 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
871 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
872 | SelectionDAG &DAG) const { |
873 | if (hasBitTest(X, Y)) { |
874 | // One interesting pattern that we'd want to form is 'bit test': |
875 | // ((1 << Y) & C) ==/!= 0 |
876 | // But we also need to be careful not to try to reverse that fold. |
877 | |
878 | // Is this '1 << Y' ? |
879 | if (OldShiftOpcode == ISD::SHL && CC->isOne()) |
880 | return false; // Keep the 'bit test' pattern. |
881 | |
882 | // Will it be '1 << Y' after the transform ? |
883 | if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) |
884 | return true; // Do form the 'bit test' pattern. |
885 | } |
886 | |
887 | // If 'X' is a constant, and we transform, then we will immediately |
888 | // try to undo the fold, thus causing endless combine loop. |
889 | // So by default, let's assume everyone prefers the fold |
890 | // iff 'X' is not a constant. |
891 | return !XC; |
892 | } |
893 | |
894 | // Return true if it's desirable to perform the following transform: |
895 | // (fmul C, (uitofp Pow2)) |
896 | // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa)) |
897 | // (fdiv C, (uitofp Pow2)) |
898 | // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa)) |
899 | // |
900 | // This is only queried after we have verified that the transform is bitwise |
901 | // equivalent. |
902 | // |
903 | // SDNode *N : The FDiv/FMul node we want to transform. |
904 | // SDValue FPConst: The Float constant operand in `N`. |
905 | // SDValue IntPow2: The Integer power of 2 operand in `N`. |
906 | virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, |
907 | SDValue IntPow2) const { |
908 | // Default to avoiding fdiv which is often very expensive. |
909 | return N->getOpcode() == ISD::FDIV; |
910 | } |
911 | |
912 | // Given: |
913 | // (icmp eq/ne (and X, C0), (shift X, C1)) |
914 | // or |
915 | // (icmp eq/ne X, (rotate X, CPow2)) |
916 | |
917 | // If C0 is a mask or shifted mask and the shift amount (C1) isolates the |
918 | // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`), |
919 | // do we prefer the shift to be shift-right, shift-left, or rotate? |
920 | // Note: It's only valid to convert the rotate version to the shift version iff |
921 | // the shift-amt (`C1`) is a power of 2 (including 0). |
922 | // If ShiftOpc (current Opcode) is returned, do nothing. |
923 | virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand( |
924 | EVT VT, unsigned ShiftOpc, bool MayTransformRotate, |
925 | const APInt &ShiftOrRotateAmt, |
926 | const std::optional<APInt> &AndMask) const { |
927 | return ShiftOpc; |
928 | } |
929 | |
930 | /// These two forms are equivalent: |
931 | /// sub %y, (xor %x, -1) |
932 | /// add (add %x, 1), %y |
933 | /// The variant with two add's is IR-canonical. |
934 | /// Some targets may prefer one to the other. |
935 | virtual bool preferIncOfAddToSubOfNot(EVT VT) const { |
936 | // By default, let's assume that everyone prefers the form with two add's. |
937 | return true; |
938 | } |
939 | |
940 | // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets |
941 | // may want to avoid this to prevent loss of sub_nsw pattern. |
942 | virtual bool preferABDSToABSWithNSW(EVT VT) const { |
943 | return true; |
944 | } |
945 | |
946 | // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X)) |
947 | virtual bool preferScalarizeSplat(SDNode *N) const { return true; } |
948 | |
949 | // Return true if the target wants to transform: |
950 | // (TruncVT truncate(sext_in_reg(VT X, ExtVT)) |
951 | // -> (TruncVT sext_in_reg(truncate(VT X), ExtVT)) |
952 | // Some targets might prefer pre-sextinreg to improve truncation/saturation. |
953 | virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const { |
954 | return true; |
955 | } |
956 | |
957 | /// Return true if the target wants to use the optimization that |
958 | /// turns ext(promotableInst1(...(promotableInstN(load)))) into |
959 | /// promotedInst1(...(promotedInstN(ext(load)))). |
960 | bool enableExtLdPromotion() const { return EnableExtLdPromotion; } |
961 | |
962 | /// Return true if the target can combine store(extractelement VectorTy, |
963 | /// Idx). |
964 | /// \p Cost[out] gives the cost of that transformation when this is true. |
965 | virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, |
966 | unsigned &Cost) const { |
967 | return false; |
968 | } |
969 | |
970 | /// Return true if the target shall perform extract vector element and store |
971 | /// given that the vector is known to be splat of constant. |
972 | /// \p Index[out] gives the index of the vector element to be extracted when |
973 | /// this is true. |
974 | virtual bool shallExtractConstSplatVectorElementToStore( |
975 | Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const { |
976 | return false; |
977 | } |
978 | |
979 | /// Return true if inserting a scalar into a variable element of an undef |
980 | /// vector is more efficiently handled by splatting the scalar instead. |
981 | virtual bool shouldSplatInsEltVarIndex(EVT) const { |
982 | return false; |
983 | } |
984 | |
985 | /// Return true if target always benefits from combining into FMA for a |
986 | /// given value type. This must typically return false on targets where FMA |
987 | /// takes more cycles to execute than FADD. |
988 | virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; } |
989 | |
990 | /// Return true if target always benefits from combining into FMA for a |
991 | /// given value type. This must typically return false on targets where FMA |
992 | /// takes more cycles to execute than FADD. |
993 | virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; } |
994 | |
995 | /// Return the ValueType of the result of SETCC operations. |
996 | virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
997 | EVT VT) const; |
998 | |
999 | /// Return the ValueType for comparison libcalls. Comparison libcalls include |
1000 | /// floating point comparison calls, and Ordered/Unordered check calls on |
1001 | /// floating point numbers. |
1002 | virtual |
1003 | MVT::SimpleValueType getCmpLibcallReturnType() const; |
1004 | |
1005 | /// For targets without i1 registers, this gives the nature of the high-bits |
1006 | /// of boolean values held in types wider than i1. |
1007 | /// |
1008 | /// "Boolean values" are special true/false values produced by nodes like |
1009 | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. |
1010 | /// Not to be confused with general values promoted from i1. Some cpus |
1011 | /// distinguish between vectors of boolean and scalars; the isVec parameter |
1012 | /// selects between the two kinds. For example on X86 a scalar boolean should |
1013 | /// be zero extended from i1, while the elements of a vector of booleans |
1014 | /// should be sign extended from i1. |
1015 | /// |
1016 | /// Some cpus also treat floating point types the same way as they treat |
1017 | /// vectors instead of the way they treat scalars. |
1018 | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { |
1019 | if (isVec) |
1020 | return BooleanVectorContents; |
1021 | return isFloat ? BooleanFloatContents : BooleanContents; |
1022 | } |
1023 | |
1024 | BooleanContent getBooleanContents(EVT Type) const { |
1025 | return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); |
1026 | } |
1027 | |
1028 | /// Promote the given target boolean to a target boolean of the given type. |
1029 | /// A target boolean is an integer value, not necessarily of type i1, the bits |
1030 | /// of which conform to getBooleanContents. |
1031 | /// |
1032 | /// ValVT is the type of values that produced the boolean. |
1033 | SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, |
1034 | EVT ValVT) const { |
1035 | SDLoc dl(Bool); |
1036 | EVT BoolVT = |
1037 | getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ValVT); |
1038 | ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(ValVT)); |
1039 | return DAG.getNode(ExtendCode, dl, BoolVT, Bool); |
1040 | } |
1041 | |
1042 | /// Return target scheduling preference. |
1043 | Sched::Preference getSchedulingPreference() const { |
1044 | return SchedPreferenceInfo; |
1045 | } |
1046 | |
1047 | /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics |
1048 | /// for different nodes. This function returns the preference (or none) for |
1049 | /// the given node. |
1050 | virtual Sched::Preference getSchedulingPreference(SDNode *) const { |
1051 | return Sched::None; |
1052 | } |
1053 | |
1054 | /// Return the register class that should be used for the specified value |
1055 | /// type. |
1056 | virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const { |
1057 | (void)isDivergent; |
1058 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
1059 | assert(RC && "This value type is not natively supported!"); |
1060 | return RC; |
1061 | } |
1062 | |
1063 | /// Allows target to decide about the register class of the |
1064 | /// specific value that is live outside the defining block. |
1065 | /// Returns true if the value needs uniform register class. |
1066 | virtual bool requiresUniformRegister(MachineFunction &MF, |
1067 | const Value *) const { |
1068 | return false; |
1069 | } |
1070 | |
1071 | /// Return the 'representative' register class for the specified value |
1072 | /// type. |
1073 | /// |
1074 | /// The 'representative' register class is the largest legal super-reg |
1075 | /// register class for the register class of the value type. For example, on |
1076 | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep |
1077 | /// register class is GR64 on x86_64. |
1078 | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { |
1079 | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; |
1080 | return RC; |
1081 | } |
1082 | |
1083 | /// Return the cost of the 'representative' register class for the specified |
1084 | /// value type. |
1085 | virtual uint8_t getRepRegClassCostFor(MVT VT) const { |
1086 | return RepRegClassCostForVT[VT.SimpleTy]; |
1087 | } |
1088 | |
1089 | /// Return the preferred strategy to legalize this SHIFT instruction, with |
1090 | /// \p ExpansionFactor being the recursion depth - how many expansions are needed. |
1091 | enum class ShiftLegalizationStrategy { |
1092 | ExpandToParts, |
1093 | ExpandThroughStack, |
1094 | LowerToLibcall |
1095 | }; |
1096 | virtual ShiftLegalizationStrategy |
1097 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
1098 | unsigned ExpansionFactor) const { |
1099 | if (ExpansionFactor == 1) |
1100 | return ShiftLegalizationStrategy::ExpandToParts; |
1101 | return ShiftLegalizationStrategy::ExpandThroughStack; |
1102 | } |
1103 | |
1104 | /// Return true if the target has native support for the specified value type. |
1105 | /// This means that it has a register that directly holds it without |
1106 | /// promotions or expansions. |
1107 | bool isTypeLegal(EVT VT) const { |
1108 | assert(!VT.isSimple() || |
1109 | (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT)); |
1110 | return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; |
1111 | } |
1112 | |
1113 | class ValueTypeActionImpl { |
1114 | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum |
1115 | /// that indicates how instruction selection should deal with the type. |
1116 | LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE]; |
1117 | |
1118 | public: |
1119 | ValueTypeActionImpl() { |
1120 | std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), |
1121 | TypeLegal); |
1122 | } |
1123 | |
1124 | LegalizeTypeAction getTypeAction(MVT VT) const { |
1125 | return ValueTypeActions[VT.SimpleTy]; |
1126 | } |
1127 | |
1128 | void setTypeAction(MVT VT, LegalizeTypeAction Action) { |
1129 | ValueTypeActions[VT.SimpleTy] = Action; |
1130 | } |
1131 | }; |
1132 | |
1133 | const ValueTypeActionImpl &getValueTypeActions() const { |
1134 | return ValueTypeActions; |
1135 | } |
1136 | |
1137 | /// Return pair that represents the legalization kind (first) that needs to |
1138 | /// happen to EVT (second) in order to type-legalize it. |
1139 | /// |
1140 | /// First: how we should legalize values of this type, either it is already |
1141 | /// legal (return 'Legal') or we need to promote it to a larger type (return |
1142 | /// 'Promote'), or we need to expand it into multiple registers of smaller |
1143 | /// integer type (return 'Expand'). 'Custom' is not an option. |
1144 | /// |
1145 | /// Second: for types supported by the target, this is an identity function. |
1146 | /// For types that must be promoted to larger types, this returns the larger |
1147 | /// type to promote to. For integer types that are larger than the largest |
1148 | /// integer register, this contains one step in the expansion to get to the |
1149 | /// smaller register. For illegal floating point types, this returns the |
1150 | /// integer type to transform to. |
1151 | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; |
1152 | |
1153 | /// Return how we should legalize values of this type, either it is already |
1154 | /// legal (return 'Legal') or we need to promote it to a larger type (return |
1155 | /// 'Promote'), or we need to expand it into multiple registers of smaller |
1156 | /// integer type (return 'Expand'). 'Custom' is not an option. |
1157 | LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { |
1158 | return getTypeConversion(Context, VT).first; |
1159 | } |
1160 | LegalizeTypeAction getTypeAction(MVT VT) const { |
1161 | return ValueTypeActions.getTypeAction(VT); |
1162 | } |
1163 | |
1164 | /// For types supported by the target, this is an identity function. For |
1165 | /// types that must be promoted to larger types, this returns the larger type |
1166 | /// to promote to. For integer types that are larger than the largest integer |
1167 | /// register, this contains one step in the expansion to get to the smaller |
1168 | /// register. For illegal floating point types, this returns the integer type |
1169 | /// to transform to. |
1170 | virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { |
1171 | return getTypeConversion(Context, VT).second; |
1172 | } |
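// Example (illustrative): on a target whose widest legal integer type is i64,
//   getTypeAction(Ctx, MVT::i128)        == TypeExpandInteger
//   getTypeToTransformTo(Ctx, MVT::i128) == MVT::i64
// and on a target where i16 is not legal but i32 is,
//   getTypeAction(Ctx, MVT::i16)         == TypePromoteInteger
//   getTypeToTransformTo(Ctx, MVT::i16)  == MVT::i32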
1173 | |
1174 | /// For types supported by the target, this is an identity function. For |
1175 | /// types that must be expanded (i.e. integer types that are larger than the |
1176 | /// largest integer register or illegal floating point types), this returns |
1177 | /// the largest legal type it will be expanded to. |
1178 | EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { |
1179 | assert(!VT.isVector()); |
1180 | while (true) { |
1181 | switch (getTypeAction(Context, VT)) { |
1182 | case TypeLegal: |
1183 | return VT; |
1184 | case TypeExpandInteger: |
1185 | VT = getTypeToTransformTo(Context, VT); |
1186 | break; |
1187 | default: |
1188 | llvm_unreachable("Type is not legal nor is it to be expanded!"); |
1189 | } |
1190 | } |
1191 | } |
1192 | |
1193 | /// Vector types are broken down into some number of legal first class types. |
1194 | /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 |
1195 | /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 |
1196 | /// turns into 4 EVT::i32 values with both PPC and X86. |
1197 | /// |
1198 | /// This method returns the number of registers needed, and the VT for each |
1199 | /// register. It also returns the VT and quantity of the intermediate values |
1200 | /// before they are promoted/expanded. |
1201 | unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, |
1202 | EVT &IntermediateVT, |
1203 | unsigned &NumIntermediates, |
1204 | MVT &RegisterVT) const; |
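// Example (illustrative) of querying the breakdown, given a TargetLoweringBase
// &TLI and LLVMContext &Ctx, on a target where v8f32 splits into two legal
// v4f32 registers:
//   EVT IntermediateVT;
//   MVT RegisterVT;
//   unsigned NumIntermediates;
//   unsigned NumRegs = TLI.getVectorTypeBreakdown(Ctx, MVT::v8f32,
//                                                 IntermediateVT,
//                                                 NumIntermediates, RegisterVT);
//   // NumRegs == 2, NumIntermediates == 2, IntermediateVT == RegisterVT == v4f32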
1205 | |
1206 | /// Certain targets such as MIPS require that some types such as vectors are |
1207 | /// always broken down into scalars in some contexts. This occurs even if the |
1208 | /// vector type is legal. |
1209 | virtual unsigned getVectorTypeBreakdownForCallingConv( |
1210 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
1211 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
1212 | return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, |
1213 | RegisterVT); |
1214 | } |
1215 | |
1216 | struct IntrinsicInfo { |
1217 | unsigned opc = 0; // target opcode |
1218 | EVT memVT; // memory VT |
1219 | |
1220 | // value representing memory location |
1221 | PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; |
1222 | |
1223 | // Fallback address space for use if ptrVal is nullptr. std::nullopt means |
1224 | // unknown address space. |
1225 | std::optional<unsigned> fallbackAddressSpace; |
1226 | |
1227 | int offset = 0; // offset off of ptrVal |
1228 | uint64_t size = 0; // the size of the memory location |
1229 | // (taken from memVT if zero) |
1230 | MaybeAlign align = Align(1); // alignment |
1231 | |
1232 | MachineMemOperand::Flags flags = MachineMemOperand::MONone; |
1233 | SyncScope::ID ssid = SyncScope::System; |
1234 | AtomicOrdering order = AtomicOrdering::NotAtomic; |
1235 | AtomicOrdering failureOrder = AtomicOrdering::NotAtomic; |
1236 | IntrinsicInfo() = default; |
1237 | }; |
1238 | |
  /// Given an intrinsic, checks whether on this target the intrinsic will need
  /// to map to a MemIntrinsicNode (i.e. it touches memory). If so, it returns
  /// true and stores the intrinsic information into the IntrinsicInfo that was
  /// passed to the function.
1243 | virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, |
1244 | MachineFunction &, |
1245 | unsigned /*Intrinsic*/) const { |
1246 | return false; |
1247 | } |
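
  // Illustrative override sketch for a hypothetical MyTargetLowering target:
  // a custom load-like intrinsic is described to the SelectionDAG builder so
  // it becomes a MemIntrinsicNode with the right memory operand.
  //
  //   bool MyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
  //                                             const CallInst &I,
  //                                             MachineFunction &MF,
  //                                             unsigned Intrinsic) const {
  //     // A real target would first match `Intrinsic` against its own
  //     // memory-touching intrinsic IDs and return false otherwise.
  //     Info.opc = ISD::INTRINSIC_W_CHAIN;
  //     Info.memVT = MVT::i32;
  //     Info.ptrVal = I.getArgOperand(0);
  //     Info.align = Align(4);
  //     Info.flags = MachineMemOperand::MOLoad;
  //     return true;               // size is taken from memVT, see above
  //   }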
1248 | |
1249 | /// Returns true if the target can instruction select the specified FP |
1250 | /// immediate natively. If false, the legalizer will materialize the FP |
1251 | /// immediate as a load from a constant pool. |
1252 | virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/, |
1253 | bool ForCodeSize = false) const { |
1254 | return false; |
1255 | } |
1256 | |
1257 | /// Targets can use this to indicate that they only support *some* |
1258 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
1259 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be |
1260 | /// legal. |
1261 | virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { |
1262 | return true; |
1263 | } |
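
  // Illustrative override sketch for a hypothetical target that only accepts
  // splats of lane 0 (negative mask entries denote undef lanes).
  //
  //   bool MyTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
  //                                             EVT /*VT*/) const {
  //     return llvm::all_of(Mask, [](int Elt) { return Elt <= 0; });
  //   }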
1264 | |
1265 | /// Returns true if the operation can trap for the value type. |
1266 | /// |
1267 | /// VT must be a legal type. By default, we optimistically assume most |
1268 | /// operations don't trap except for integer divide and remainder. |
1269 | virtual bool canOpTrap(unsigned Op, EVT VT) const; |
1270 | |
1271 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
1272 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
1273 | /// constant pool entry. |
1274 | virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, |
1275 | EVT /*VT*/) const { |
1276 | return false; |
1277 | } |
1278 | |
1279 | /// How to legalize this custom operation? |
1280 | virtual LegalizeAction getCustomOperationAction(SDNode &Op) const { |
1281 | return Legal; |
1282 | } |
1283 | |
1284 | /// Return how this operation should be treated: either it is legal, needs to |
1285 | /// be promoted to a larger size, needs to be expanded to some other code |
1286 | /// sequence, or the target has a custom expander for it. |
1287 | LegalizeAction getOperationAction(unsigned Op, EVT VT) const { |
1288 | // If a target-specific SDNode requires legalization, require the target |
1289 | // to provide custom legalization for it. |
1290 | if (Op >= std::size(OpActions[0])) |
1291 | return Custom; |
1292 | if (VT.isExtended()) |
1293 | return Expand; |
1294 | return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; |
1295 | } |
1296 | |
1297 | /// Custom method defined by each target to indicate if an operation which |
1298 | /// may require a scale is supported natively by the target. |
1299 | /// If not, the operation is illegal. |
1300 | virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, |
1301 | unsigned Scale) const { |
1302 | return false; |
1303 | } |
1304 | |
1305 | /// Some fixed point operations may be natively supported by the target but |
1306 | /// only for specific scales. This method allows for checking |
1307 | /// if the width is supported by the target for a given operation that may |
1308 | /// depend on scale. |
1309 | LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, |
1310 | unsigned Scale) const { |
1311 | auto Action = getOperationAction(Op, VT); |
1312 | if (Action != Legal) |
1313 | return Action; |
1314 | |
1315 | // This operation is supported in this type but may only work on specific |
1316 | // scales. |
1317 | bool Supported; |
1318 | switch (Op) { |
1319 | default: |
1320 | llvm_unreachable("Unexpected fixed point operation."); |
1321 | case ISD::SMULFIX: |
1322 | case ISD::SMULFIXSAT: |
1323 | case ISD::UMULFIX: |
1324 | case ISD::UMULFIXSAT: |
1325 | case ISD::SDIVFIX: |
1326 | case ISD::SDIVFIXSAT: |
1327 | case ISD::UDIVFIX: |
1328 | case ISD::UDIVFIXSAT: |
1329 | Supported = isSupportedFixedPointOperation(Op, VT, Scale); |
1330 | break; |
1331 | } |
1332 | |
1333 | return Supported ? Action : Expand; |
1334 | } |
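
  // Illustrative override sketch for a hypothetical target whose hardware only
  // encodes a 32-bit signed saturating fixed-point multiply with scales below
  // 32; getFixedPointOperationAction then expands every other combination.
  //
  //   bool MyTargetLowering::isSupportedFixedPointOperation(
  //       unsigned Op, EVT VT, unsigned Scale) const {
  //     return Op == ISD::SMULFIXSAT && VT == MVT::i32 && Scale < 32;
  //   }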
1335 | |
1336 | // If Op is a strict floating-point operation, return the result |
1337 | // of getOperationAction for the equivalent non-strict operation. |
1338 | LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { |
1339 | unsigned EqOpc; |
1340 | switch (Op) { |
1341 | default: llvm_unreachable("Unexpected FP pseudo-opcode"); |
1342 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1343 | case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; |
1344 | #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1345 | case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; |
1346 | #include "llvm/IR/ConstrainedOps.def" |
1347 | } |
1348 | |
    return getOperationAction(EqOpc, VT);
1350 | } |
1351 | |
1352 | /// Return true if the specified operation is legal on this target or can be |
1353 | /// made legal with custom lowering. This is used to help guide high-level |
1354 | /// lowering decisions. LegalOnly is an optional convenience for code paths |
1355 | /// traversed pre and post legalisation. |
1356 | bool isOperationLegalOrCustom(unsigned Op, EVT VT, |
1357 | bool LegalOnly = false) const { |
1358 | if (LegalOnly) |
1359 | return isOperationLegal(Op, VT); |
1360 | |
1361 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1362 | (getOperationAction(Op, VT) == Legal || |
1363 | getOperationAction(Op, VT) == Custom); |
1364 | } |
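
  // Illustrative usage sketch (`TLI` and `VT` are assumed from the caller):
  // guard the formation of a node on it being at least custom lowerable, so
  // legalization is never handed an unsupported operation.
  //
  //   static bool canFormFMA(const TargetLowering &TLI, EVT VT) {
  //     return TLI.isOperationLegalOrCustom(ISD::FMA, VT);
  //   }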
1365 | |
1366 | /// Return true if the specified operation is legal on this target or can be |
1367 | /// made legal using promotion. This is used to help guide high-level lowering |
1368 | /// decisions. LegalOnly is an optional convenience for code paths traversed |
1369 | /// pre and post legalisation. |
1370 | bool isOperationLegalOrPromote(unsigned Op, EVT VT, |
1371 | bool LegalOnly = false) const { |
1372 | if (LegalOnly) |
1373 | return isOperationLegal(Op, VT); |
1374 | |
1375 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1376 | (getOperationAction(Op, VT) == Legal || |
1377 | getOperationAction(Op, VT) == Promote); |
1378 | } |
1379 | |
1380 | /// Return true if the specified operation is legal on this target or can be |
1381 | /// made legal with custom lowering or using promotion. This is used to help |
1382 | /// guide high-level lowering decisions. LegalOnly is an optional convenience |
1383 | /// for code paths traversed pre and post legalisation. |
1384 | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, |
1385 | bool LegalOnly = false) const { |
1386 | if (LegalOnly) |
1387 | return isOperationLegal(Op, VT); |
1388 | |
1389 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1390 | (getOperationAction(Op, VT) == Legal || |
1391 | getOperationAction(Op, VT) == Custom || |
1392 | getOperationAction(Op, VT) == Promote); |
1393 | } |
1394 | |
1395 | /// Return true if the operation uses custom lowering, regardless of whether |
1396 | /// the type is legal or not. |
1397 | bool isOperationCustom(unsigned Op, EVT VT) const { |
1398 | return getOperationAction(Op, VT) == Custom; |
1399 | } |
1400 | |
1401 | /// Return true if lowering to a jump table is allowed. |
1402 | virtual bool areJTsAllowed(const Function *Fn) const { |
    if (Fn->getFnAttribute("no-jump-tables").getValueAsBool())
1404 | return false; |
1405 | |
1406 | return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
1407 | isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
1408 | } |
1409 | |
1410 | /// Check whether the range [Low,High] fits in a machine word. |
1411 | bool rangeFitsInWord(const APInt &Low, const APInt &High, |
1412 | const DataLayout &DL) const { |
1413 | // FIXME: Using the pointer type doesn't seem ideal. |
    uint64_t BW = DL.getIndexSizeInBits(0u);
1415 | uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; |
1416 | return Range <= BW; |
1417 | } |
1418 | |
  /// Return true if lowering to a jump table is suitable for a set of case
  /// clusters which may contain \p NumCases cases and span a range of
  /// \p Range values.
1421 | virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, |
1422 | uint64_t Range, ProfileSummaryInfo *PSI, |
1423 | BlockFrequencyInfo *BFI) const; |
1424 | |
1425 | /// Returns preferred type for switch condition. |
1426 | virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, |
1427 | EVT ConditionVT) const; |
1428 | |
1429 | /// Return true if lowering to a bit test is suitable for a set of case |
1430 | /// clusters which contains \p NumDests unique destinations, \p Low and |
1431 | /// \p High as its lowest and highest case values, and expects \p NumCmps |
1432 | /// case value comparisons. Check if the number of destinations, comparison |
1433 | /// metric, and range are all suitable. |
1434 | bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, |
1435 | const APInt &Low, const APInt &High, |
1436 | const DataLayout &DL) const { |
1437 | // FIXME: I don't think NumCmps is the correct metric: a single case and a |
1438 | // range of cases both require only one branch to lower. Just looking at the |
1439 | // number of clusters and destinations should be enough to decide whether to |
1440 | // build bit tests. |
1441 | |
1442 | // To lower a range with bit tests, the range must fit the bitwidth of a |
1443 | // machine word. |
1444 | if (!rangeFitsInWord(Low, High, DL)) |
1445 | return false; |
1446 | |
1447 | // Decide whether it's profitable to lower this range with bit tests. Each |
1448 | // destination requires a bit test and branch, and there is an overall range |
1449 | // check branch. For a small number of clusters, separate comparisons might |
1450 | // be cheaper, and for many destinations, splitting the range might be |
1451 | // better. |
1452 | return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || |
1453 | (NumDests == 3 && NumCmps >= 6); |
1454 | } |
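
  // Worked sketch of the heuristic above, assuming `TLI` and `DL` come from
  // the caller: case values {2, 4, 9} that all jump to a single destination
  // give NumDests = 1 and NumCmps = 3, and the range 2..9 fits in a machine
  // word, so on a typical 32- or 64-bit target bit tests are chosen.
  //
  //   APInt Low(32, 2), High(32, 9);
  //   bool UseBitTests =
  //       TLI.isSuitableForBitTests(/*NumDests=*/1, /*NumCmps=*/3, Low, High,
  //                                 DL);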
1455 | |
1456 | /// Return true if the specified operation is illegal on this target or |
1457 | /// unlikely to be made legal with custom lowering. This is used to help guide |
1458 | /// high-level lowering decisions. |
1459 | bool isOperationExpand(unsigned Op, EVT VT) const { |
1460 | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); |
1461 | } |
1462 | |
1463 | /// Return true if the specified operation is legal on this target. |
1464 | bool isOperationLegal(unsigned Op, EVT VT) const { |
1465 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1466 | getOperationAction(Op, VT) == Legal; |
1467 | } |
1468 | |
1469 | /// Return how this load with extension should be treated: either it is legal, |
1470 | /// needs to be promoted to a larger size, needs to be expanded to some other |
1471 | /// code sequence, or the target has a custom expander for it. |
1472 | LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, |
1473 | EVT MemVT) const { |
1474 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1475 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
1476 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
1477 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE && |
1478 | MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!"); |
1479 | unsigned Shift = 4 * ExtType; |
1480 | return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); |
1481 | } |
1482 | |
1483 | /// Return true if the specified load with extension is legal on this target. |
1484 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1485 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1486 | } |
1487 | |
1488 | /// Return true if the specified load with extension is legal or custom |
1489 | /// on this target. |
1490 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1491 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
1492 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
1493 | } |
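
  // Illustrative usage sketch (`TLI` assumed from the caller): a combine that
  // wants to fold (zext (load i8)) into a single extending i32 load first
  // checks that the target can select it.
  //
  //   static bool canFoldZExtLoad(const TargetLowering &TLI) {
  //     return TLI.isLoadExtLegal(ISD::ZEXTLOAD, MVT::i32, MVT::i8);
  //   }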
1494 | |
1495 | /// Same as getLoadExtAction, but for atomic loads. |
1496 | LegalizeAction getAtomicLoadExtAction(unsigned ExtType, EVT ValVT, |
1497 | EVT MemVT) const { |
1498 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1499 | unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy; |
1500 | unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy; |
1501 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE && |
1502 | MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!"); |
1503 | unsigned Shift = 4 * ExtType; |
1504 | LegalizeAction Action = |
1505 | (LegalizeAction)((AtomicLoadExtActions[ValI][MemI] >> Shift) & 0xf); |
1506 | assert((Action == Legal || Action == Expand) && |
1507 | "Unsupported atomic load extension action."); |
1508 | return Action; |
1509 | } |
1510 | |
1511 | /// Return true if the specified atomic load with extension is legal on |
1512 | /// this target. |
1513 | bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1514 | return getAtomicLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1515 | } |
1516 | |
1517 | /// Return how this store with truncation should be treated: either it is |
1518 | /// legal, needs to be promoted to a larger size, needs to be expanded to some |
1519 | /// other code sequence, or the target has a custom expander for it. |
1520 | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { |
1521 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1522 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
1523 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
1524 | assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE && |
1525 | "Table isn't big enough!"); |
1526 | return TruncStoreActions[ValI][MemI]; |
1527 | } |
1528 | |
1529 | /// Return true if the specified store with truncation is legal on this |
1530 | /// target. |
1531 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { |
    return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1533 | } |
1534 | |
  /// Return true if the specified store with truncation has a solution (is
  /// legal or custom) on this target.
  bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
    return isTypeLegal(ValVT) &&
1539 | (getTruncStoreAction(ValVT, MemVT) == Legal || |
1540 | getTruncStoreAction(ValVT, MemVT) == Custom); |
1541 | } |
1542 | |
1543 | virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, |
1544 | bool LegalOnly) const { |
1545 | if (LegalOnly) |
1546 | return isTruncStoreLegal(ValVT, MemVT); |
1547 | |
1548 | return isTruncStoreLegalOrCustom(ValVT, MemVT); |
1549 | } |
1550 | |
1551 | /// Return how the indexed load should be treated: either it is legal, needs |
1552 | /// to be promoted to a larger size, needs to be expanded to some other code |
1553 | /// sequence, or the target has a custom expander for it. |
1554 | LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
    return getIndexedModeAction(IdxMode, VT, IMAB_Load);
1556 | } |
1557 | |
1558 | /// Return true if the specified indexed load is legal on this target. |
1559 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
1560 | return VT.isSimple() && |
           (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1563 | } |
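
  // Illustrative usage sketch (`TLI` and `VT` assumed from the caller): only
  // try to merge an address increment into a load when the target has a
  // post-incrementing form.
  //
  //   static bool canFormPostIncLoad(const TargetLowering &TLI, EVT VT) {
  //     return TLI.isIndexedLoadLegal(ISD::POST_INC, VT);
  //   }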
1564 | |
1565 | /// Return how the indexed store should be treated: either it is legal, needs |
1566 | /// to be promoted to a larger size, needs to be expanded to some other code |
1567 | /// sequence, or the target has a custom expander for it. |
1568 | LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
    return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1570 | } |
1571 | |
  /// Return true if the specified indexed store is legal on this target.
  bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1577 | } |
1578 | |
  /// Return how the indexed masked load should be treated: either it is legal,
  /// needs to be promoted to a larger size, needs to be expanded to some other
  /// code sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1584 | } |
1585 | |
  /// Return true if the specified indexed masked load is legal on this target.
  bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1591 | } |
1592 | |
  /// Return how the indexed masked store should be treated: either it is
  /// legal, needs to be promoted to a larger size, needs to be expanded to
  /// some other code sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1598 | } |
1599 | |
  /// Return true if the specified indexed masked store is legal on this
  /// target.
  bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1605 | } |
1606 | |
1607 | /// Returns true if the index type for a masked gather/scatter requires |
  /// extending.
1609 | virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; } |
1610 | |
  // Returns true if Extend can be folded into the index of a masked
  // gather/scatter on this target.
1613 | virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const { |
1614 | return false; |
1615 | } |
1616 | |
  // Return true if the target supports a scatter/gather instruction with
  // indices which are scaled by the particular value. Note that all targets
  // must by definition support a scale of 1.
1620 | virtual bool isLegalScaleForGatherScatter(uint64_t Scale, |
1621 | uint64_t ElemSize) const { |
1622 | // MGATHER/MSCATTER are only required to support scaling by one or by the |
1623 | // element size. |
1624 | if (Scale != ElemSize && Scale != 1) |
1625 | return false; |
1626 | return true; |
1627 | } |
1628 | |
1629 | /// Return how the condition code should be treated: either it is legal, needs |
1630 | /// to be expanded to some other code sequence, or the target has a custom |
1631 | /// expander for it. |
1632 | LegalizeAction |
1633 | getCondCodeAction(ISD::CondCode CC, MVT VT) const { |
1634 | assert((unsigned)CC < std::size(CondCodeActions) && |
1635 | ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) && |
1636 | "Table isn't big enough!"); |
1637 | // See setCondCodeAction for how this is encoded. |
1638 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
1639 | uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; |
1640 | LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); |
1641 | assert(Action != Promote && "Can't promote condition code!"); |
1642 | return Action; |
1643 | } |
1644 | |
1645 | /// Return true if the specified condition code is legal for a comparison of |
1646 | /// the specified types on this target. |
1647 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
1648 | return getCondCodeAction(CC, VT) == Legal; |
1649 | } |
1650 | |
1651 | /// Return true if the specified condition code is legal or custom for a |
1652 | /// comparison of the specified types on this target. |
1653 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { |
1654 | return getCondCodeAction(CC, VT) == Legal || |
1655 | getCondCodeAction(CC, VT) == Custom; |
1656 | } |
1657 | |
1658 | /// Return how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type |
1659 | /// InputVT should be treated. Either it's legal, needs to be promoted to a |
1660 | /// larger size, needs to be expanded to some other code sequence, or the |
1661 | /// target has a custom expander for it. |
1662 | LegalizeAction getPartialReduceMLAAction(unsigned Opc, EVT AccVT, |
1663 | EVT InputVT) const { |
1664 | assert(Opc == ISD::PARTIAL_REDUCE_SMLA || Opc == ISD::PARTIAL_REDUCE_UMLA || |
1665 | Opc == ISD::PARTIAL_REDUCE_SUMLA); |
1666 | PartialReduceActionTypes Key = {Opc, AccVT.getSimpleVT().SimpleTy, |
1667 | InputVT.getSimpleVT().SimpleTy}; |
    auto It = PartialReduceMLAActions.find(Key);
1669 | return It != PartialReduceMLAActions.end() ? It->second : Expand; |
1670 | } |
1671 | |
1672 | /// Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is |
1673 | /// legal or custom for this target. |
1674 | bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, |
1675 | EVT InputVT) const { |
1676 | LegalizeAction Action = getPartialReduceMLAAction(Opc, AccVT, InputVT); |
1677 | return Action == Legal || Action == Custom; |
1678 | } |
1679 | |
1680 | /// If the action for this operation is to promote, this method returns the |
1681 | /// ValueType to promote to. |
1682 | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { |
1683 | assert(getOperationAction(Op, VT) == Promote && |
1684 | "This operation isn't promoted!"); |
1685 | |
1686 | // See if this has an explicit type specified. |
1687 | std::map<std::pair<unsigned, MVT::SimpleValueType>, |
1688 | MVT::SimpleValueType>::const_iterator PTTI = |
        PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1690 | if (PTTI != PromoteToType.end()) return PTTI->second; |
1691 | |
1692 | assert((VT.isInteger() || VT.isFloatingPoint()) && |
1693 | "Cannot autopromote this type, add it with AddPromotedToType."); |
1694 | |
1695 | uint64_t VTBits = VT.getScalarSizeInBits(); |
1696 | MVT NVT = VT; |
1697 | do { |
1698 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); |
1699 | assert(NVT.isInteger() == VT.isInteger() && |
1700 | NVT.isFloatingPoint() == VT.isFloatingPoint() && |
1701 | "Didn't find type to promote to!"); |
    } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(NVT) ||
             getOperationAction(Op, NVT) == Promote);
1704 | return NVT; |
1705 | } |
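
  // Illustrative sketch of how a hypothetical target sets up the promotion
  // this query reports, from its own TargetLowering constructor:
  //
  //   setOperationAction(ISD::CTLZ, MVT::i8, Promote);
  //   AddPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
  //   // getTypeToPromoteTo(ISD::CTLZ, MVT::i8) now returns MVT::i32.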
1706 | |
1707 | virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
1708 | bool AllowUnknown = false) const { |
1709 | return getValueType(DL, Ty, AllowUnknown); |
1710 | } |
1711 | |
1712 | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM |
1713 | /// operations except for the pointer size. If AllowUnknown is true, this |
1714 | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), |
1715 | /// otherwise it will assert. |
1716 | EVT getValueType(const DataLayout &DL, Type *Ty, |
1717 | bool AllowUnknown = false) const { |
1718 | // Lower scalar pointers to native pointer types. |
    if (auto *PTy = dyn_cast<PointerType>(Ty))
      return getPointerTy(DL, PTy->getAddressSpace());
1721 | |
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      Type *EltTy = VTy->getElementType();
      // Lower vectors of pointers to native pointer types.
      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
        EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
        EltTy = PointerTy.getTypeForEVT(Ty->getContext());
      }
      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
                              VTy->getElementCount());
    }

    return EVT::getEVT(Ty, AllowUnknown);
1734 | } |
1735 | |
1736 | EVT getMemValueType(const DataLayout &DL, Type *Ty, |
1737 | bool AllowUnknown = false) const { |
1738 | // Lower scalar pointers to native pointer types. |
    if (auto *PTy = dyn_cast<PointerType>(Ty))
      return getPointerMemTy(DL, PTy->getAddressSpace());

    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      Type *EltTy = VTy->getElementType();
      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
        EVT PointerTy(getPointerMemTy(DL, PTy->getAddressSpace()));
        EltTy = PointerTy.getTypeForEVT(Ty->getContext());
      }
      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
                              VTy->getElementCount());
1750 | } |
1751 | |
1752 | return getValueType(DL, Ty, AllowUnknown); |
1753 | } |
1754 | |
1755 | |
1756 | /// Return the MVT corresponding to this LLVM type. See getValueType. |
1757 | MVT getSimpleValueType(const DataLayout &DL, Type *Ty, |
1758 | bool AllowUnknown = false) const { |
1759 | return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); |
1760 | } |
1761 | |
1762 | /// Returns the desired alignment for ByVal or InAlloca aggregate function |
1763 | /// arguments in the caller parameter area. |
1764 | virtual Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; |
1765 | |
1766 | /// Return the type of registers that this ValueType will eventually require. |
1767 | MVT getRegisterType(MVT VT) const { |
1768 | assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT)); |
1769 | return RegisterTypeForVT[VT.SimpleTy]; |
1770 | } |
1771 | |
1772 | /// Return the type of registers that this ValueType will eventually require. |
1773 | MVT getRegisterType(LLVMContext &Context, EVT VT) const { |
1774 | if (VT.isSimple()) |
      return getRegisterType(VT.getSimpleVT());
1776 | if (VT.isVector()) { |
1777 | EVT VT1; |
1778 | MVT RegisterVT; |
1779 | unsigned NumIntermediates; |
      (void)getVectorTypeBreakdown(Context, VT, VT1,
                                   NumIntermediates, RegisterVT);
1782 | return RegisterVT; |
1783 | } |
1784 | if (VT.isInteger()) { |
      return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1786 | } |
1787 | llvm_unreachable("Unsupported extended type!"); |
1788 | } |
1789 | |
1790 | /// Return the number of registers that this ValueType will eventually |
1791 | /// require. |
1792 | /// |
1793 | /// This is one for any types promoted to live in larger registers, but may be |
1794 | /// more than one for types (like i64) that are split into pieces. For types |
1795 | /// like i140, which are first promoted then expanded, it is the number of |
1796 | /// registers needed to hold all the bits of the original type. For an i140 |
1797 | /// on a 32 bit machine this means 5 registers. |
1798 | /// |
1799 | /// RegisterVT may be passed as a way to override the default settings, for |
1800 | /// instance with i128 inline assembly operands on SystemZ. |
1801 | virtual unsigned |
1802 | getNumRegisters(LLVMContext &Context, EVT VT, |
1803 | std::optional<MVT> RegisterVT = std::nullopt) const { |
1804 | if (VT.isSimple()) { |
1805 | assert((unsigned)VT.getSimpleVT().SimpleTy < |
1806 | std::size(NumRegistersForVT)); |
1807 | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; |
1808 | } |
1809 | if (VT.isVector()) { |
1810 | EVT VT1; |
1811 | MVT VT2; |
1812 | unsigned NumIntermediates; |
      return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1814 | } |
1815 | if (VT.isInteger()) { |
1816 | unsigned BitWidth = VT.getSizeInBits(); |
1817 | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); |
1818 | return (BitWidth + RegWidth - 1) / RegWidth; |
1819 | } |
1820 | llvm_unreachable("Unsupported extended type!"); |
1821 | } |
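
  // Illustrative sketch of the i140 case described above, assuming `TLI` and
  // `Ctx` come from the caller: on a target with 32-bit registers the value
  // is promoted and then split, needing five registers in total.
  //
  //   unsigned NumRegs =
  //       TLI.getNumRegisters(Ctx, EVT::getIntegerVT(Ctx, 140)); // 5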
1822 | |
1823 | /// Certain combinations of ABIs, Targets and features require that types |
1824 | /// are legal for some operations and not for other operations. |
1825 | /// For MIPS all vector types must be passed through the integer register set. |
1826 | virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, |
1827 | CallingConv::ID CC, EVT VT) const { |
1828 | return getRegisterType(Context, VT); |
1829 | } |
1830 | |
1831 | /// Certain targets require unusual breakdowns of certain types. For MIPS, |
  /// this occurs when a vector type is used, as vectors are passed through the
1833 | /// integer register set. |
1834 | virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
1835 | CallingConv::ID CC, |
1836 | EVT VT) const { |
1837 | return getNumRegisters(Context, VT); |
1838 | } |
1839 | |
1840 | /// Certain targets have context sensitive alignment requirements, where one |
1841 | /// type has the alignment requirement of another type. |
1842 | virtual Align getABIAlignmentForCallingConv(Type *ArgTy, |
1843 | const DataLayout &DL) const { |
    return DL.getABITypeAlign(ArgTy);
1845 | } |
1846 | |
1847 | /// If true, then instruction selection should seek to shrink the FP constant |
1848 | /// of the specified type to a smaller type in order to save space and / or |
1849 | /// reduce runtime. |
1850 | virtual bool ShouldShrinkFPConstant(EVT) const { return true; } |
1851 | |
1852 | /// Return true if it is profitable to reduce a load to a smaller type. |
  /// \p ByteOffset is only set if we know the pointer offset at compile time;
  /// otherwise we should assume that additional pointer math is required.
  /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x
  /// Example: (i16 (trunc (srl (i32 (load x)), 16))) -> i16 load x+2
1857 | virtual bool shouldReduceLoadWidth( |
1858 | SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, |
1859 | std::optional<unsigned> ByteOffset = std::nullopt) const { |
1860 | // By default, assume that it is cheaper to extract a subvector from a wide |
1861 | // vector load rather than creating multiple narrow vector loads. |
1862 | if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse()) |
1863 | return false; |
1864 | |
1865 | return true; |
1866 | } |
1867 | |
1868 | /// Return true (the default) if it is profitable to remove a sext_inreg(x) |
1869 | /// where the sext is redundant, and use x directly. |
1870 | virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; } |
1871 | |
1872 | /// Indicates if any padding is guaranteed to go at the most significant bits |
1873 | /// when storing the type to memory and the type size isn't equal to the store |
1874 | /// size. |
1875 | bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const { |
1876 | return VT.isScalarInteger() && !VT.isByteSized(); |
1877 | } |
1878 | |
1879 | /// When splitting a value of the specified type into parts, does the Lo |
1880 | /// or Hi part come first? This usually follows the endianness, except |
1881 | /// for ppcf128, where the Hi part always comes first. |
1882 | bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { |
1883 | return DL.isBigEndian() || VT == MVT::ppcf128; |
1884 | } |
1885 | |
1886 | /// If true, the target has custom DAG combine transformations that it can |
1887 | /// perform for the specified node. |
1888 | bool hasTargetDAGCombine(ISD::NodeType NT) const { |
1889 | assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); |
1890 | return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); |
1891 | } |
1892 | |
1893 | unsigned getGatherAllAliasesMaxDepth() const { |
1894 | return GatherAllAliasesMaxDepth; |
1895 | } |
1896 | |
1897 | /// Returns the size of the platform's va_list object. |
1898 | virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { |
1899 | return getPointerTy(DL).getSizeInBits(); |
1900 | } |
1901 | |
1902 | /// Get maximum # of store operations permitted for llvm.memset |
1903 | /// |
1904 | /// This function returns the maximum number of store operations permitted |
1905 | /// to replace a call to llvm.memset. The value is set by the target at the |
1906 | /// performance threshold for such a replacement. If OptSize is true, |
1907 | /// return the limit for functions that have OptSize attribute. |
1908 | unsigned getMaxStoresPerMemset(bool OptSize) const { |
1909 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; |
1910 | } |
1911 | |
1912 | /// Get maximum # of store operations permitted for llvm.memcpy |
1913 | /// |
1914 | /// This function returns the maximum number of store operations permitted |
1915 | /// to replace a call to llvm.memcpy. The value is set by the target at the |
1916 | /// performance threshold for such a replacement. If OptSize is true, |
1917 | /// return the limit for functions that have OptSize attribute. |
1918 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { |
1919 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; |
1920 | } |
1921 | |
1922 | /// \brief Get maximum # of store operations to be glued together |
1923 | /// |
1924 | /// This function returns the maximum number of store operations permitted |
1925 | /// to glue together during lowering of llvm.memcpy. The value is set by |
  /// the target at the performance threshold for such a replacement.
1927 | virtual unsigned getMaxGluedStoresPerMemcpy() const { |
1928 | return MaxGluedStoresPerMemcpy; |
1929 | } |
1930 | |
1931 | /// Get maximum # of load operations permitted for memcmp |
1932 | /// |
1933 | /// This function returns the maximum number of load operations permitted |
1934 | /// to replace a call to memcmp. The value is set by the target at the |
1935 | /// performance threshold for such a replacement. If OptSize is true, |
1936 | /// return the limit for functions that have OptSize attribute. |
1937 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { |
1938 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; |
1939 | } |
1940 | |
1941 | /// Get maximum # of store operations permitted for llvm.memmove |
1942 | /// |
1943 | /// This function returns the maximum number of store operations permitted |
1944 | /// to replace a call to llvm.memmove. The value is set by the target at the |
1945 | /// performance threshold for such a replacement. If OptSize is true, |
1946 | /// return the limit for functions that have OptSize attribute. |
1947 | unsigned getMaxStoresPerMemmove(bool OptSize) const { |
1948 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; |
1949 | } |
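
  // Illustrative sketch of how a hypothetical target tunes the limits above,
  // from its TargetLowering constructor (the member names are the knobs the
  // getters above read):
  //
  //   MaxStoresPerMemset = 8;
  //   MaxStoresPerMemsetOptSize = 4; // be more conservative under -Os
  //   MaxStoresPerMemcpy = 4;
  //   MaxGluedStoresPerMemcpy = 2;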
1950 | |
1951 | /// Determine if the target supports unaligned memory accesses. |
1952 | /// |
1953 | /// This function returns true if the target allows unaligned memory accesses |
1954 | /// of the specified type in the given address space. If true, it also returns |
1955 | /// a relative speed of the unaligned memory access in the last argument by |
  /// reference. The higher the speed number, the faster the operation compared
  /// to one returned by another such call. This is used, for example, in
1958 | /// situations where an array copy/move/set is converted to a sequence of |
1959 | /// store operations. Its use helps to ensure that such replacements don't |
1960 | /// generate code that causes an alignment error (trap) on the target machine. |
1961 | virtual bool allowsMisalignedMemoryAccesses( |
1962 | EVT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
1963 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1964 | unsigned * /*Fast*/ = nullptr) const { |
1965 | return false; |
1966 | } |
1967 | |
1968 | /// LLT handling variant. |
1969 | virtual bool allowsMisalignedMemoryAccesses( |
1970 | LLT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
1971 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1972 | unsigned * /*Fast*/ = nullptr) const { |
1973 | return false; |
1974 | } |
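
  // Illustrative override sketch for a hypothetical target on which unaligned
  // 32-bit scalar accesses work but are slower than aligned ones:
  //
  //   bool MyTargetLowering::allowsMisalignedMemoryAccesses(
  //       EVT VT, unsigned /*AddrSpace*/, Align /*Alignment*/,
  //       MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
  //     if (VT != MVT::i32)
  //       return false;
  //     if (Fast)
  //       *Fast = 1; // supported, but report it as relatively slow
  //     return true;
  //   }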
1975 | |
1976 | /// This function returns true if the memory access is aligned or if the |
1977 | /// target allows this specific unaligned memory access. If the access is |
1978 | /// allowed, the optional final parameter returns a relative speed of the |
1979 | /// access (as defined by the target). |
1980 | bool allowsMemoryAccessForAlignment( |
1981 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1982 | unsigned AddrSpace = 0, Align Alignment = Align(1), |
1983 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1984 | unsigned *Fast = nullptr) const; |
1985 | |
1986 | /// Return true if the memory access of this type is aligned or if the target |
1987 | /// allows this specific unaligned access for the given MachineMemOperand. |
1988 | /// If the access is allowed, the optional final parameter returns a relative |
1989 | /// speed of the access (as defined by the target). |
1990 | bool allowsMemoryAccessForAlignment(LLVMContext &Context, |
1991 | const DataLayout &DL, EVT VT, |
1992 | const MachineMemOperand &MMO, |
1993 | unsigned *Fast = nullptr) const; |
1994 | |
1995 | /// Return true if the target supports a memory access of this type for the |
1996 | /// given address space and alignment. If the access is allowed, the optional |
1997 | /// final parameter returns the relative speed of the access (as defined by |
1998 | /// the target). |
1999 | virtual bool |
2000 | allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
2001 | unsigned AddrSpace = 0, Align Alignment = Align(1), |
2002 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
2003 | unsigned *Fast = nullptr) const; |
2004 | |
2005 | /// Return true if the target supports a memory access of this type for the |
2006 | /// given MachineMemOperand. If the access is allowed, the optional |
2007 | /// final parameter returns the relative access speed (as defined by the |
2008 | /// target). |
2009 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
2010 | const MachineMemOperand &MMO, |
2011 | unsigned *Fast = nullptr) const; |
2012 | |
2013 | /// LLT handling variant. |
2014 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, |
2015 | const MachineMemOperand &MMO, |
2016 | unsigned *Fast = nullptr) const; |
2017 | |
2018 | /// Returns the target specific optimal type for load and store operations as |
2019 | /// a result of memset, memcpy, and memmove lowering. |
2020 | /// It returns EVT::Other if the type should be determined using generic |
2021 | /// target-independent logic. |
2022 | virtual EVT |
2023 | getOptimalMemOpType(const MemOp &Op, |
2024 | const AttributeList & /*FuncAttributes*/) const { |
2025 | return MVT::Other; |
2026 | } |
2027 | |
2028 | /// LLT returning variant. |
2029 | virtual LLT |
2030 | getOptimalMemOpLLT(const MemOp &Op, |
2031 | const AttributeList & /*FuncAttributes*/) const { |
2032 | return LLT(); |
2033 | } |
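
  // Illustrative override sketch for a hypothetical target that prefers
  // 128-bit vector operations for larger inline memcpy/memset expansions and
  // defers to the generic logic otherwise:
  //
  //   EVT MyTargetLowering::getOptimalMemOpType(
  //       const MemOp &Op, const AttributeList & /*FuncAttributes*/) const {
  //     return Op.size() >= 16 ? EVT(MVT::v4i32) : EVT(MVT::Other);
  //   }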
2034 | |
2035 | /// Returns true if it's safe to use load / store of the specified type to |
2036 | /// expand memcpy / memset inline. |
2037 | /// |
2038 | /// This is mostly true for all types except for some special cases. For |
2039 | /// example, on X86 targets without SSE2 f64 load / store are done with fldl / |
2040 | /// fstpl which also does type conversion. Note the specified type doesn't |
2041 | /// have to be legal as the hook is used before type legalization. |
2042 | virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } |
2043 | |
2044 | /// Return lower limit for number of blocks in a jump table. |
2045 | virtual unsigned getMinimumJumpTableEntries() const; |
2046 | |
2047 | /// Return lower limit of the density in a jump table. |
2048 | unsigned getMinimumJumpTableDensity(bool OptForSize) const; |
2049 | |
2050 | /// Return upper limit for number of entries in a jump table. |
2051 | /// Zero if no limit. |
2052 | unsigned getMaximumJumpTableSize() const; |
2053 | |
2054 | virtual bool isJumpTableRelative() const; |
2055 | |
2056 | /// If a physical register, this specifies the register that |
2057 | /// llvm.savestack/llvm.restorestack should save and restore. |
2058 | Register getStackPointerRegisterToSaveRestore() const { |
2059 | return StackPointerRegisterToSaveRestore; |
2060 | } |
2061 | |
2062 | /// If a physical register, this returns the register that receives the |
2063 | /// exception address on entry to an EH pad. |
2064 | virtual Register |
2065 | getExceptionPointerRegister(const Constant *PersonalityFn) const { |
2066 | return Register(); |
2067 | } |
2068 | |
2069 | /// If a physical register, this returns the register that receives the |
2070 | /// exception typeid on entry to a landing pad. |
2071 | virtual Register |
2072 | getExceptionSelectorRegister(const Constant *PersonalityFn) const { |
2073 | return Register(); |
2074 | } |
2075 | |
2076 | virtual bool needsFixedCatchObjects() const { |
    report_fatal_error("Funclet EH is not implemented for this target");
2078 | } |
2079 | |
2080 | /// Return the minimum stack alignment of an argument. |
2081 | Align getMinStackArgumentAlignment() const { |
2082 | return MinStackArgumentAlignment; |
2083 | } |
2084 | |
2085 | /// Return the minimum function alignment. |
2086 | Align getMinFunctionAlignment() const { return MinFunctionAlignment; } |
2087 | |
2088 | /// Return the preferred function alignment. |
2089 | Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } |
2090 | |
2091 | /// Return the preferred loop alignment. |
2092 | virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const; |
2093 | |
  /// Return the maximum number of bytes that may be emitted when padding for
  /// alignment.
2096 | virtual unsigned |
2097 | getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const; |
2098 | |
2099 | /// Should loops be aligned even when the function is marked OptSize (but not |
2100 | /// MinSize). |
2101 | virtual bool alignLoopsWithOptSize() const { return false; } |
2102 | |
2103 | /// If the target has a standard location for the stack protector guard, |
2104 | /// returns the address of that location. Otherwise, returns nullptr. |
2105 | /// DEPRECATED: please override useLoadStackGuardNode and customize |
2106 | /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). |
2107 | virtual Value *getIRStackGuard(IRBuilderBase &IRB) const; |
2108 | |
2109 | /// Inserts necessary declarations for SSP (stack protection) purpose. |
2110 | /// Should be used only when getIRStackGuard returns nullptr. |
2111 | virtual void insertSSPDeclarations(Module &M) const; |
2112 | |
2113 | /// Return the variable that's previously inserted by insertSSPDeclarations, |
2114 | /// if any, otherwise return nullptr. Should be used only when |
2115 | /// getIRStackGuard returns nullptr. |
2116 | virtual Value *getSDagStackGuard(const Module &M) const; |
2117 | |
2118 | /// If this function returns true, stack protection checks should XOR the |
2119 | /// frame pointer (or whichever pointer is used to address locals) into the |
2120 | /// stack guard value before checking it. getIRStackGuard must return nullptr |
2121 | /// if this returns true. |
2122 | virtual bool useStackGuardXorFP() const { return false; } |
2123 | |
2124 | /// If the target has a standard stack protection check function that |
2125 | /// performs validation and error handling, returns the function. Otherwise, |
2126 | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. |
2127 | /// Should be used only when getIRStackGuard returns nullptr. |
2128 | virtual Function *getSSPStackGuardCheck(const Module &M) const; |
2129 | |
2130 | protected: |
2131 | Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, |
2132 | bool UseTLS) const; |
2133 | |
2134 | public: |
2135 | /// Returns the target-specific address of the unsafe stack pointer. |
2136 | virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const; |
2137 | |
  virtual bool hasStackProbeSymbol(const MachineFunction &MF) const {
    return false;
  }

  virtual bool hasInlineStackProbe(const MachineFunction &MF) const {
    return false;
  }

  /// Returns the name of the symbol used to emit stack probes, or the empty
  /// string if not applicable.
  virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
    return "";
  }
2147 | |
2148 | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we |
2149 | /// are happy to sink it into basic blocks. A cast may be free, but not |
2150 | /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. |
2151 | virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; |
2152 | |
2153 | /// Return true if the pointer arguments to CI should be aligned by aligning |
2154 | /// the object whose address is being passed. If so then MinSize is set to the |
2155 | /// minimum size the object must be to be aligned and PrefAlign is set to the |
2156 | /// preferred alignment. |
2157 | virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, |
2158 | Align & /*PrefAlign*/) const { |
2159 | return false; |
2160 | } |
2161 | |
2162 | //===--------------------------------------------------------------------===// |
2163 | /// \name Helpers for TargetTransformInfo implementations |
2164 | /// @{ |
2165 | |
2166 | /// Get the ISD node that corresponds to the Instruction class opcode. |
2167 | int InstructionOpcodeToISD(unsigned Opcode) const; |
2168 | |
2169 | /// Get the ISD node that corresponds to the Intrinsic ID. Returns |
2170 | /// ISD::DELETED_NODE by default for an unsupported Intrinsic ID. |
2171 | int IntrinsicIDToISD(Intrinsic::ID ID) const; |
2172 | |
2173 | /// @} |
2174 | |
2175 | //===--------------------------------------------------------------------===// |
2176 | /// \name Helpers for atomic expansion. |
2177 | /// @{ |
2178 | |
2179 | /// Returns the maximum atomic operation size (in bits) supported by |
2180 | /// the backend. Atomic operations greater than this size (as well |
2181 | /// as ones that are not naturally aligned), will be expanded by |
2182 | /// AtomicExpandPass into an __atomic_* library call. |
2183 | unsigned getMaxAtomicSizeInBitsSupported() const { |
2184 | return MaxAtomicSizeInBitsSupported; |
2185 | } |
2186 | |
2187 | /// Returns the size in bits of the maximum div/rem the backend supports. |
2188 | /// Larger operations will be expanded by ExpandLargeDivRem. |
2189 | unsigned getMaxDivRemBitWidthSupported() const { |
2190 | return MaxDivRemBitWidthSupported; |
2191 | } |
2192 | |
2193 | /// Returns the size in bits of the maximum fp to/from int conversion the |
2194 | /// backend supports. Larger operations will be expanded by ExpandFp. |
2195 | unsigned getMaxLargeFPConvertBitWidthSupported() const { |
2196 | return MaxLargeFPConvertBitWidthSupported; |
2197 | } |
2198 | |
2199 | /// Returns the size of the smallest cmpxchg or ll/sc instruction |
2200 | /// the backend supports. Any smaller operations are widened in |
2201 | /// AtomicExpandPass. |
2202 | /// |
2203 | /// Note that *unlike* operations above the maximum size, atomic ops |
2204 | /// are still natively supported below the minimum; they just |
2205 | /// require a more complex expansion. |
2206 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
2207 | |
2208 | /// Whether the target supports unaligned atomic operations. |
2209 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } |
2210 | |
2211 | /// Whether AtomicExpandPass should automatically insert fences and reduce |
2212 | /// ordering for this atomic. This should be true for most architectures with |
2213 | /// weak memory ordering. Defaults to false. |
2214 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
2215 | return false; |
2216 | } |
2217 | |
  /// The memory ordering that AtomicExpandPass should assign to an atomic
  /// instruction that it has lowered by adding fences. This can be used
  /// to "fold" one of the fences into the atomic instruction.
2221 | virtual AtomicOrdering |
2222 | atomicOperationOrderAfterFenceSplit(const Instruction *I) const { |
2223 | return AtomicOrdering::Monotonic; |
2224 | } |
2225 | |
2226 | /// Whether AtomicExpandPass should automatically insert a trailing fence |
2227 | /// without reducing the ordering for this atomic. Defaults to false. |
2228 | virtual bool |
2229 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const { |
2230 | return false; |
2231 | } |
2232 | |
2233 | /// Perform a load-linked operation on Addr, returning a "Value *" with the |
2234 | /// corresponding pointee type. This may entail some non-trivial operations to |
2235 | /// truncate or reconstruct types that will be illegal in the backend. See |
2236 | /// ARMISelLowering for an example implementation. |
2237 | virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, |
2238 | Value *Addr, AtomicOrdering Ord) const { |
2239 | llvm_unreachable("Load linked unimplemented on this target"); |
2240 | } |
2241 | |
2242 | /// Perform a store-conditional operation to Addr. Return the status of the |
2243 | /// store. This should be 0 if the store succeeded, non-zero otherwise. |
2244 | virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, |
2245 | Value *Addr, AtomicOrdering Ord) const { |
2246 | llvm_unreachable("Store conditional unimplemented on this target"); |
2247 | } |
2248 | |
2249 | /// Perform a masked atomicrmw using a target-specific intrinsic. This |
2250 | /// represents the core LL/SC loop which will be lowered at a late stage by |
2251 | /// the backend. The target-specific intrinsic returns the loaded value and |
2252 | /// is not responsible for masking and shifting the result. |
2253 | virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, |
2254 | AtomicRMWInst *AI, |
2255 | Value *AlignedAddr, Value *Incr, |
2256 | Value *Mask, Value *ShiftAmt, |
2257 | AtomicOrdering Ord) const { |
2258 | llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); |
2259 | } |
2260 | |
  /// Perform an atomicrmw expansion in a target-specific way. This is
2262 | /// expected to be called when masked atomicrmw and bit test atomicrmw don't |
2263 | /// work, and the target supports another way to lower atomicrmw. |
2264 | virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const { |
2265 | llvm_unreachable( |
2266 | "Generic atomicrmw expansion unimplemented on this target"); |
2267 | } |
2268 | |
2269 | /// Perform a cmpxchg expansion using a target-specific method. |
2270 | virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const { |
2271 | llvm_unreachable("Generic cmpxchg expansion unimplemented on this target"); |
2272 | } |
2273 | |
2274 | /// Perform a bit test atomicrmw using a target-specific intrinsic. This |
2275 | /// represents the combined bit test intrinsic which will be lowered at a late |
2276 | /// stage by the backend. |
2277 | virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { |
2278 | llvm_unreachable( |
2279 | "Bit test atomicrmw expansion unimplemented on this target"); |
2280 | } |
2281 | |
  /// Perform an atomicrmw whose result is only used by a comparison, using a
2283 | /// target-specific intrinsic. This represents the combined atomic and compare |
2284 | /// intrinsic which will be lowered at a late stage by the backend. |
2285 | virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const { |
2286 | llvm_unreachable( |
2287 | "Compare arith atomicrmw expansion unimplemented on this target"); |
2288 | } |
2289 | |
2290 | /// Perform a masked cmpxchg using a target-specific intrinsic. This |
2291 | /// represents the core LL/SC loop which will be lowered at a late stage by |
2292 | /// the backend. The target-specific intrinsic returns the loaded value and |
2293 | /// is not responsible for masking and shifting the result. |
2294 | virtual Value *emitMaskedAtomicCmpXchgIntrinsic( |
2295 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
2296 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
2297 | llvm_unreachable("Masked cmpxchg expansion unimplemented on this target"); |
2298 | } |
2299 | |
2300 | //===--------------------------------------------------------------------===// |
2301 | /// \name KCFI check lowering. |
2302 | /// @{ |
2303 | |
2304 | virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
2305 | MachineBasicBlock::instr_iterator &MBBI, |
2306 | const TargetInstrInfo *TII) const { |
2307 | llvm_unreachable("KCFI is not supported on this target"); |
2308 | } |
2309 | |
2310 | /// @} |
2311 | |
2312 | /// Inserts in the IR a target-specific intrinsic specifying a fence. |
2313 | /// It is called by AtomicExpandPass before expanding an |
2314 | /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad |
2315 | /// if shouldInsertFencesForAtomic returns true. |
2316 | /// |
2317 | /// Inst is the original atomic instruction, prior to other expansions that |
2318 | /// may be performed. |
2319 | /// |
2320 | /// This function should either return a nullptr, or a pointer to an IR-level |
2321 | /// Instruction*. Even complex fence sequences can be represented by a |
2322 | /// single Instruction* through an intrinsic to be lowered later. |
2323 | /// |
2324 | /// The default implementation emits an IR fence before any release (or |
2325 | /// stronger) operation that stores, and after any acquire (or stronger) |
2326 | /// operation. This is generally a correct implementation, but backends may |
2327 | /// override if they wish to use alternative schemes (e.g. the PowerPC |
2328 | /// standard ABI uses a fence before a seq_cst load instead of after a |
2329 | /// seq_cst store). |
2330 | /// @{ |
2331 | virtual Instruction *emitLeadingFence(IRBuilderBase &Builder, |
2332 | Instruction *Inst, |
2333 | AtomicOrdering Ord) const; |
2334 | |
2335 | virtual Instruction *emitTrailingFence(IRBuilderBase &Builder, |
2336 | Instruction *Inst, |
2337 | AtomicOrdering Ord) const; |
2338 | /// @} |
2339 | |
2340 | // Emits code that executes when the comparison result in the ll/sc |
2341 | // expansion of a cmpxchg instruction is such that the store-conditional will |
2342 | // not execute. This makes it possible to balance out the load-linked with |
2343 | // a dedicated instruction, if desired. |
2344 | // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would |
2345 | // be unnecessarily held, except if clrex, inserted by this hook, is executed. |
2346 | virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {} |
2347 | |
2348 | /// Returns true if arguments should be sign-extended in lib calls. |
2349 | virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const { |
2350 | return IsSigned; |
2351 | } |
2352 | |
2353 | /// Returns true if arguments should be extended in lib calls. |
2354 | virtual bool shouldExtendTypeInLibCall(EVT Type) const { |
2355 | return true; |
2356 | } |
2357 | |
2358 | /// Returns how the given (atomic) load should be expanded by the |
2359 | /// IR-level AtomicExpand pass. |
2360 | virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
2361 | return AtomicExpansionKind::None; |
2362 | } |
2363 | |
2364 | /// Returns how the given (atomic) load should be cast by the IR-level |
2365 | /// AtomicExpand pass. |
2366 | virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const { |
2367 | if (LI->getType()->isFloatingPointTy()) |
2368 | return AtomicExpansionKind::CastToInteger; |
2369 | return AtomicExpansionKind::None; |
2370 | } |
2371 | |
2372 | /// Returns how the given (atomic) store should be expanded by the IR-level |
  /// AtomicExpand pass. For instance, AtomicExpansionKind::Expand will try
2374 | /// to use an atomicrmw xchg. |
2375 | virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
2376 | return AtomicExpansionKind::None; |
2377 | } |
2378 | |
2379 | /// Returns how the given (atomic) store should be cast by the IR-level |
  /// AtomicExpand pass. For instance, AtomicExpansionKind::CastToInteger
2381 | /// will try to cast the operands to integer values. |
2382 | virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { |
2383 | if (SI->getValueOperand()->getType()->isFloatingPointTy()) |
2384 | return AtomicExpansionKind::CastToInteger; |
2385 | return AtomicExpansionKind::None; |
2386 | } |
2387 | |
2388 | /// Returns how the given atomic cmpxchg should be expanded by the IR-level |
2389 | /// AtomicExpand pass. |
2390 | virtual AtomicExpansionKind |
2391 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { |
2392 | return AtomicExpansionKind::None; |
2393 | } |
2394 | |
2395 | /// Returns how the IR-level AtomicExpand pass should expand the given |
2396 | /// AtomicRMW, if at all. The default only expands floating-point operations, via cmpxchg.
2397 | virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { |
2398 | return RMW->isFloatingPointOperation() ? |
2399 | AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; |
2400 | } |
2401 | |
2402 | /// Returns how the given atomicrmw should be cast by the IR-level
2403 | /// AtomicExpand pass. |
2404 | virtual AtomicExpansionKind |
2405 | shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const { |
2406 | if (RMWI->getOperation() == AtomicRMWInst::Xchg && |
2407 | (RMWI->getValOperand()->getType()->isFloatingPointTy() || |
2408 | RMWI->getValOperand()->getType()->isPointerTy())) |
2409 | return AtomicExpansionKind::CastToInteger; |
2410 | |
2411 | return AtomicExpansionKind::None; |
2412 | } |
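
// Illustrative example (editor's sketch, not part of the interface): with the
// default implementation above, an IR-level
//   %old = atomicrmw xchg ptr %p, float %v seq_cst
// is rewritten by AtomicExpand to bitcast %v to i32, perform an integer
// atomicrmw xchg, and bitcast the result back to float.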
2413 | |
2414 | /// On some platforms, an AtomicRMW that never actually modifies the value |
2415 | /// (such as fetch_add of 0) can be turned into a fence followed by an |
2416 | /// atomic load. This may sound useless, but it makes it possible for the |
2417 | /// processor to keep the cacheline shared, dramatically improving |
2418 | /// performance. And such idempotent RMWs are useful for implementing some |
2419 | /// kinds of locks, see for example (justification + benchmarks): |
2420 | /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf |
2421 | /// This method tries doing that transformation, returning the atomic load if |
2422 | /// it succeeds, and nullptr otherwise. |
2423 | /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo |
2424 | /// another round of expansion. |
2425 | virtual LoadInst * |
2426 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { |
2427 | return nullptr; |
2428 | } |
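
// Illustrative example (editor's sketch): a target overriding this hook could
// rewrite
//   %old = atomicrmw add ptr %p, i32 0 seq_cst
// into a fence followed by an atomic load, e.g.
//   fence seq_cst
//   %old = load atomic i32, ptr %p monotonic, align 4
// (the exact fence and load orderings are target-specific), returning the new
// load instruction.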
2429 | |
2430 | /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, |
2431 | /// SIGN_EXTEND, or ANY_EXTEND). |
2432 | virtual ISD::NodeType getExtendForAtomicOps() const { |
2433 | return ISD::ZERO_EXTEND; |
2434 | } |
2435 | |
2436 | /// Returns how the platform's atomic compare and swap expects its comparison |
2437 | /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is |
2438 | /// separate from getExtendForAtomicOps, which is concerned with the |
2439 | /// sign-extension of the instruction's output, whereas here we are concerned |
2440 | /// with the sign-extension of the input. For targets with compare-and-swap |
2441 | /// instructions (or sub-word comparisons in their LL/SC loop expansions), |
2442 | /// the input can be ANY_EXTEND, but the output will still have a specific |
2443 | /// extension. |
2444 | virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const { |
2445 | return ISD::ANY_EXTEND; |
2446 | } |
2447 | |
2448 | /// @} |
2449 | |
2450 | /// Returns true if we should normalize |
2451 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
2452 | /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
2453 | /// that it saves us from materializing N0 and N1 in an integer register. |
2454 | /// Targets that are able to perform and/or on flags should return false here. |
2455 | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, |
2456 | EVT VT) const { |
2457 | // If a target has multiple condition registers, then it likely has logical |
2458 | // operations on those registers. |
2459 | if (hasMultipleConditionRegisters()) |
2460 | return false; |
2461 | // Only do the transform if the value won't be split into multiple |
2462 | // registers. |
2463 | LegalizeTypeAction Action = getTypeAction(Context, VT); |
2464 | return Action != TypeExpandInteger && Action != TypeExpandFloat && |
2465 | Action != TypeSplitVector; |
2466 | } |
2467 | |
2468 | virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } |
2469 | |
2470 | /// Return true if a select of constants (select Cond, C1, C2) should be |
2471 | /// transformed into simple math ops with the condition value. For example: |
2472 | /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 |
2473 | virtual bool convertSelectOfConstantsToMath(EVT VT) const { |
2474 | return false; |
2475 | } |
2476 | |
2477 | /// Return true if it is profitable to transform an integer |
2478 | /// multiplication-by-constant into simpler operations like shifts and adds. |
2479 | /// This may be true if the target does not directly support the |
2480 | /// multiplication operation for the specified type or the sequence of simpler |
2481 | /// ops is faster than the multiply. |
2482 | virtual bool decomposeMulByConstant(LLVMContext &Context, |
2483 | EVT VT, SDValue C) const { |
2484 | return false; |
2485 | } |
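
// Illustrative example (editor's sketch): if a target returns true here for
// (mul x, 9), the DAG combiner may rewrite it as
//   (add (shl x, 3), x)
// i.e. one shift and one add instead of a multiply.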
2486 | |
2487 | /// Return true if it may be profitable to transform |
2488 | /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). |
2489 | /// This may not be true if c1 and c2 can be represented as immediates but |
2490 | /// c1*c2 cannot, for example. |
2491 | /// The target should check if c1, c2 and c1*c2 can be represented as |
2492 | /// immediates, or have to be materialized into registers. If it is not sure |
2493 | /// about some cases, a default true can be returned to let the DAGCombiner |
2494 | /// decide. |
2495 | /// AddNode is (add x, c1), and ConstNode is c2. |
2496 | virtual bool isMulAddWithConstProfitable(SDValue AddNode, |
2497 | SDValue ConstNode) const { |
2498 | return true; |
2499 | } |
2500 | |
2501 | /// Return true if it is more correct/profitable to use strict FP_TO_INT |
2502 | /// conversion operations - canonicalizing the FP source value instead of |
2503 | /// converting all cases and then selecting based on value. |
2504 | /// This may be true if the target throws exceptions for out of bounds |
2505 | /// conversions or has fast FP CMOV. |
2506 | virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, |
2507 | bool IsSigned) const { |
2508 | return false; |
2509 | } |
2510 | |
2511 | /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic. |
2512 | /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always |
2513 | /// considered beneficial. |
2514 | /// If optimizing for size, expansion is only considered beneficial for up to
2515 | /// 5 multiplies and a divide (if the exponent is negative). |
2516 | bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const { |
2517 | if (Exponent < 0) |
2518 | Exponent = -Exponent; |
2519 | uint64_t E = static_cast<uint64_t>(Exponent); |
2520 | return !OptForSize || (llvm::popcount(E) + Log2_64(E) < 7);
2521 | } |
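
// Worked example (editor's note): expansion by repeated squaring needs roughly
// Log2_64(E) squarings plus popcount(E) extra multiplies. For llvm.powi(x, 8)
// the estimate is popcount(8) + Log2_64(8) = 1 + 3 = 4 < 7, so expansion is
// still considered beneficial when optimizing for size; for an exponent of 31
// the estimate is 5 + 4 = 9, so the libcall is kept instead.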
2522 | |
2523 | //===--------------------------------------------------------------------===// |
2524 | // TargetLowering Configuration Methods - These methods should be invoked by |
2525 | // the derived class constructor to configure this object for the target. |
2526 | // |
2527 | protected: |
2528 | /// Specify how the target extends the result of integer and floating point |
2529 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2530 | void setBooleanContents(BooleanContent Ty) { |
2531 | BooleanContents = Ty; |
2532 | BooleanFloatContents = Ty; |
2533 | } |
2534 | |
2535 | /// Specify how the target extends the result of integer and floating point |
2536 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2537 | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { |
2538 | BooleanContents = IntTy; |
2539 | BooleanFloatContents = FloatTy; |
2540 | } |
2541 | |
2542 | /// Specify how the target extends the result of a vector boolean value from a |
2543 | /// vector of i1 to a wider type. See getBooleanContents. |
2544 | void setBooleanVectorContents(BooleanContent Ty) { |
2545 | BooleanVectorContents = Ty; |
2546 | } |
2547 | |
2548 | /// Specify the target scheduling preference. |
2549 | void setSchedulingPreference(Sched::Preference Pref) { |
2550 | SchedPreferenceInfo = Pref; |
2551 | } |
2552 | |
2553 | /// Indicate the minimum number of blocks to generate jump tables. |
2554 | void setMinimumJumpTableEntries(unsigned Val); |
2555 | |
2556 | /// Indicate the maximum number of entries in jump tables. |
2557 | /// Set to zero to generate unlimited jump tables. |
2558 | void setMaximumJumpTableSize(unsigned); |
2559 | |
2560 | /// If set to a physical register, this specifies the register that |
2561 | /// llvm.stacksave/llvm.stackrestore should save and restore.
2562 | void setStackPointerRegisterToSaveRestore(Register R) { |
2563 | StackPointerRegisterToSaveRestore = R; |
2564 | } |
2565 | |
2566 | /// Tells the code generator that the target has multiple (allocatable) |
2567 | /// condition registers that can be used to store the results of comparisons |
2568 | /// for use by selects and conditional branches. With multiple condition |
2569 | /// registers, the code generator will not aggressively sink comparisons into |
2570 | /// the blocks of their users. |
2571 | void setHasMultipleConditionRegisters(bool hasManyRegs = true) { |
2572 | HasMultipleConditionRegisters = hasManyRegs; |
2573 | } |
2574 | |
2575 | /// Tells the code generator that the target has BitExtract instructions. |
2576 | /// The code generator will aggressively sink "shift"s into the blocks of |
2577 | /// their users if the users will generate "and" instructions which can be |
2578 | /// combined with "shift" to BitExtract instructions. |
2579 | void setHasExtractBitsInsn(bool hasExtractInsn = true) { |
2580 | HasExtractBitsInsn = hasExtractInsn; |
2581 | } |
2582 | |
2583 | /// Tells the code generator not to expand logic operations on comparison |
2584 | /// predicates into separate sequences that increase the amount of flow |
2585 | /// control. |
2586 | void setJumpIsExpensive(bool isExpensive = true); |
2587 | |
2588 | /// Tells the code generator which bitwidths to bypass. |
2589 | void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { |
2590 | BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; |
2591 | } |
2592 | |
2593 | /// Add the specified register class as an available regclass for the |
2594 | /// specified value type. This indicates the selector can handle values of |
2595 | /// that class natively. |
2596 | void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { |
2597 | assert((unsigned)VT.SimpleTy < std::size(RegClassForVT)); |
2598 | RegClassForVT[VT.SimpleTy] = RC; |
2599 | } |
2600 | |
2601 | /// Return the largest legal super-reg register class of the register class |
2602 | /// for the specified type and its associated "cost". |
2603 | virtual std::pair<const TargetRegisterClass *, uint8_t> |
2604 | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; |
2605 | |
2606 | /// Once all of the register classes are added, this allows us to compute |
2607 | /// derived properties we expose. |
2608 | void computeRegisterProperties(const TargetRegisterInfo *TRI); |
2609 | |
2610 | /// Indicate that the specified operation does not work with the specified |
2611 | /// type and indicate what to do about it. Note that VT may refer to either |
2612 | /// the type of a result or that of an operand of Op. |
2613 | void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { |
2614 | assert(Op < std::size(OpActions[0]) && "Table isn't big enough!"); |
2615 | OpActions[(unsigned)VT.SimpleTy][Op] = Action; |
2616 | } |
2617 | void setOperationAction(ArrayRef<unsigned> Ops, MVT VT, |
2618 | LegalizeAction Action) { |
2619 | for (auto Op : Ops) |
2620 | setOperationAction(Op, VT, Action); |
2621 | } |
2622 | void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs, |
2623 | LegalizeAction Action) { |
2624 | for (auto VT : VTs) |
2625 | setOperationAction(Ops, VT, Action); |
2626 | } |
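
// Typical usage (illustrative sketch of a hypothetical target's constructor,
// not mandated by this interface):
//   setOperationAction(ISD::SDIV, MVT::i32, Expand);       // expand to other ops
//   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);  // see LowerOperation
//   setOperationAction({ISD::ROTL, ISD::ROTR}, {MVT::i32, MVT::i64}, Expand);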
2627 | |
2628 | /// Indicate that the specified load with extension does not work with the |
2629 | /// specified type and indicate what to do about it. |
2630 | void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
2631 | LegalizeAction Action) { |
2632 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
2633 | MemVT.isValid() && "Table isn't big enough!"); |
2634 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
2635 | unsigned Shift = 4 * ExtType; |
2636 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); |
2637 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; |
2638 | } |
2639 | void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, |
2640 | LegalizeAction Action) { |
2641 | for (auto ExtType : ExtTypes) |
2642 | setLoadExtAction(ExtType, ValVT, MemVT, Action); |
2643 | } |
2644 | void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, |
2645 | ArrayRef<MVT> MemVTs, LegalizeAction Action) { |
2646 | for (auto MemVT : MemVTs) |
2647 | setLoadExtAction(ExtTypes, ValVT, MemVT, Action); |
2648 | } |
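
// Typical usage (illustrative): a target without a native sign-extending i1
// load might write
//   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
// Each (ExtType, ValVT, MemVT) action occupies a 4-bit field of the packed
// LoadExtActions entry, as implemented by the setter above.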
2649 | |
2650 | /// Let the target indicate that an extending atomic load of the specified type
2651 | /// is legal. |
2652 | void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
2653 | LegalizeAction Action) { |
2654 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
2655 | MemVT.isValid() && "Table isn't big enough!"); |
2656 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
2657 | unsigned Shift = 4 * ExtType; |
2658 | AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= |
2659 | ~((uint16_t)0xF << Shift); |
2660 | AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= |
2661 | ((uint16_t)Action << Shift); |
2662 | } |
2663 | void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, |
2664 | LegalizeAction Action) { |
2665 | for (auto ExtType : ExtTypes) |
2666 | setAtomicLoadExtAction(ExtType, ValVT, MemVT, Action); |
2667 | } |
2668 | void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, |
2669 | ArrayRef<MVT> MemVTs, LegalizeAction Action) { |
2670 | for (auto MemVT : MemVTs) |
2671 | setAtomicLoadExtAction(ExtTypes, ValVT, MemVT, Action); |
2672 | } |
2673 | |
2674 | /// Indicate that the specified truncating store does not work with the |
2675 | /// specified type and indicate what to do about it. |
2676 | void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { |
2677 | assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); |
2678 | TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; |
2679 | } |
2680 | |
2681 | /// Indicate that the specified indexed load does or does not work with the |
2682 | /// specified type and indicate what to do about it.
2683 | /// |
2684 | /// NOTE: All indexed mode loads are initialized to Expand in |
2685 | /// TargetLowering.cpp |
2686 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2687 | LegalizeAction Action) { |
2688 | for (auto IdxMode : IdxModes) |
2689 | setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
2690 | } |
2691 | |
2692 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2693 | LegalizeAction Action) { |
2694 | for (auto VT : VTs) |
2695 | setIndexedLoadAction(IdxModes, VT, Action); |
2696 | } |
2697 | |
2698 | /// Indicate that the specified indexed store does or does not work with the |
2699 | /// specified type and indicate what to do about it. |
2700 | /// |
2701 | /// NOTE: All indexed mode stores are initialized to Expand in |
2702 | /// TargetLowering.cpp |
2703 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2704 | LegalizeAction Action) { |
2705 | for (auto IdxMode : IdxModes) |
2706 | setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
2707 | } |
2708 | |
2709 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2710 | LegalizeAction Action) { |
2711 | for (auto VT : VTs) |
2712 | setIndexedStoreAction(IdxModes, VT, Action); |
2713 | } |
2714 | |
2715 | /// Indicate that the specified indexed masked load does or does not work with |
2716 | /// the specified type and indicate what to do about it. |
2717 | /// |
2718 | /// NOTE: All indexed mode masked loads are initialized to Expand in |
2719 | /// TargetLowering.cpp |
2720 | void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, |
2721 | LegalizeAction Action) { |
2722 | setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
2723 | } |
2724 | |
2725 | /// Indicate that the specified indexed masked store does or does not work |
2726 | /// with the specified type and indicate what to do about it. |
2727 | /// |
2728 | /// NOTE: All indexed mode masked stores are initialized to Expand in |
2729 | /// TargetLowering.cpp |
2730 | void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, |
2731 | LegalizeAction Action) { |
2732 | setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
2733 | } |
2734 | |
2735 | /// Indicate that the specified condition code is or isn't supported on the |
2736 | /// target and indicate what to do about it. |
2737 | void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT, |
2738 | LegalizeAction Action) { |
2739 | for (auto CC : CCs) { |
2740 | assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) && |
2741 | "Table isn't big enough!"); |
2742 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
2743 | /// The lower 3 bits of the SimpleTy select the Nth 4-bit nibble within the
2744 | /// 32-bit value, and the upper 29 bits index into the second dimension of
2745 | /// the array to select which 32-bit value to use.
2746 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
2747 | CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); |
2748 | CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; |
2749 | } |
2750 | } |
2751 | void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs, |
2752 | LegalizeAction Action) { |
2753 | for (auto VT : VTs) |
2754 | setCondCodeAction(CCs, VT, Action); |
2755 | } |
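
// Worked example of the packing (editor's note): for a type with
// VT.SimpleTy == 13, the action lands in CondCodeActions[CC][13 >> 3], i.e.
// word index 1, at bit offset 4 * (13 & 0x7) == 20; four bits per
// (CondCode, type) pair.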
2756 | |
2757 | /// Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input |
2758 | /// type InputVT should be treated by the target. Either it's legal, needs to |
2759 | /// be promoted to a larger size, needs to be expanded to some other code |
2760 | /// sequence, or the target has a custom expander for it. |
2761 | void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, |
2762 | LegalizeAction Action) { |
2763 | assert(Opc == ISD::PARTIAL_REDUCE_SMLA || Opc == ISD::PARTIAL_REDUCE_UMLA || |
2764 | Opc == ISD::PARTIAL_REDUCE_SUMLA); |
2765 | assert(AccVT.isValid() && InputVT.isValid() && |
2766 | "setPartialReduceMLAAction types aren't valid"); |
2767 | PartialReduceActionTypes Key = {Opc, AccVT.SimpleTy, InputVT.SimpleTy}; |
2768 | PartialReduceMLAActions[Key] = Action; |
2769 | } |
2770 | void setPartialReduceMLAAction(ArrayRef<unsigned> Opcodes, MVT AccVT, |
2771 | MVT InputVT, LegalizeAction Action) { |
2772 | for (unsigned Opc : Opcodes) |
2773 | setPartialReduceMLAAction(Opc, AccVT, InputVT, Action); |
2774 | } |
2775 | |
2776 | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults |
2777 | /// to trying a larger integer/fp until it can find one that works. If that |
2778 | /// default is insufficient, this method can be used by the target to override |
2779 | /// the default. |
2780 | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
2781 | PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2782 | } |
2783 | |
2784 | /// Convenience method to set an operation to Promote and specify the type |
2785 | /// in a single call. |
2786 | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
2787 | setOperationAction(Opc, OrigVT, Promote);
2788 | AddPromotedToType(Opc, OrigVT, DestVT); |
2789 | } |
2790 | void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT, |
2791 | MVT DestVT) { |
2792 | for (auto Op : Ops) { |
2793 | setOperationAction(Op, OrigVT, Promote);
2794 | AddPromotedToType(Op, OrigVT, DestVT);
2795 | } |
2796 | } |
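
// Typical usage (illustrative): a target with only 32-bit logical operations
// might write
//   setOperationPromotedToType(ISD::AND, MVT::i8, MVT::i32);
// which marks i8 AND as Promote and records i32 as its promotion target.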
2797 | |
2798 | /// Targets should invoke this method for each target independent node that |
2799 | /// they want to provide a custom DAG combiner for by implementing the |
2800 | /// PerformDAGCombine virtual method. |
2801 | void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) { |
2802 | for (auto NT : NTs) { |
2803 | assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); |
2804 | TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7); |
2805 | } |
2806 | } |
2807 | |
2808 | /// Set the target's minimum function alignment. |
2809 | void setMinFunctionAlignment(Align Alignment) { |
2810 | MinFunctionAlignment = Alignment; |
2811 | } |
2812 | |
2813 | /// Set the target's preferred function alignment. This should be set if |
2814 | /// there is a performance benefit to higher-than-minimum alignment.
2815 | void setPrefFunctionAlignment(Align Alignment) { |
2816 | PrefFunctionAlignment = Alignment; |
2817 | } |
2818 | |
2819 | /// Set the target's preferred loop alignment. The default alignment is one,
2820 | /// meaning the target does not care about loop alignment. The target may also
2821 | /// override getPrefLoopAlignment to provide per-loop values. |
2822 | void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } |
2823 | void setMaxBytesForAlignment(unsigned MaxBytes) { |
2824 | MaxBytesForAlignment = MaxBytes; |
2825 | } |
2826 | |
2827 | /// Set the minimum stack alignment of an argument. |
2828 | void setMinStackArgumentAlignment(Align Alignment) { |
2829 | MinStackArgumentAlignment = Alignment; |
2830 | } |
2831 | |
2832 | /// Set the maximum atomic operation size supported by the |
2833 | /// backend. Atomic operations greater than this size (as well as |
2834 | /// ones that are not naturally aligned), will be expanded by |
2835 | /// AtomicExpandPass into an __atomic_* library call. |
2836 | void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { |
2837 | MaxAtomicSizeInBitsSupported = SizeInBits; |
2838 | } |
2839 | |
2840 | /// Set the size in bits of the maximum div/rem the backend supports. |
2841 | /// Larger operations will be expanded by ExpandLargeDivRem. |
2842 | void setMaxDivRemBitWidthSupported(unsigned SizeInBits) { |
2843 | MaxDivRemBitWidthSupported = SizeInBits; |
2844 | } |
2845 | |
2846 | /// Set the size in bits of the maximum fp to/from int conversion the backend |
2847 | /// supports. Larger operations will be expanded by ExpandFp. |
2848 | void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) { |
2849 | MaxLargeFPConvertBitWidthSupported = SizeInBits; |
2850 | } |
2851 | |
2852 | /// Sets the minimum cmpxchg or ll/sc size supported by the backend. |
2853 | void setMinCmpXchgSizeInBits(unsigned SizeInBits) { |
2854 | MinCmpXchgSizeInBits = SizeInBits; |
2855 | } |
2856 | |
2857 | /// Sets whether unaligned atomic operations are supported. |
2858 | void setSupportsUnalignedAtomics(bool UnalignedSupported) { |
2859 | SupportsUnalignedAtomics = UnalignedSupported; |
2860 | } |
2861 | |
2862 | public: |
2863 | //===--------------------------------------------------------------------===// |
2864 | // Addressing mode description hooks (used by LSR etc). |
2865 | // |
2866 | |
2867 | /// CodeGenPrepare sinks address calculations into the same BB as Load/Store |
2868 | /// instructions reading the address. This allows as much computation as |
2869 | /// possible to be done in the address mode for that operand. This hook lets |
2870 | /// targets also indicate when this should be done for intrinsics which
2871 | /// load/store.
2872 | virtual bool getAddrModeArguments(const IntrinsicInst * /*I*/, |
2873 | SmallVectorImpl<Value *> & /*Ops*/, |
2874 | Type *& /*AccessTy*/) const { |
2875 | return false; |
2876 | } |
2877 | |
2878 | /// This represents an addressing mode of: |
2879 | /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale |
2880 | /// If BaseGV is null, there is no BaseGV. |
2881 | /// If BaseOffs is zero, there is no base offset. |
2882 | /// If HasBaseReg is false, there is no base register. |
2883 | /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with |
2884 | /// no scale. |
2885 | /// If ScalableOffset is zero, there is no scalable offset. |
2886 | struct AddrMode { |
2887 | GlobalValue *BaseGV = nullptr; |
2888 | int64_t BaseOffs = 0; |
2889 | bool HasBaseReg = false; |
2890 | int64_t Scale = 0; |
2891 | int64_t ScalableOffset = 0; |
2892 | AddrMode() = default; |
2893 | }; |
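
// Worked example (illustrative): an x86-style operand such as
//   [gv + 16 + rbase + 4*rindex]
// corresponds to AddrMode{BaseGV = gv, BaseOffs = 16, HasBaseReg = true,
// Scale = 4}, while a plain register-indirect access is HasBaseReg = true
// with all other fields zero/null.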
2894 | |
2895 | /// Return true if the addressing mode represented by AM is legal for this |
2896 | /// target, for a load/store of the specified type. |
2897 | /// |
2898 | /// The type may be VoidTy, in which case only return true if the addressing |
2899 | /// mode is legal for a load/store of any legal type. TODO: Handle |
2900 | /// pre/postinc as well. |
2901 | /// |
2902 | /// If the address space cannot be determined, it will be -1. |
2903 | /// |
2904 | /// TODO: Remove default argument |
2905 | virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
2906 | Type *Ty, unsigned AddrSpace, |
2907 | Instruction *I = nullptr) const; |
2908 | |
2909 | /// Returns true if the target's addressing mode can target thread local
2910 | /// storage (TLS). |
2911 | virtual bool addressingModeSupportsTLS(const GlobalValue &) const { |
2912 | return false; |
2913 | } |
2914 | |
2915 | /// Return the preferred common base offset.
2916 | virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, |
2917 | int64_t MaxOffset) const { |
2918 | return 0; |
2919 | } |
2920 | |
2921 | /// Return true if the specified immediate is a legal icmp immediate, that is
2922 | /// the target has icmp instructions which can compare a register against the |
2923 | /// immediate without having to materialize the immediate into a register. |
2924 | virtual bool isLegalICmpImmediate(int64_t) const { |
2925 | return true; |
2926 | } |
2927 | |
2928 | /// Return true if the specified immediate is a legal add immediate, that is the
2929 | /// target has add instructions which can add a register with the immediate |
2930 | /// without having to materialize the immediate into a register. |
2931 | virtual bool isLegalAddImmediate(int64_t) const { |
2932 | return true; |
2933 | } |
2934 | |
2935 | /// Return true if adding the specified scalable immediate is legal, that is |
2936 | /// the target has add instructions which can add a register with the |
2937 | /// immediate (multiplied by vscale) without having to materialize the |
2938 | /// immediate into a register. |
2939 | virtual bool isLegalAddScalableImmediate(int64_t) const { return false; } |
2940 | |
2941 | /// Return true if the specified immediate is legal for the value input of a |
2942 | /// store instruction. |
2943 | virtual bool isLegalStoreImmediate(int64_t Value) const { |
2944 | // Default implementation assumes that at least 0 works since it is likely |
2945 | // that a zero register exists or a zero immediate is allowed. |
2946 | return Value == 0; |
2947 | } |
2948 | |
2949 | /// Given a shuffle vector SVI representing a vector splat, return a new |
2950 | /// scalar type of size equal to SVI's scalar type if the new type is more |
2951 | /// profitable. Returns nullptr otherwise. For example under MVE float splats |
2952 | /// are converted to integer to prevent the need to move from SPR to GPR |
2953 | /// registers. |
2954 | virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const { |
2955 | return nullptr; |
2956 | } |
2957 | |
2958 | /// Given a set of interconnected phis of type 'From' that are loaded/stored
2959 | /// or bitcast to type 'To', return true if the set should be converted to |
2960 | /// 'To'. |
2961 | virtual bool shouldConvertPhiType(Type *From, Type *To) const { |
2962 | return (From->isIntegerTy() || From->isFloatingPointTy()) && |
2963 | (To->isIntegerTy() || To->isFloatingPointTy()); |
2964 | } |
2965 | |
2966 | /// Returns true if the opcode is a commutative binary operation. |
2967 | virtual bool isCommutativeBinOp(unsigned Opcode) const { |
2968 | // FIXME: This should get its info from the td file. |
2969 | switch (Opcode) { |
2970 | case ISD::ADD: |
2971 | case ISD::SMIN: |
2972 | case ISD::SMAX: |
2973 | case ISD::UMIN: |
2974 | case ISD::UMAX: |
2975 | case ISD::MUL: |
2976 | case ISD::MULHU: |
2977 | case ISD::MULHS: |
2978 | case ISD::SMUL_LOHI: |
2979 | case ISD::UMUL_LOHI: |
2980 | case ISD::FADD: |
2981 | case ISD::FMUL: |
2982 | case ISD::AND: |
2983 | case ISD::OR: |
2984 | case ISD::XOR: |
2985 | case ISD::SADDO: |
2986 | case ISD::UADDO: |
2987 | case ISD::ADDC: |
2988 | case ISD::ADDE: |
2989 | case ISD::SADDSAT: |
2990 | case ISD::UADDSAT: |
2991 | case ISD::FMINNUM: |
2992 | case ISD::FMAXNUM: |
2993 | case ISD::FMINNUM_IEEE: |
2994 | case ISD::FMAXNUM_IEEE: |
2995 | case ISD::FMINIMUM: |
2996 | case ISD::FMAXIMUM: |
2997 | case ISD::FMINIMUMNUM: |
2998 | case ISD::FMAXIMUMNUM: |
2999 | case ISD::AVGFLOORS: |
3000 | case ISD::AVGFLOORU: |
3001 | case ISD::AVGCEILS: |
3002 | case ISD::AVGCEILU: |
3003 | case ISD::ABDS: |
3004 | case ISD::ABDU: |
3005 | return true; |
3006 | default: return false; |
3007 | } |
3008 | } |
3009 | |
3010 | /// Return true if the node is a math/logic binary operator. |
3011 | virtual bool isBinOp(unsigned Opcode) const { |
3012 | // A commutative binop must be a binop. |
3013 | if (isCommutativeBinOp(Opcode)) |
3014 | return true; |
3015 | // These are non-commutative binops. |
3016 | switch (Opcode) { |
3017 | case ISD::SUB: |
3018 | case ISD::SHL: |
3019 | case ISD::SRL: |
3020 | case ISD::SRA: |
3021 | case ISD::ROTL: |
3022 | case ISD::ROTR: |
3023 | case ISD::SDIV: |
3024 | case ISD::UDIV: |
3025 | case ISD::SREM: |
3026 | case ISD::UREM: |
3027 | case ISD::SSUBSAT: |
3028 | case ISD::USUBSAT: |
3029 | case ISD::FSUB: |
3030 | case ISD::FDIV: |
3031 | case ISD::FREM: |
3032 | return true; |
3033 | default: |
3034 | return false; |
3035 | } |
3036 | } |
3037 | |
3038 | /// Return true if it's free to truncate a value of type FromTy to type |
3039 | /// ToTy. E.g., on x86 it's free to truncate an i32 value in register EAX to i16
3040 | /// by referencing its sub-register AX. |
3041 | /// Targets must return false when FromTy <= ToTy. |
3042 | virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { |
3043 | return false; |
3044 | } |
3045 | |
3046 | /// Return true if a truncation from FromTy to ToTy is permitted when deciding |
3047 | /// whether a call is in tail position. Typically this means that both results |
3048 | /// would be assigned to the same register or stack slot, but it could mean |
3049 | /// the target performs adequate checks of its own before proceeding with the |
3050 | /// tail call. Targets must return false when FromTy <= ToTy. |
3051 | virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { |
3052 | return false; |
3053 | } |
3054 | |
3055 | virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } |
3056 | virtual bool isTruncateFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const { |
3057 | return isTruncateFree(getApproximateEVTForLLT(FromTy, Ctx),
3058 | getApproximateEVTForLLT(ToTy, Ctx));
3059 | } |
3060 | |
3061 | /// Return true if truncating the specific node Val to type VT2 is free. |
3062 | virtual bool isTruncateFree(SDValue Val, EVT VT2) const { |
3063 | // Fallback to type matching. |
3064 | return isTruncateFree(Val.getValueType(), VT2);
3065 | } |
3066 | |
3067 | virtual bool isProfitableToHoist(Instruction *I) const { return true; } |
3068 | |
3069 | /// Return true if the extension represented by \p I is free. |
3070 | /// Unlike the is[Z|FP]ExtFree family, which is based on types,
3071 | /// this method can use the context provided by \p I to decide |
3072 | /// whether or not \p I is free. |
3073 | /// This method extends the behavior of the is[Z|FP]ExtFree family. |
3074 | /// In other words, if is[Z|FP]ExtFree returns true, then this method
3075 | /// returns true as well. The converse is not true. |
3076 | /// The target can perform the adequate checks by overriding isExtFreeImpl. |
3077 | /// \pre \p I must be a sign, zero, or fp extension. |
3078 | bool isExtFree(const Instruction *I) const { |
3079 | switch (I->getOpcode()) { |
3080 | case Instruction::FPExt: |
3081 | if (isFPExtFree(EVT::getEVT(I->getType()),
3082 | EVT::getEVT(I->getOperand(0)->getType())))
3083 | return true; |
3084 | break; |
3085 | case Instruction::ZExt: |
3086 | if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
3087 | return true; |
3088 | break; |
3089 | case Instruction::SExt: |
3090 | break; |
3091 | default: |
3092 | llvm_unreachable("Instruction is not an extension"); |
3093 | } |
3094 | return isExtFreeImpl(I); |
3095 | } |
3096 | |
3097 | /// Return true if \p Load and \p Ext can form an ExtLoad. |
3098 | /// For example, in AArch64 |
3099 | /// %L = load i8, i8* %ptr |
3100 | /// %E = zext i8 %L to i32 |
3101 | /// can be lowered into one load instruction |
3102 | /// ldrb w0, [x0] |
3103 | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, |
3104 | const DataLayout &DL) const { |
3105 | EVT VT = getValueType(DL, Ext->getType());
3106 | EVT LoadVT = getValueType(DL, Load->getType());
3107 | |
3108 | // If the load has other users and the truncate is not free, the ext |
3109 | // probably isn't free. |
3110 | if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
3111 | !isTruncateFree(Ext->getType(), Load->getType()))
3112 | return false; |
3113 | |
3114 | // Check whether the target supports casts folded into loads. |
3115 | unsigned LType; |
3116 | if (isa<ZExtInst>(Ext))
3117 | LType = ISD::ZEXTLOAD; |
3118 | else { |
3119 | assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); |
3120 | LType = ISD::SEXTLOAD; |
3121 | } |
3122 | |
3123 | return isLoadExtLegal(LType, VT, LoadVT);
3124 | } |
3125 | |
3126 | /// Return true if any actual instruction that defines a value of type FromTy |
3127 | /// implicitly zero-extends the value to ToTy in the result register. |
3128 | /// |
3129 | /// The function should return true when it is likely that the truncate can |
3130 | /// be freely folded with an instruction defining a value of FromTy. If |
3131 | /// the defining instruction is unknown (because you're looking at a |
3132 | /// function argument, PHI, etc.) then the target may require an |
3133 | /// explicit truncate, which is not necessarily free, but this function |
3134 | /// does not deal with those cases. |
3135 | /// Targets must return false when FromTy >= ToTy. |
3136 | virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { |
3137 | return false; |
3138 | } |
3139 | |
3140 | virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } |
3141 | virtual bool isZExtFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const { |
3142 | return isZExtFree(getApproximateEVTForLLT(FromTy, Ctx),
3143 | getApproximateEVTForLLT(ToTy, Ctx));
3144 | } |
3145 | |
3146 | /// Return true if zero-extending the specific node Val to type VT2 is free |
3147 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
3148 | /// because it's folded such as X86 zero-extending loads). |
3149 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
3150 | return isZExtFree(Val.getValueType(), VT2);
3151 | } |
3152 | |
3153 | /// Return true if sign-extension from FromTy to ToTy is cheaper than |
3154 | /// zero-extension. |
3155 | virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { |
3156 | return false; |
3157 | } |
3158 | |
3159 | /// Return true if this constant should be sign extended when promoting to |
3160 | /// a larger type. |
3161 | virtual bool signExtendConstant(const ConstantInt *C) const { return false; } |
3162 | |
3163 | /// Try to optimize extending or truncating conversion instructions (like |
3164 | /// zext, trunc, fptoui, uitofp) for the target. |
3165 | virtual bool |
3166 | optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, |
3167 | const TargetTransformInfo &TTI) const { |
3168 | return false; |
3169 | } |
3170 | |
3171 | /// Return true if the target supplies and combines to a paired load |
3172 | /// two loaded values of type LoadedType next to each other in memory. |
3173 | /// RequiredAlignment gives the minimal alignment constraints that must be met |
3174 | /// to be able to select this paired load. |
3175 | /// |
3176 | /// This information is *not* used to generate actual paired loads, but it is |
3177 | /// used to generate a sequence of loads that is easier to combine into a |
3178 | /// paired load. |
3179 | /// For instance, something like this: |
3180 | /// a = load i64* addr |
3181 | /// b = trunc i64 a to i32 |
3182 | /// c = lshr i64 a, 32 |
3183 | /// d = trunc i64 c to i32 |
3184 | /// will be optimized into: |
3185 | /// b = load i32* addr1 |
3186 | /// d = load i32* addr2 |
3187 | /// Where addr1 = addr2 +/- sizeof(i32). |
3188 | /// |
3189 | /// In other words, unless the target performs a post-isel load combining, |
3190 | /// this information should not be provided because it will generate more |
3191 | /// loads. |
3192 | virtual bool hasPairedLoad(EVT /*LoadedType*/, |
3193 | Align & /*RequiredAlignment*/) const { |
3194 | return false; |
3195 | } |
3196 | |
3197 | /// Return true if the target has a vector blend instruction. |
3198 | virtual bool hasVectorBlend() const { return false; } |
3199 | |
3200 | /// Get the maximum supported factor for interleaved memory accesses. |
3201 | /// Default to be the minimum interleave factor: 2. |
3202 | virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } |
3203 | |
3204 | /// Lower an interleaved load to target specific intrinsics. Return |
3205 | /// true on success. |
3206 | /// |
3207 | /// \p LI is the vector load instruction. |
3208 | /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. |
3209 | /// \p Indices is the corresponding indices for each shufflevector. |
3210 | /// \p Factor is the interleave factor. |
3211 | virtual bool lowerInterleavedLoad(LoadInst *LI, |
3212 | ArrayRef<ShuffleVectorInst *> Shuffles, |
3213 | ArrayRef<unsigned> Indices, |
3214 | unsigned Factor) const { |
3215 | return false; |
3216 | } |
3217 | |
3218 | /// Lower an interleaved store to target specific intrinsics. Return |
3219 | /// true on success. |
3220 | /// |
3221 | /// \p SI is the vector store instruction. |
3222 | /// \p SVI is the shufflevector to RE-interleave the stored vector. |
3223 | /// \p Factor is the interleave factor. |
3224 | virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
3225 | unsigned Factor) const { |
3226 | return false; |
3227 | } |
3228 | |
3229 | /// Lower an interleaved load to target specific intrinsics. Return |
3230 | /// true on success. |
3231 | /// |
3232 | /// \p Load is a vp.load instruction. |
3233 | /// \p Mask is a mask value |
3234 | /// \p DeinterleaveRes is a list of deinterleaved results. |
3235 | virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask, |
3236 | ArrayRef<Value *> DeinterleaveRes) const { |
3237 | return false; |
3238 | } |
3239 | |
3240 | /// Lower an interleaved store to target specific intrinsics. Return |
3241 | /// true on success. |
3242 | /// |
3243 | /// \p Store is the vp.store instruction. |
3244 | /// \p Mask is a mask value |
3245 | /// \p InterleaveOps is a list of values being interleaved. |
3246 | virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask, |
3247 | ArrayRef<Value *> InterleaveOps) const { |
3248 | return false; |
3249 | } |
3250 | |
3251 | /// Lower a deinterleave intrinsic to a target specific load intrinsic. |
3252 | /// Return true on success. Currently only supports |
3253 | /// llvm.vector.deinterleave{2,3,5,7} |
3254 | /// |
3255 | /// \p LI is the accompanying load instruction. |
3256 | /// \p DeinterleaveValues contains the deinterleaved values. |
3257 | virtual bool |
3258 | lowerDeinterleaveIntrinsicToLoad(LoadInst *LI, |
3259 | ArrayRef<Value *> DeinterleaveValues) const { |
3260 | return false; |
3261 | } |
3262 | |
3263 | /// Lower an interleave intrinsic to a target specific store intrinsic. |
3264 | /// Return true on success. Currently only supports |
3265 | /// llvm.vector.interleave{2,3,5,7} |
3266 | /// |
3267 | /// \p SI is the accompanying store instruction |
3268 | /// \p InterleaveValues contains the interleaved values. |
3269 | virtual bool |
3270 | lowerInterleaveIntrinsicToStore(StoreInst *SI, |
3271 | ArrayRef<Value *> InterleaveValues) const { |
3272 | return false; |
3273 | } |
3274 | |
3275 | /// Return true if an fpext operation is free (for instance, because |
3276 | /// single-precision floating-point numbers are implicitly extended to |
3277 | /// double-precision). |
3278 | virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { |
3279 | assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && |
3280 | "invalid fpext types"); |
3281 | return false; |
3282 | } |
3283 | |
3284 | /// Return true if an fpext operation input to an \p Opcode operation is free |
3285 | /// (for instance, because half-precision floating-point numbers are |
3286 | /// implicitly extended to float-precision) for an FMA instruction. |
3287 | virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, |
3288 | LLT DestTy, LLT SrcTy) const { |
3289 | return false; |
3290 | } |
3291 | |
3292 | /// Return true if an fpext operation input to an \p Opcode operation is free |
3293 | /// (for instance, because half-precision floating-point numbers are |
3294 | /// implicitly extended to float-precision) for an FMA instruction. |
3295 | virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, |
3296 | EVT DestVT, EVT SrcVT) const { |
3297 | assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && |
3298 | "invalid fpext types"); |
3299 | return isFPExtFree(DestVT, SrcVT); |
3300 | } |
3301 | |
3302 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
3303 | /// extend node) is profitable. |
3304 | virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } |
3305 | |
3306 | /// Return true if an fneg operation is free to the point where it is never |
3307 | /// worthwhile to replace it with a bitwise operation. |
3308 | virtual bool isFNegFree(EVT VT) const { |
3309 | assert(VT.isFloatingPoint()); |
3310 | return false; |
3311 | } |
3312 | |
3313 | /// Return true if an fabs operation is free to the point where it is never |
3314 | /// worthwhile to replace it with a bitwise operation. |
3315 | virtual bool isFAbsFree(EVT VT) const { |
3316 | assert(VT.isFloatingPoint()); |
3317 | return false; |
3318 | } |
3319 | |
3320 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
3321 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
3322 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
3323 | /// |
3324 | /// NOTE: This may be called before legalization on types for which FMAs are |
3325 | /// not legal, but should return true if those types will eventually legalize |
3326 | /// to types that support FMAs. After legalization, it will only be called on |
3327 | /// types that support FMAs (via Legal or Custom actions) |
3328 | /// |
3329 | /// Targets that care about soft float support should return false when soft |
3330 | /// float code is being generated (i.e. use-soft-float). |
3331 | virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
3332 | EVT) const { |
3333 | return false; |
3334 | } |
3335 | |
3336 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
3337 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
3338 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
3339 | /// |
3340 | /// NOTE: This may be called before legalization on types for which FMAs are |
3341 | /// not legal, but should return true if those types will eventually legalize |
3342 | /// to types that support FMAs. After legalization, it will only be called on |
3343 | /// types that support FMAs (via Legal or Custom actions) |
3344 | virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
3345 | LLT) const { |
3346 | return false; |
3347 | } |
3348 | |
3349 | /// IR version |
3350 | virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const { |
3351 | return false; |
3352 | } |
3353 | |
3354 | /// Returns true if \p MI can be combined with another instruction to |
3355 | /// form TargetOpcode::G_FMAD. \p MI may be a TargetOpcode::G_FADD,
3356 | /// TargetOpcode::G_FSUB, or a TargetOpcode::G_FMUL which will be
3357 | /// distributed into an fadd/fsub. |
3358 | virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const { |
3359 | assert((MI.getOpcode() == TargetOpcode::G_FADD || |
3360 | MI.getOpcode() == TargetOpcode::G_FSUB || |
3361 | MI.getOpcode() == TargetOpcode::G_FMUL) && |
3362 | "unexpected node in FMAD forming combine"); |
3363 | switch (Ty.getScalarSizeInBits()) { |
3364 | case 16: |
3365 | return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16); |
3366 | case 32: |
3367 | return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32); |
3368 | case 64: |
3369 | return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64); |
3370 | default: |
3371 | break; |
3372 | } |
3373 | |
3374 | return false; |
3375 | } |
3376 | |
3377 | /// Returns true if \p N can be combined with another node to form an
3378 | /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
3379 | /// will be distributed into an fadd/fsub.
3380 | virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const { |
3381 | assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB || |
3382 | N->getOpcode() == ISD::FMUL) && |
3383 | "unexpected node in FMAD forming combine"); |
3384 | return isOperationLegal(ISD::FMAD, N->getValueType(0));
3385 | } |
3386 | |
3387 | // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather |
3388 | // than FMUL and ADD is delegated to the machine combiner. |
3389 | virtual bool generateFMAsInMachineCombiner(EVT VT, |
3390 | CodeGenOptLevel OptLevel) const { |
3391 | return false; |
3392 | } |
3393 | |
3394 | /// Return true if it's profitable to narrow operations of type SrcVT to |
3395 | /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from |
3396 | /// i32 to i16. |
3397 | virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const { |
3398 | return false; |
3399 | } |
3400 | |
3401 | /// Return true if pulling a binary operation into a select with an identity |
3402 | /// constant is profitable. This is the inverse of an IR transform. |
3403 | /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X |
3404 | virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, |
3405 | unsigned SelectOpcode, |
3406 | SDValue X, |
3407 | SDValue Y) const { |
3408 | return false; |
3409 | } |
3410 | |
3411 | /// Return true if it is beneficial to convert a load of a constant to |
3412 | /// just the constant itself. |
3413 | /// On some targets it might be more efficient to use a combination of |
3414 | /// arithmetic instructions to materialize the constant instead of loading it |
3415 | /// from a constant pool. |
3416 | virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
3417 | Type *Ty) const { |
3418 | return false; |
3419 | } |
3420 | |
3421 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type |
3422 | /// from this source type with this index. This is needed because |
3423 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of |
3424 | /// the first element, and only the target knows which lowering is cheap. |
3425 | virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
3426 | unsigned Index) const { |
3427 | return false; |
3428 | } |
3429 | |
3430 | /// Try to convert an extract element of a vector binary operation into an |
3431 | /// extract element followed by a scalar operation. |
3432 | virtual bool shouldScalarizeBinop(SDValue VecOp) const { |
3433 | return false; |
3434 | } |
3435 | |
3436 | /// Return true if extraction of a scalar element from the given vector type |
3437 | /// at the given index is cheap. For example, if scalar operations occur on |
3438 | /// the same register file as vector operations, then an extract element may |
3439 | /// be a sub-register rename rather than an actual instruction. |
3440 | virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const { |
3441 | return false; |
3442 | } |
3443 | |
3444 | /// Try to convert math with an overflow comparison into the corresponding DAG |
3445 | /// node operation. Targets may want to override this independently of whether |
3446 | /// the operation is legal/custom for the given type because it may obscure |
3447 | /// matching of other patterns. |
3448 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
3449 | bool MathUsed) const { |
3450 | // TODO: The default logic is inherited from code in CodeGenPrepare. |
3451 | // The opcode should not make a difference by default? |
3452 | if (Opcode != ISD::UADDO) |
3453 | return false; |
3454 | |
3455 | // Allow the transform as long as we have an integer type that is not |
3456 | // obviously illegal and unsupported and if the math result is used |
3457 | // besides the overflow check. On some targets (e.g. SPARC), it is |
3458 | // not profitable to form an overflow op if the math result has no
3459 | // concrete users. |
3460 | if (VT.isVector()) |
3461 | return false; |
3462 | return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
3463 | } |
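
// Illustrative example (editor's sketch): when this returns true for
// ISD::UADDO, CodeGenPrepare can rewrite
//   %s = add i32 %a, %b
//   %c = icmp ult i32 %s, %a
// into a single call to @llvm.uadd.with.overflow.i32, exposing the carry
// result directly to instruction selection.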
3464 | |
3465 | // Return true if it is profitable to use a scalar input to a BUILD_VECTOR |
3466 | // even if the vector itself has multiple uses. |
3467 | virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { |
3468 | return false; |
3469 | } |
3470 | |
3471 | // Return true if CodeGenPrepare should consider splitting the large offset of
3472 | // a GEP so that the GEP fits into the addressing mode and can be sunk into
3473 | // the same blocks as its users.
3474 | virtual bool shouldConsiderGEPOffsetSplit() const { return false; } |
3475 | |
3476 | /// Return true if creating a shift of the type by the given |
3477 | /// amount is not profitable. |
3478 | virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const { |
3479 | return false; |
3480 | } |
3481 | |
3482 | // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) |
3483 | // A) where y has a single bit set? |
3484 | virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, |
3485 | const APInt &AndMask) const { |
3486 | unsigned ShCt = AndMask.getBitWidth() - 1; |
3487 | return !shouldAvoidTransformToShift(VT, ShCt);
3488 | } |
3489 | |
3490 | /// Does this target require the clearing of high-order bits in a register |
3491 | /// passed to the fp16 to fp conversion library function. |
3492 | virtual bool shouldKeepZExtForFP16Conv() const { return false; } |
3493 | |
3494 | /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT |
3495 | /// from min(max(fptoi)) saturation patterns. |
3496 | virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const { |
3497 | return isOperationLegalOrCustom(Op, VT); |
3498 | } |
3499 | |
3500 | /// Should we expand [US]CMP nodes using two selects and two compares, or by |
3501 | /// doing arithmetic on boolean types |
3502 | virtual bool shouldExpandCmpUsingSelects(EVT VT) const { return false; } |
3503 | |
3504 | /// True if the target has some particular way of dealing with pointer
3505 | /// arithmetic semantics for pointers with the given value type. False if
3506 | /// pointer arithmetic should not be preserved for passes such as instruction
3507 | /// selection, and can fall back to regular arithmetic.
3508 | /// This should be removed when PTRADD nodes are widely supported by backends. |
3509 | virtual bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const { |
3510 | return false; |
3511 | } |
3512 | |
3513 | /// Does this target support complex deinterleaving |
3514 | virtual bool isComplexDeinterleavingSupported() const { return false; } |
3515 | |
3516 | /// Does this target support complex deinterleaving with the given operation |
3517 | /// and type |
3518 | virtual bool isComplexDeinterleavingOperationSupported( |
3519 | ComplexDeinterleavingOperation Operation, Type *Ty) const { |
3520 | return false; |
3521 | } |
3522 | |
3523 | // Get the preferred opcode for FP_TO_XINT nodes. |
3524 | // By default, this checks if the provided operation is an illegal FP_TO_UINT
3525 | // and if so, checks if FP_TO_SINT is legal or custom for use as a |
3526 | // replacement. If both UINT and SINT conversions are Custom, we choose SINT |
3527 | // by default because that's the right thing on PPC. |
3528 | virtual unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT, |
3529 | EVT ToVT) const { |
3530 | if (isOperationLegal(Op, ToVT))
3531 | return Op; |
3532 | switch (Op) { |
3533 | case ISD::FP_TO_UINT: |
3534 | if (isOperationLegalOrCustom(ISD::FP_TO_SINT, ToVT))
3535 | return ISD::FP_TO_SINT; |
3536 | break; |
3537 | case ISD::STRICT_FP_TO_UINT: |
3538 | if (isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, ToVT))
3539 | return ISD::STRICT_FP_TO_SINT; |
3540 | break; |
3541 | case ISD::VP_FP_TO_UINT: |
3542 | if (isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, ToVT))
3543 | return ISD::VP_FP_TO_SINT; |
3544 | break; |
3545 | default: |
3546 | break; |
3547 | } |
3548 | return Op; |
3549 | } |
3550 | |
3551 | /// Create the IR node for the given complex deinterleaving operation. |
3552 | /// If one cannot be created using all the given inputs, nullptr should be |
3553 | /// returned. |
3554 | virtual Value *createComplexDeinterleavingIR( |
3555 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
3556 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
3557 | Value *Accumulator = nullptr) const { |
3558 | return nullptr; |
3559 | } |
3560 | |
3561 | /// Rename the default libcall routine name for the specified libcall. |
3562 | void setLibcallName(RTLIB::Libcall Call, const char *Name) { |
3563 | Libcalls.setLibcallName(Call, Name); |
3564 | } |
3565 | |
3566 | void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) { |
3567 | Libcalls.setLibcallName(Calls, Name); |
3568 | } |
3569 | |
3570 | /// Get the libcall routine name for the specified libcall. |
3571 | const char *getLibcallName(RTLIB::Libcall Call) const { |
3572 | return Libcalls.getLibcallName(Call); |
3573 | } |
3574 | |
3575 | /// Override the default CondCode to be used to test the result of the |
3576 | /// comparison libcall against zero. |
3577 | /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. |
3578 | void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { |
3579 | CmpLibcallCCs[Call] = CC; |
3580 | } |
3581 | 
3583 | /// Get the CondCode that's to be used to test the result of the comparison |
3584 | /// libcall against zero. |
3585 | /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. |
3586 | ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { |
3587 | return CmpLibcallCCs[Call]; |
3588 | } |
3589 | |
3590 | 
3592 | void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { |
3593 | Libcalls.setLibcallCallingConv(Call, CC); |
3594 | } |
3595 | |
3596 | /// Get the CallingConv that should be used for the specified libcall. |
3597 | CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { |
3598 | return Libcalls.getLibcallCallingConv(Call); |
3599 | } |
3600 | |
3601 | /// Execute target specific actions to finalize target lowering. |
3602 | /// This is used to set extra flags in MachineFrameInformation and to freeze
3603 | /// the set of reserved registers.
3604 | /// The default implementation just freezes the set of reserved registers. |
3605 | virtual void finalizeLowering(MachineFunction &MF) const; |
3606 | |
3607 | /// Returns true if it's profitable to allow merging a store of loads when |
3608 | /// there are function calls between the load and the store. |
3609 | virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const { return true; } |
3610 | |
3611 | //===----------------------------------------------------------------------===// |
3612 | // GlobalISel Hooks |
3613 | //===----------------------------------------------------------------------===// |
3614 | /// Check whether or not \p MI needs to be moved close to its uses. |
3615 | virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const; |
3616 | |
3617 | |
3618 | private: |
3619 | const TargetMachine &TM; |
3620 | |
3621 | /// Tells the code generator that the target has multiple (allocatable) |
3622 | /// condition registers that can be used to store the results of comparisons |
3623 | /// for use by selects and conditional branches. With multiple condition |
3624 | /// registers, the code generator will not aggressively sink comparisons into |
3625 | /// the blocks of their users. |
3626 | bool HasMultipleConditionRegisters; |
3627 | |
3628 | /// Tells the code generator that the target has BitExtract instructions. |
3629 | /// The code generator will aggressively sink "shift"s into the blocks of |
3630 | /// their users if the users will generate "and" instructions which can be |
3631 | /// combined with "shift" to BitExtract instructions. |
3632 | bool HasExtractBitsInsn; |
3633 | |
3634 | /// Tells the code generator to bypass slow divide or remainder |
3635 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code |
3636 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
3637 | /// div/rem when the operands are positive and less than 256. |
3638 | DenseMap<unsigned int, unsigned int> BypassSlowDivWidths; |
3639 | |
3640 | /// Tells the code generator that it shouldn't generate extra flow control |
3641 | /// instructions and should attempt to combine flow control instructions via |
3642 | /// predication. |
3643 | bool JumpIsExpensive; |
3644 | |
3645 | /// Information about the contents of the high-bits in boolean values held in |
3646 | /// a type wider than i1. See getBooleanContents. |
3647 | BooleanContent BooleanContents; |
3648 | |
3649 | /// Information about the contents of the high-bits in boolean values held in |
3650 | /// a type wider than i1. See getBooleanContents. |
3651 | BooleanContent BooleanFloatContents; |
3652 | |
3653 | /// Information about the contents of the high-bits in boolean vector values |
3654 | /// when the element type is wider than i1. See getBooleanContents. |
3655 | BooleanContent BooleanVectorContents; |
3656 | |
3657 | /// The target scheduling preference: shortest possible total cycles or lowest |
3658 | /// register usage. |
3659 | Sched::Preference SchedPreferenceInfo; |
3660 | |
3661 | /// The minimum alignment that any argument on the stack needs to have. |
3662 | Align MinStackArgumentAlignment; |
3663 | |
3664 | /// The minimum function alignment (used when optimizing for size, and to |
3665 | /// prevent explicitly provided alignment from leading to incorrect code). |
3666 | Align MinFunctionAlignment; |
3667 | |
3668 | /// The preferred function alignment (used when alignment unspecified and |
3669 | /// optimizing for speed). |
3670 | Align PrefFunctionAlignment; |
3671 | |
3672 | /// The preferred loop alignment (in log2, not in bytes). |
3673 | Align PrefLoopAlignment; |
3674 | /// The maximum amount of bytes permitted to be emitted for alignment. |
3675 | unsigned MaxBytesForAlignment; |
3676 | |
3677 | /// Size in bits of the maximum atomics size the backend supports. |
3678 | /// Accesses larger than this will be expanded by AtomicExpandPass. |
3679 | unsigned MaxAtomicSizeInBitsSupported; |
3680 | |
3681 | /// Size in bits of the maximum div/rem size the backend supports. |
3682 | /// Larger operations will be expanded by ExpandLargeDivRem. |
3683 | unsigned MaxDivRemBitWidthSupported; |
3684 | |
3685 | /// Size in bits of the maximum fp to/from int conversion size the |
3686 | /// backend supports. Larger operations will be expanded by |
3687 | /// ExpandFp. |
3688 | unsigned MaxLargeFPConvertBitWidthSupported; |
3689 | |
3690 | /// Size in bits of the minimum cmpxchg or ll/sc operation the |
3691 | /// backend supports. |
3692 | unsigned MinCmpXchgSizeInBits; |
3693 | |
3694 | /// This indicates if the target supports unaligned atomic operations. |
3695 | bool SupportsUnalignedAtomics; |
3696 | |
3697 | /// If set to a physical register, this specifies the register that |
3698 | /// llvm.stacksave/llvm.stackrestore should save and restore. |
3699 | Register StackPointerRegisterToSaveRestore; |
3700 | |
3701 | /// This indicates the default register class to use for each ValueType the |
3702 | /// target supports natively. |
3703 | const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE]; |
3704 | uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE]; |
3705 | MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE]; |
3706 | |
3707 | /// This indicates the "representative" register class to use for each |
3708 | /// ValueType the target supports natively. This information is used by the |
3709 | /// scheduler to track register pressure. By default, the representative |
3710 | /// register class is the largest legal super-reg register class of the |
3711 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
3712 | /// representative class would be GR32. |
3713 | const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0}; |
3714 | |
3715 | /// This indicates the "cost" of the "representative" register class for each |
3716 | /// ValueType. The cost is used by the scheduler to approximate register |
3717 | /// pressure. |
3718 | uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE]; |
3719 | |
3720 | /// For any value types we are promoting or expanding, this contains the value |
3721 | /// type that we are changing to. For Expanded types, this contains one step |
3722 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
3723 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
3724 | /// the same type (e.g. i32 -> i32). |
3725 | MVT TransformToType[MVT::VALUETYPE_SIZE]; |
3726 | |
3727 | /// For each operation and each value type, keep a LegalizeAction that |
3728 | /// indicates how instruction selection should deal with the operation. Most |
3729 | /// operations are Legal (aka, supported natively by the target), but |
3730 | /// operations that are not should be described. Note that operations on |
3731 | /// non-legal value types are not described here. |
3732 | LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END]; |
3733 | |
3734 | /// For each load extension type and each value type, keep a LegalizeAction |
3735 | /// that indicates how instruction selection should deal with a load of a |
3736 | /// specific value type and extension type. Uses 4-bits to store the action |
3737 | /// for each of the 4 load ext types. |
3738 | uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3739 | |
3740 | /// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand |
3741 | /// (default) values are supported. |
3742 | uint16_t AtomicLoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3743 | |
3744 | /// For each value type pair keep a LegalizeAction that indicates whether a |
3745 | /// truncating store of a specific value type and truncating type is legal. |
3746 | LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3747 | |
3748 | /// For each indexed mode and each value type, keep a quad of LegalizeAction |
3749 | /// that indicates how instruction selection should deal with the load / |
3750 | /// store / maskedload / maskedstore. |
3751 | /// |
3752 | /// The first dimension is the value_type for the reference. The second |
3753 | /// dimension represents the various modes for load store. |
3754 | uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE]; |
3755 | |
3756 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
3757 | /// indicates how instruction selection should deal with the condition code. |
3758 | /// |
3759 | /// Because each CC action takes up 4 bits, we need to have the array size be |
3760 | /// large enough to fit all of the value types. This can be done by rounding |
3761 | /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8. |
3762 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8]; |
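 | |
 | // Worked example of the packing described above (the SimpleTy value is |
 | // hypothetical): with 4 bits per action, each uint32_t holds 8 value types, |
 | // so the action for a type with SimpleTy == 19 under condition code CC lives |
 | // in CondCodeActions[CC][19 / 8] at bit offset (19 % 8) * 4, i.e. word 2, |
 | // bits [12, 16). |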
3763 | |
3764 | using PartialReduceActionTypes = |
3765 | std::tuple<unsigned, MVT::SimpleValueType, MVT::SimpleValueType>; |
3766 | /// For each partial reduce opcode, result type and input type combination, |
3767 | /// keep a LegalizeAction which indicates how instruction selection should |
3768 | /// deal with this operation. |
3769 | DenseMap<PartialReduceActionTypes, LegalizeAction> PartialReduceMLAActions; |
3770 | |
3771 | ValueTypeActionImpl ValueTypeActions; |
3772 | |
3773 | private: |
3774 | /// Targets can specify ISD nodes that they would like PerformDAGCombine |
3775 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
3776 | /// array. |
3777 | unsigned char |
3778 | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; |
3779 | |
3780 | /// For operations that must be promoted to a specific type, this holds the |
3781 | /// destination type. This map should be sparse, so don't hold it as an |
3782 | /// array. |
3783 | /// |
3784 | /// Targets add entries to this map with AddPromotedToType(..), clients access |
3785 | /// this with getTypeToPromoteTo(..). |
3786 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
3787 | PromoteToType; |
3788 | |
3789 | /// The list of libcalls that the target will use. |
3790 | RTLIB::RuntimeLibcallsInfo Libcalls; |
3791 | |
3792 | /// The ISD::CondCode that should be used to test the result of each of the |
3793 | /// comparison libcall against zero. |
3794 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
3795 | |
3796 | /// The bits of IndexedModeActions used to store the legalization actions. |
3797 | /// We store the data as | ML | MS | L | S | each taking 4 bits. |
3798 | enum IndexedModeActionsBits { |
3799 | IMAB_Store = 0, |
3800 | IMAB_Load = 4, |
3801 | IMAB_MaskedStore = 8, |
3802 | IMAB_MaskedLoad = 12 |
3803 | }; |
3804 | |
3805 | void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, |
3806 | LegalizeAction Action) { |
3807 | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
3808 | (unsigned)Action < 0xf && "Table isn't big enough!"); |
3809 | unsigned Ty = (unsigned)VT.SimpleTy; |
3810 | IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); |
3811 | IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; |
3812 | } |
3813 | |
3814 | LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, |
3815 | unsigned Shift) const { |
3816 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
3817 | "Table isn't big enough!"); |
3818 | unsigned Ty = (unsigned)VT.SimpleTy; |
3819 | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); |
3820 | } |
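 | |
 | // A hedged round-trip sketch of the 4-bit packing used by the two helpers |
 | // above; the indexed mode, value type and action are placeholders: |
 | // \code |
 | //   setIndexedModeAction(ISD::PRE_INC, MVT::i32, IMAB_Load, Legal); |
 | //   LegalizeAction A = getIndexedModeAction(ISD::PRE_INC, MVT::i32, IMAB_Load); |
 | //   // A == Legal; the action occupies bits [4, 8) of the packed uint16_t. |
 | // \endcode |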
3821 | |
3822 | protected: |
3823 | /// Return true if the extension represented by \p I is free. |
3824 | /// \pre \p I is a sign, zero, or fp extension and |
3825 | /// is[Z|FP]ExtFree of the related types is not true. |
3826 | virtual bool isExtFreeImpl(const Instruction *I) const { return false; } |
3827 | |
3828 | /// Depth that GatherAllAliases should continue looking for chain |
3829 | /// dependencies when trying to find a more preferable chain. As an |
3830 | /// approximation, this should be more than the number of consecutive stores |
3831 | /// expected to be merged. |
3832 | unsigned GatherAllAliasesMaxDepth; |
3833 | |
3834 | /// \brief Specify maximum number of store instructions per memset call. |
3835 | /// |
3836 | /// When lowering \@llvm.memset this field specifies the maximum number of |
3837 | /// store operations that may be substituted for the call to memset. Targets |
3838 | /// must set this value based on the cost threshold for that target. Targets |
3839 | /// should assume that the memset will be done using as many of the largest |
3840 | /// store operations first, followed by smaller ones, if necessary, per |
3841 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
3842 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
3843 | /// store. This only applies to setting a constant array of a constant size. |
3844 | unsigned MaxStoresPerMemset; |
3845 | /// Likewise for functions with the OptSize attribute. |
3846 | unsigned MaxStoresPerMemsetOptSize; |
3847 | |
3848 | /// \brief Specify maximum number of store instructions per memcpy call. |
3849 | /// |
3850 | /// When lowering \@llvm.memcpy this field specifies the maximum number of |
3851 | /// store operations that may be substituted for a call to memcpy. Targets |
3852 | /// must set this value based on the cost threshold for that target. Targets |
3853 | /// should assume that the memcpy will be done using as many of the largest |
3854 | /// store operations first, followed by smaller ones, if necessary, per |
3855 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
3856 | /// with 32-bit alignment would result in one 4-byte store, one 2-byte store |
3857 | /// and one 1-byte store. This only applies to copying a constant array of |
3858 | /// constant size. |
3859 | unsigned MaxStoresPerMemcpy; |
3860 | /// Likewise for functions with the OptSize attribute. |
3861 | unsigned MaxStoresPerMemcpyOptSize; |
3862 | /// \brief Specify max number of store instructions to glue in inlined memcpy. |
3863 | /// |
3864 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number |
3865 | /// of store instructions to keep together. This helps in pairing and |
3866 | /// vectorization later on. |
3867 | unsigned MaxGluedStoresPerMemcpy = 0; |
3868 | |
3869 | /// \brief Specify maximum number of load instructions per memcmp call. |
3870 | /// |
3871 | /// When lowering \@llvm.memcmp this field specifies the maximum number of |
3872 | /// pairs of load operations that may be substituted for a call to memcmp. |
3873 | /// Targets must set this value based on the cost threshold for that target. |
3874 | /// Targets should assume that the memcmp will be done using as many of the |
3875 | /// largest load operations first, followed by smaller ones, if necessary, per |
3876 | /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine |
3877 | /// with 32-bit alignment would result in one 4-byte load, one 2-byte load |
3878 | /// and one 1-byte load. This only applies to comparing a constant array of |
3879 | /// constant size. |
3880 | unsigned MaxLoadsPerMemcmp; |
3881 | /// Likewise for functions with the OptSize attribute. |
3882 | unsigned MaxLoadsPerMemcmpOptSize; |
3883 | |
3884 | /// \brief Specify maximum number of store instructions per memmove call. |
3885 | /// |
3886 | /// When lowering \@llvm.memmove this field specifies the maximum number of |
3887 | /// store instructions that may be substituted for a call to memmove. Targets |
3888 | /// must set this value based on the cost threshold for that target. Targets |
3889 | /// should assume that the memmove will be done using as many of the largest |
3890 | /// store operations first, followed by smaller ones, if necessary, per |
3891 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
3892 | /// with 8-bit alignment would result in nine 1-byte stores. This only |
3893 | /// applies to copying a constant array of constant size. |
3894 | unsigned MaxStoresPerMemmove; |
3895 | /// Likewise for functions with the OptSize attribute. |
3896 | unsigned MaxStoresPerMemmoveOptSize; |
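 | |
 | // A hedged sketch of how a target constructor typically tunes the memory-op |
 | // expansion thresholds above; the numbers are placeholders, not |
 | // recommendations for any real target: |
 | // \code |
 | //   MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; |
 | //   MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = |
 | //       MaxStoresPerMemmoveOptSize = 4; |
 | //   MaxLoadsPerMemcmp = 8; |
 | //   MaxLoadsPerMemcmpOptSize = 4; |
 | // \endcode |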
3897 | |
3898 | /// Tells the code generator that select is more expensive than a branch if |
3899 | /// the branch is usually predicted right. |
3900 | bool PredictableSelectIsExpensive; |
3901 | |
3902 | /// \see enableExtLdPromotion. |
3903 | bool EnableExtLdPromotion; |
3904 | |
3905 | /// Return true if the value types that can be represented by the specified |
3906 | /// register class are all legal. |
3907 | bool isLegalRC(const TargetRegisterInfo &TRI, |
3908 | const TargetRegisterClass &RC) const; |
3909 | |
3910 | /// Replace/modify any TargetFrameIndex operands with a target-dependent |
3911 | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
3912 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
3913 | MachineBasicBlock *MBB) const; |
3914 | |
3915 | bool IsStrictFPEnabled; |
3916 | }; |
3917 | |
3918 | /// This class defines information used to lower LLVM code to legal SelectionDAG |
3919 | /// operators that the target instruction selector can accept natively. |
3920 | /// |
3921 | /// This class also defines callbacks that targets must implement to lower |
3922 | /// target-specific constructs to SelectionDAG operators. |
3923 | class LLVM_ABI TargetLowering : public TargetLoweringBase { |
3924 | public: |
3925 | struct DAGCombinerInfo; |
3926 | struct MakeLibCallOptions; |
3927 | |
3928 | TargetLowering(const TargetLowering &) = delete; |
3929 | TargetLowering &operator=(const TargetLowering &) = delete; |
3930 | |
3931 | explicit TargetLowering(const TargetMachine &TM); |
3932 | ~TargetLowering() override; |
3933 | |
3934 | bool isPositionIndependent() const; |
3935 | |
3936 | virtual bool isSDNodeSourceOfDivergence(const SDNode *N, |
3937 | FunctionLoweringInfo *FLI, |
3938 | UniformityInfo *UA) const { |
3939 | return false; |
3940 | } |
3941 | |
3942 | // Lets the target control the following reassociation of operands: (op (op x, |
3943 | // c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is y. By |
3944 | // default, any case where N0 has a single use is considered profitable. This |
3945 | // behavior reflects the condition replaced by this target hook call in the |
3946 | // DAGCombiner. Any particular target can implement its own heuristic to |
3947 | // restrict the common combiner. |
3948 | virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
3949 | SDValue N1) const { |
3950 | return N0.hasOneUse(); |
3951 | } |
3952 | |
3953 | // Lets the target control the following reassociation of operands: (op (op x, |
3954 | // c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is y. By |
3955 | // default, any case where N0 has a single use is considered profitable. This |
3956 | // behavior reflects the condition replaced by this target hook call in the |
3957 | // combiner. Any particular target can implement its own heuristic to |
3958 | // restrict the common combiner. |
3959 | virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, |
3960 | Register N1) const { |
3961 | return MRI.hasOneNonDBGUse(N0); |
3962 | } |
3963 | |
3964 | virtual bool isSDNodeAlwaysUniform(const SDNode *N) const { |
3965 | return false; |
3966 | } |
3967 | |
3968 | /// Returns true, and sets the base pointer, offset pointer and addressing |
3969 | /// mode by reference, if the node's address can be legally represented as a |
3970 | /// pre-indexed load / store address. |
3971 | virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, |
3972 | SDValue &/*Offset*/, |
3973 | ISD::MemIndexedMode &/*AM*/, |
3974 | SelectionDAG &/*DAG*/) const { |
3975 | return false; |
3976 | } |
3977 | |
3978 | /// Returns true, and sets the base pointer, offset pointer and addressing |
3979 | /// mode by reference, if this node can be combined with a load / store to |
3980 | /// form a post-indexed load / store. |
3981 | virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, |
3982 | SDValue &/*Base*/, |
3983 | SDValue &/*Offset*/, |
3984 | ISD::MemIndexedMode &/*AM*/, |
3985 | SelectionDAG &/*DAG*/) const { |
3986 | return false; |
3987 | } |
3988 | |
3989 | /// Returns true if the specified base+offset is a legal indexed addressing |
3990 | /// mode for this target. \p MI is the load or store instruction that is being |
3991 | /// considered for transformation. |
3992 | virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
3993 | bool IsPre, MachineRegisterInfo &MRI) const { |
3994 | return false; |
3995 | } |
3996 | |
3997 | /// Return the entry encoding for a jump table in the current function. The |
3998 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
3999 | virtual unsigned getJumpTableEncoding() const; |
4000 | |
4001 | virtual MVT getJumpTableRegTy(const DataLayout &DL) const { |
4002 | return getPointerTy(DL); |
4003 | } |
4004 | |
4005 | virtual const MCExpr * |
4006 | LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, |
4007 | const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, |
4008 | MCContext &/*Ctx*/) const { |
4009 | llvm_unreachable("Need to implement this hook if target has custom JTIs"); |
4010 | } |
4011 | |
4012 | /// Returns relocation base for the given PIC jumptable. |
4013 | virtual SDValue getPICJumpTableRelocBase(SDValue Table, |
4014 | SelectionDAG &DAG) const; |
4015 | |
4016 | /// This returns the relocation base for the given PIC jumptable, the same as |
4017 | /// getPICJumpTableRelocBase, but as an MCExpr. |
4018 | virtual const MCExpr * |
4019 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
4020 | unsigned JTI, MCContext &Ctx) const; |
4021 | |
4022 | /// Return true if folding a constant offset with the given GlobalAddress is |
4023 | /// legal. It is frequently not legal in PIC relocation models. |
4024 | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; |
4025 | |
4026 | /// On x86, return true if the operand with index OpNo is a CALL or JUMP |
4027 | /// instruction, which can use either a memory constraint or an address |
4028 | /// constraint. -fasm-blocks "__asm call foo" lowers to |
4029 | /// call void asm sideeffect inteldialect "call ${0:P}", "*m..." |
4030 | /// |
4031 | /// This function is used by a hack to choose the address constraint, |
4032 | /// lowering to a direct call. |
4033 | virtual bool |
4034 | isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, |
4035 | unsigned OpNo) const { |
4036 | return false; |
4037 | } |
4038 | |
4039 | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
4040 | SDValue &Chain) const; |
4041 | |
4042 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
4043 | SDValue &NewRHS, ISD::CondCode &CCCode, |
4044 | const SDLoc &DL, const SDValue OldLHS, |
4045 | const SDValue OldRHS) const; |
4046 | |
4047 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
4048 | SDValue &NewRHS, ISD::CondCode &CCCode, |
4049 | const SDLoc &DL, const SDValue OldLHS, |
4050 | const SDValue OldRHS, SDValue &Chain, |
4051 | bool IsSignaling = false) const; |
4052 | |
4053 | virtual SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, |
4054 | SDValue Chain, MachineMemOperand *MMO, |
4055 | SDValue &NewLoad, SDValue Ptr, |
4056 | SDValue PassThru, SDValue Mask) const { |
4057 | llvm_unreachable("Not Implemented"); |
4058 | } |
4059 | |
4060 | virtual SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, |
4061 | SDValue Chain, MachineMemOperand *MMO, |
4062 | SDValue Ptr, SDValue Val, |
4063 | SDValue Mask) const { |
4064 | llvm_unreachable("Not Implemented"); |
4065 | } |
4066 | |
4067 | /// Returns a pair of (return value, chain). |
4068 | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. |
4069 | std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, |
4070 | EVT RetVT, ArrayRef<SDValue> Ops, |
4071 | MakeLibCallOptions CallOptions, |
4072 | const SDLoc &dl, |
4073 | SDValue Chain = SDValue()) const; |
4074 | |
4075 | /// Check whether parameters to a call that are passed in callee saved |
4076 | /// registers are the same as from the calling function. This needs to be |
4077 | /// checked for tail call eligibility. |
4078 | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, |
4079 | const uint32_t *CallerPreservedMask, |
4080 | const SmallVectorImpl<CCValAssign> &ArgLocs, |
4081 | const SmallVectorImpl<SDValue> &OutVals) const; |
4082 | |
4083 | //===--------------------------------------------------------------------===// |
4084 | // TargetLowering Optimization Methods |
4085 | // |
4086 | |
4087 | /// A convenience struct that encapsulates a DAG, and two SDValues for |
4088 | /// returning information from TargetLowering to its clients that want to |
4089 | /// combine. |
4090 | struct TargetLoweringOpt { |
4091 | SelectionDAG &DAG; |
4092 | bool LegalTys; |
4093 | bool LegalOps; |
4094 | SDValue Old; |
4095 | SDValue New; |
4096 | |
4097 | explicit TargetLoweringOpt(SelectionDAG &InDAG, |
4098 | bool LT, bool LO) : |
4099 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
4100 | |
4101 | bool LegalTypes() const { return LegalTys; } |
4102 | bool LegalOperations() const { return LegalOps; } |
4103 | |
4104 | bool CombineTo(SDValue O, SDValue N) { |
4105 | Old = O; |
4106 | New = N; |
4107 | return true; |
4108 | } |
4109 | }; |
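 | |
 | // A minimal usage sketch of TargetLoweringOpt; the DAG, the DAGCombinerInfo |
 | // DCI, and the trySimplify helper are assumed/hypothetical: |
 | // \code |
 | //   TargetLoweringOpt TLO(DAG, /*LT=*/!DCI.isBeforeLegalize(), |
 | //                         /*LO=*/!DCI.isBeforeLegalizeOps()); |
 | //   if (SDValue Simplified = trySimplify(Op)) // hypothetical helper |
 | //     return TLO.CombineTo(Op, Simplified);   // records Old/New, returns true |
 | // \endcode |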
4110 | |
4111 | /// Determines the optimal series of memory ops to replace the memset / memcpy. |
4112 | /// Return true if the number of memory ops is below the threshold (Limit). |
4113 | /// Note that this is always the case when Limit is ~0. |
4114 | /// It returns the types of the sequence of memory ops to perform |
4115 | /// memset / memcpy by reference. |
4116 | virtual bool |
4117 | findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, |
4118 | const MemOp &Op, unsigned DstAS, unsigned SrcAS, |
4119 | const AttributeList &FuncAttributes) const; |
4120 | |
4121 | /// Check to see if the specified operand of the specified instruction is a |
4122 | /// constant integer. If so, check to see if there are any bits set in the |
4123 | /// constant that are not demanded. If so, shrink the constant and return |
4124 | /// true. |
4125 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
4126 | const APInt &DemandedElts, |
4127 | TargetLoweringOpt &TLO) const; |
4128 | |
4129 | /// Helper wrapper around ShrinkDemandedConstant, demanding all elements. |
4130 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
4131 | TargetLoweringOpt &TLO) const; |
4132 | |
4133 | // Target hook to do target-specific const optimization, which is called by |
4134 | // ShrinkDemandedConstant. This function should return true if the target |
4135 | // doesn't want ShrinkDemandedConstant to further optimize the constant. |
4136 | virtual bool targetShrinkDemandedConstant(SDValue Op, |
4137 | const APInt &DemandedBits, |
4138 | const APInt &DemandedElts, |
4139 | TargetLoweringOpt &TLO) const { |
4140 | return false; |
4141 | } |
4142 | |
4143 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. |
4144 | /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, |
4145 | /// but it could be generalized for targets with other types of implicit |
4146 | /// widening casts. |
4147 | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, |
4148 | const APInt &DemandedBits, |
4149 | TargetLoweringOpt &TLO) const; |
4150 | |
4151 | /// Look at Op. At this point, we know that only the DemandedBits bits of the |
4152 | /// result of Op are ever used downstream. If we can use this information to |
4153 | /// simplify Op, create a new simplified DAG node and return true, returning |
4154 | /// the original and new nodes in Old and New. Otherwise, analyze the |
4155 | /// expression and return a mask of KnownOne and KnownZero bits for the |
4156 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
4157 | /// be accurate for those bits in the Demanded masks. |
4158 | /// \p AssumeSingleUse When this parameter is true, this function will |
4159 | /// attempt to simplify \p Op even if there are multiple uses. |
4160 | /// Callers are responsible for correctly updating the DAG based on the |
4161 | /// results of this function, because simply replacing TLO.Old |
4162 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
4163 | /// has multiple uses. |
4164 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4165 | const APInt &DemandedElts, KnownBits &Known, |
4166 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
4167 | bool AssumeSingleUse = false) const; |
4168 | |
4169 | /// Helper wrapper around SimplifyDemandedBits, demanding all elements. |
4170 | /// Adds Op back to the worklist upon success. |
4171 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4172 | KnownBits &Known, TargetLoweringOpt &TLO, |
4173 | unsigned Depth = 0, |
4174 | bool AssumeSingleUse = false) const; |
4175 | |
4176 | /// Helper wrapper around SimplifyDemandedBits. |
4177 | /// Adds Op back to the worklist upon success. |
4178 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4179 | DAGCombinerInfo &DCI) const; |
4180 | |
4181 | /// Helper wrapper around SimplifyDemandedBits. |
4182 | /// Adds Op back to the worklist upon success. |
4183 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4184 | const APInt &DemandedElts, |
4185 | DAGCombinerInfo &DCI) const; |
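 | |
 | // A hedged sketch of a typical call site in a target's PerformDAGCombine, |
 | // demanding only the low 32 bits of a 64-bit value; the node N and the widths |
 | // are placeholders: |
 | // \code |
 | //   APInt LowBits = APInt::getLowBitsSet(64, 32); |
 | //   if (SimplifyDemandedBits(SDValue(N, 0), LowBits, DCI)) |
 | //     return SDValue(N, 0); // N was simplified in place |
 | // \endcode |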
4186 | |
4187 | /// More limited version of SimplifyDemandedBits that can be used to "look |
4188 | /// through" ops that don't contribute to the DemandedBits/DemandedElts - |
4189 | /// bitwise ops etc. |
4190 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, |
4191 | const APInt &DemandedElts, |
4192 | SelectionDAG &DAG, |
4193 | unsigned Depth = 0) const; |
4194 | |
4195 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all |
4196 | /// elements. |
4197 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, |
4198 | SelectionDAG &DAG, |
4199 | unsigned Depth = 0) const; |
4200 | |
4201 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all |
4202 | /// bits from only some vector elements. |
4203 | SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, |
4204 | const APInt &DemandedElts, |
4205 | SelectionDAG &DAG, |
4206 | unsigned Depth = 0) const; |
4207 | |
4208 | /// Look at Vector Op. At this point, we know that only the DemandedElts |
4209 | /// elements of the result of Op are ever used downstream. If we can use |
4210 | /// this information to simplify Op, create a new simplified DAG node and |
4211 | /// return true, storing the original and new nodes in TLO. |
4212 | /// Otherwise, analyze the expression and return a mask of KnownUndef and |
4213 | /// KnownZero elements for the expression (used to simplify the caller). |
4214 | /// The KnownUndef/Zero elements may only be accurate for those bits |
4215 | /// in the DemandedMask. |
4216 | /// \p AssumeSingleUse When this parameter is true, this function will |
4217 | /// attempt to simplify \p Op even if there are multiple uses. |
4218 | /// Callers are responsible for correctly updating the DAG based on the |
4219 | /// results of this function, because simply replacing TLO.Old |
4220 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
4221 | /// has multiple uses. |
4222 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, |
4223 | APInt &KnownUndef, APInt &KnownZero, |
4224 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
4225 | bool AssumeSingleUse = false) const; |
4226 | |
4227 | /// Helper wrapper around SimplifyDemandedVectorElts. |
4228 | /// Adds Op back to the worklist upon success. |
4229 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, |
4230 | DAGCombinerInfo &DCI) const; |
4231 | |
4232 | /// Return true if the target supports simplifying demanded vector elements by |
4233 | /// converting them to undefs. |
4234 | virtual bool |
4235 | shouldSimplifyDemandedVectorElts(SDValue Op, |
4236 | const TargetLoweringOpt &TLO) const { |
4237 | return true; |
4238 | } |
4239 | |
4240 | /// Determine which of the bits specified in Mask are known to be either zero |
4241 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
4242 | /// argument allows us to only collect the known bits that are shared by the |
4243 | /// requested vector elements. |
4244 | virtual void computeKnownBitsForTargetNode(const SDValue Op, |
4245 | KnownBits &Known, |
4246 | const APInt &DemandedElts, |
4247 | const SelectionDAG &DAG, |
4248 | unsigned Depth = 0) const; |
4249 | |
4250 | /// Determine which of the bits specified in Mask are known to be either zero |
4251 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
4252 | /// argument allows us to only collect the known bits that are shared by the |
4253 | /// requested vector elements. This is for GISel. |
4254 | virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, |
4255 | Register R, KnownBits &Known, |
4256 | const APInt &DemandedElts, |
4257 | const MachineRegisterInfo &MRI, |
4258 | unsigned Depth = 0) const; |
4259 | |
4260 | virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, |
4261 | Register R, |
4262 | KnownFPClass &Known, |
4263 | const APInt &DemandedElts, |
4264 | const MachineRegisterInfo &MRI, |
4265 | unsigned Depth = 0) const; |
4266 | |
4267 | /// Determine the known alignment for the pointer value \p R. This can |
4268 | /// typically be inferred from the number of low known 0 bits. However, for a |
4269 | /// pointer with a non-integral address space, the alignment value may be |
4270 | /// independent from the known low bits. |
4271 | virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, |
4272 | Register R, |
4273 | const MachineRegisterInfo &MRI, |
4274 | unsigned Depth = 0) const; |
4275 | |
4276 | /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. |
4277 | /// Default implementation computes low bits based on alignment |
4278 | /// information. This should preserve known bits passed into it. |
4279 | virtual void computeKnownBitsForFrameIndex(int FIOp, |
4280 | KnownBits &Known, |
4281 | const MachineFunction &MF) const; |
4282 | |
4283 | /// This method can be implemented by targets that want to expose additional |
4284 | /// information about sign bits to the DAG Combiner. The DemandedElts |
4285 | /// argument allows us to only collect the minimum sign bits that are shared |
4286 | /// by the requested vector elements. |
4287 | virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
4288 | const APInt &DemandedElts, |
4289 | const SelectionDAG &DAG, |
4290 | unsigned Depth = 0) const; |
4291 | |
4292 | /// This method can be implemented by targets that want to expose additional |
4293 | /// information about sign bits to GlobalISel combiners. The DemandedElts |
4294 | /// argument allows us to only collect the minimum sign bits that are shared |
4295 | /// by the requested vector elements. |
4296 | virtual unsigned computeNumSignBitsForTargetInstr( |
4297 | GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, |
4298 | const MachineRegisterInfo &MRI, unsigned Depth = 0) const; |
4299 | |
4300 | /// Attempt to simplify any target nodes based on the demanded vector |
4301 | /// elements, returning true on success. Otherwise, analyze the expression and |
4302 | /// return a mask of KnownUndef and KnownZero elements for the expression |
4303 | /// (used to simplify the caller). The KnownUndef/Zero elements may only be |
4304 | /// accurate for those bits in the DemandedMask. |
4305 | virtual bool SimplifyDemandedVectorEltsForTargetNode( |
4306 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, |
4307 | APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; |
4308 | |
4309 | /// Attempt to simplify any target nodes based on the demanded bits/elts, |
4310 | /// returning true on success. Otherwise, analyze the |
4311 | /// expression and return a mask of KnownOne and KnownZero bits for the |
4312 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
4313 | /// be accurate for those bits in the Demanded masks. |
4314 | virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
4315 | const APInt &DemandedBits, |
4316 | const APInt &DemandedElts, |
4317 | KnownBits &Known, |
4318 | TargetLoweringOpt &TLO, |
4319 | unsigned Depth = 0) const; |
4320 | |
4321 | /// More limited version of SimplifyDemandedBits that can be used to "look |
4322 | /// through" ops that don't contribute to the DemandedBits/DemandedElts - |
4323 | /// bitwise ops etc. |
4324 | virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode( |
4325 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
4326 | SelectionDAG &DAG, unsigned Depth) const; |
4327 | |
4328 | /// Return true if this function can prove that \p Op is never poison |
4329 | /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts |
4330 | /// argument limits the check to the requested vector elements. |
4331 | virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( |
4332 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
4333 | bool PoisonOnly, unsigned Depth) const; |
4334 | |
4335 | /// Return true if Op can create undef or poison from non-undef & non-poison |
4336 | /// operands. The DemandedElts argument limits the check to the requested |
4337 | /// vector elements. |
4338 | virtual bool |
4339 | canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, |
4340 | const SelectionDAG &DAG, bool PoisonOnly, |
4341 | bool ConsiderFlags, unsigned Depth) const; |
4342 | |
4343 | /// Tries to build a legal vector shuffle using the provided parameters |
4344 | /// or equivalent variations. The Mask argument may be modified as the |
4345 | /// function tries different variations. |
4346 | /// Returns an empty SDValue if the operation fails. |
4347 | SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, |
4348 | SDValue N1, MutableArrayRef<int> Mask, |
4349 | SelectionDAG &DAG) const; |
4350 | |
4351 | /// This method returns the constant pool value that will be loaded by LD. |
4352 | /// NOTE: You must check for implicit extensions of the constant by LD. |
4353 | virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; |
4354 | |
4355 | /// If \p SNaN is false, \returns true if \p Op is known to never be any |
4356 | /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a |
4357 | /// signaling NaN. |
4358 | virtual bool isKnownNeverNaNForTargetNode(SDValue Op, |
4359 | const APInt &DemandedElts, |
4360 | const SelectionDAG &DAG, |
4361 | bool SNaN = false, |
4362 | unsigned Depth = 0) const; |
4363 | |
4364 | /// Return true if vector \p Op has the same value across all \p DemandedElts, |
4365 | /// indicating any elements which may be undef in the output \p UndefElts. |
4366 | virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, |
4367 | APInt &UndefElts, |
4368 | const SelectionDAG &DAG, |
4369 | unsigned Depth = 0) const; |
4370 | |
4371 | /// Returns true if the given Opc is considered a canonical constant for the |
4372 | /// target, which should not be transformed back into a BUILD_VECTOR. |
4373 | virtual bool isTargetCanonicalConstantNode(SDValue Op) const { |
4374 | return Op.getOpcode() == ISD::SPLAT_VECTOR || |
4375 | Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS; |
4376 | } |
4377 | |
4378 | struct DAGCombinerInfo { |
4379 | void *DC; // The DAG Combiner object. |
4380 | CombineLevel Level; |
4381 | bool CalledByLegalizer; |
4382 | |
4383 | public: |
4384 | SelectionDAG &DAG; |
4385 | |
4386 | DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) |
4387 | : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} |
4388 | |
4389 | bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } |
4390 | bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } |
4391 | bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } |
4392 | CombineLevel getDAGCombineLevel() { return Level; } |
4393 | bool isCalledByLegalizer() const { return CalledByLegalizer; } |
4394 | |
4395 | LLVM_ABI void AddToWorklist(SDNode *N); |
4396 | LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, |
4397 | bool AddTo = true); |
4398 | LLVM_ABI SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); |
4399 | LLVM_ABI SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, |
4400 | bool AddTo = true); |
4401 | |
4402 | LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N); |
4403 | |
4404 | LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); |
4405 | }; |
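 | |
 | // A hedged sketch of how a target's PerformDAGCombine commonly uses |
 | // DAGCombinerInfo; the folded node, operands A/B and types are placeholders: |
 | // \code |
 | //   if (DCI.isBeforeLegalize()) |
 | //     return SDValue();                // defer until types are legal |
 | //   SelectionDAG &DAG = DCI.DAG; |
 | //   SDValue Folded = |
 | //       DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), A, B); |
 | //   return DCI.CombineTo(N, Folded);   // replace N with the folded value |
 | // \endcode |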
4406 | |
4407 | /// Return true if N is a constant or constant vector equal to the true value |
4408 | /// from getBooleanContents(). |
4409 | bool isConstTrueVal(SDValue N) const; |
4410 | |
4411 | /// Return true if N is a constant or constant vector equal to the false value |
4412 | /// from getBooleanContents(). |
4413 | bool isConstFalseVal(SDValue N) const; |
4414 | |
4415 | /// Return true if \p N is a true value when extended to \p VT. |
4416 | bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; |
4417 | |
4418 | /// Try to simplify a setcc built with the specified operands and cc. If it is |
4419 | /// unable to simplify it, return a null SDValue. |
4420 | SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
4421 | bool foldBooleans, DAGCombinerInfo &DCI, |
4422 | const SDLoc &dl) const; |
4423 | |
4424 | // For targets which wrap address, unwrap for analysis. |
4425 | virtual SDValue unwrapAddress(SDValue N) const { return N; } |
4426 | |
4427 | /// Returns true (and the GlobalValue and the offset) if the node is a |
4428 | /// GlobalAddress + offset. |
4429 | virtual bool |
4430 | isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; |
4431 | |
4432 | /// This method will be invoked for all target nodes and for any |
4433 | /// target-independent nodes that the target has registered with invoke it |
4434 | /// for. |
4435 | /// |
4436 | /// The semantics are as follows: |
4437 | /// Return Value: |
4438 | /// SDValue.Val == 0 - No change was made |
4439 | /// SDValue.Val == N - N was replaced, is dead, and is already handled. |
4440 | /// otherwise - N should be replaced by the returned Operand. |
4441 | /// |
4442 | /// In addition, methods provided by DAGCombinerInfo may be used to perform |
4443 | /// more complex transformations. |
4444 | /// |
4445 | virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
4446 | |
4447 | /// Return true if it is profitable to move this shift by a constant amount |
4448 | /// through its operand, adjusting any immediate operands as necessary to |
4449 | /// preserve semantics. This transformation may not be desirable if it |
4450 | /// disrupts a particularly auspicious target-specific tree (e.g. bitfield |
4451 | /// extraction in AArch64). By default, it returns true. |
4452 | /// |
4453 | /// @param N the shift node |
4454 | /// @param Level the current DAGCombine legalization level. |
4455 | virtual bool isDesirableToCommuteWithShift(const SDNode *N, |
4456 | CombineLevel Level) const { |
4457 | SDValue ShiftLHS = N->getOperand(0); |
4458 | if (!ShiftLHS->hasOneUse()) |
4459 | return false; |
4460 | if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND && |
4461 | !ShiftLHS.getOperand(0)->hasOneUse()) |
4462 | return false; |
4463 | return true; |
4464 | } |
4465 | |
4466 | /// GlobalISel - return true if it is profitable to move this shift by a |
4467 | /// constant amount through its operand, adjusting any immediate operands as |
4468 | /// necessary to preserve semantics. This transformation may not be desirable |
4469 | /// if it disrupts a particularly auspicious target-specific tree (e.g. |
4470 | /// bitfield extraction in AArch64). By default, it returns true. |
4471 | /// |
4472 | /// @param MI the shift instruction |
4473 | /// @param IsAfterLegal true if running after legalization. |
4474 | virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI, |
4475 | bool IsAfterLegal) const { |
4476 | return true; |
4477 | } |
4478 | |
4479 | /// GlobalISel - return true if it's profitable to perform the combine: |
4480 | /// shl ([sza]ext x), y => zext (shl x, y) |
4481 | virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const { |
4482 | return true; |
4483 | } |
4484 | |
4485 | // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it's desirable to try and |
4486 | // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of |
4487 | // writing this) is: |
4488 | // With C as a power of 2 and C != 0 and C != INT_MIN: |
4489 | // AddAnd: |
4490 | // (icmp eq A, C) | (icmp eq A, -C) |
4491 | // -> (icmp eq and(add(A, C), ~(C + C)), 0) |
4492 | // (icmp ne A, C) & (icmp ne A, -C) |
4493 | // -> (icmp ne and(add(A, C), ~(C + C)), 0) |
4494 | // ABS: |
4495 | // (icmp eq A, C) | (icmp eq A, -C) |
4496 | // -> (icmp eq Abs(A), C) |
4497 | // (icmp ne A, C) & (icmp ne A, -C) |
4498 | // -> (icmp ne Abs(A), C) |
4499 | // |
4500 | // @param LogicOp the logic op |
4501 | // @param SETCC0 the first of the SETCC nodes |
4502 | // @param SETCC1 the second of the SETCC nodes |
4503 | virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC( |
4504 | const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const { |
4505 | return AndOrSETCCFoldKind::None; |
4506 | } |
4507 | |
4508 | /// Return true if it is profitable to combine an XOR of a logical shift |
4509 | /// to create a logical shift of NOT. This transformation may not be desirable |
4510 | /// if it disrupts a particularly auspicious target-specific tree (e.g. |
4511 | /// BIC on ARM/AArch64). By default, it returns true. |
4512 | virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const { |
4513 | return true; |
4514 | } |
4515 | |
4516 | /// Return true if the target has native support for the specified value type |
4517 | /// and it is 'desirable' to use the type for the given node type. e.g. On x86 |
4518 | /// i16 is legal, but undesirable since i16 instruction encodings are longer |
4519 | /// and some i16 instructions are slow. |
4520 | virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const { |
4521 | // By default, assume all legal types are desirable. |
4522 | return isTypeLegal(VT); |
4523 | } |
4524 | |
4525 | /// Return true if it is profitable for dag combiner to transform a floating |
4526 | /// point op of the specified opcode to an equivalent op of an integer |
4527 | /// type. e.g. f32 load -> i32 load can be profitable on ARM. |
4528 | virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/, |
4529 | EVT /*VT*/) const { |
4530 | return false; |
4531 | } |
4532 | |
4533 | /// This method queries the target whether it is beneficial for dag combiner to |
4534 | /// promote the specified node. If true, it should return the desired |
4535 | /// promotion type by reference. |
4536 | virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const { |
4537 | return false; |
4538 | } |
4539 | |
4540 | /// Return true if the target supports swifterror attribute. It optimizes |
4541 | /// loads and stores to reading and writing a specific register. |
4542 | virtual bool supportSwiftError() const { |
4543 | return false; |
4544 | } |
4545 | |
4546 | /// Return true if the target supports that a subset of CSRs for the given |
4547 | /// machine function is handled explicitly via copies. |
4548 | virtual bool supportSplitCSR(MachineFunction *MF) const { |
4549 | return false; |
4550 | } |
4551 | |
4552 | /// Return true if the target supports kcfi operand bundles. |
4553 | virtual bool supportKCFIBundles() const { return false; } |
4554 | |
4555 | /// Return true if the target supports ptrauth operand bundles. |
4556 | virtual bool supportPtrAuthBundles() const { return false; } |
4557 | |
4558 | /// Perform necessary initialization to handle a subset of CSRs explicitly |
4559 | /// via copies. This function is called at the beginning of instruction |
4560 | /// selection. |
4561 | virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { |
4562 | llvm_unreachable("Not Implemented"); |
4563 | } |
4564 | |
4565 | /// Insert explicit copies in entry and exit blocks. We copy a subset of |
4566 | /// CSRs to virtual registers in the entry block, and copy them back to |
4567 | /// physical registers in the exit blocks. This function is called at the end |
4568 | /// of instruction selection. |
4569 | virtual void insertCopiesSplitCSR( |
4570 | MachineBasicBlock *Entry, |
4571 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { |
4572 | llvm_unreachable("Not Implemented"); |
4573 | } |
4574 | |
4575 | /// Return the newly negated expression if the cost is not expensive and |
4576 | /// set the cost in \p Cost to indicate whether it is cheaper or neutral to |
4577 | /// do the negation. |
4578 | virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, |
4579 | bool LegalOps, bool OptForSize, |
4580 | NegatibleCost &Cost, |
4581 | unsigned Depth = 0) const; |
4582 | |
4583 | SDValue getCheaperOrNeutralNegatedExpression( |
4584 | SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, |
4585 | const NegatibleCost CostThreshold = NegatibleCost::Neutral, |
4586 | unsigned Depth = 0) const { |
4587 | NegatibleCost Cost = NegatibleCost::Expensive; |
4588 | SDValue Neg = |
4589 | getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4590 | if (!Neg) |
4591 | return SDValue(); |
4592 | |
4593 | if (Cost <= CostThreshold) |
4594 | return Neg; |
4595 | |
4596 | // Remove the newly created node to avoid side effects on the DAG. |
4597 | if (Neg->use_empty()) |
4598 | DAG.RemoveDeadNode(Neg.getNode()); |
4599 | return SDValue(); |
4600 | } |
4601 | |
4602 | /// This is the helper function to return the newly negated expression only |
4603 | /// when the cost is cheaper. |
4604 | SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, |
4605 | bool LegalOps, bool OptForSize, |
4606 | unsigned Depth = 0) const { |
4607 | return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize, |
4608 | NegatibleCost::Cheaper, Depth); |
4609 | } |
4610 | |
4611 | /// This is the helper function to return the newly negated expression if |
4612 | /// the cost is not expensive. |
4613 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, |
4614 | bool OptForSize, unsigned Depth = 0) const { |
4615 | NegatibleCost Cost = NegatibleCost::Expensive; |
4616 | return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4617 | } |
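 | |
 | // A hedged usage sketch of the negation helpers above; Op, DAG, DL, VT, |
 | // LegalOperations, ForCodeSize and OtherOperand are assumed to be in scope: |
 | // \code |
 | //   if (SDValue NegOp = getCheaperNegatedExpression(Op, DAG, LegalOperations, |
 | //                                                   ForCodeSize)) |
 | //     return DAG.getNode(ISD::FSUB, DL, VT, OtherOperand, NegOp); |
 | // \endcode |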
4618 | |
4619 | //===--------------------------------------------------------------------===// |
4620 | // Lowering methods - These methods must be implemented by targets so that |
4621 | // the SelectionDAGBuilder code knows how to lower these. |
4622 | // |
4623 | |
4624 | /// Target-specific splitting of values into parts that fit a register |
4625 | /// storing a legal type |
4626 | virtual bool splitValueIntoRegisterParts( |
4627 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
4628 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
4629 | return false; |
4630 | } |
4631 | |
4632 | /// Allows the target to handle physreg-carried dependency |
4633 | /// in a target-specific way. Used from ScheduleDAGSDNodes to decide whether |
4634 | /// to add the edge to the dependency graph. |
4635 | /// Def - input: Selection DAG node defining the physical register |
4636 | /// User - input: Selection DAG node using physical register |
4637 | /// Op - input: Number of User operand |
4638 | /// PhysReg - inout: set to the physical register if the edge is |
4639 | /// necessary, unchanged otherwise |
4640 | /// Cost - inout: physical register copy cost. |
4641 | /// Returns 'true' if the edge is necessary, 'false' otherwise |
4642 | virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, |
4643 | const TargetRegisterInfo *TRI, |
4644 | const TargetInstrInfo *TII, |
4645 | MCRegister &PhysReg, int &Cost) const { |
4646 | return false; |
4647 | } |
4648 | |
4649 | /// Target-specific combining of register parts into its original value |
4650 | virtual SDValue |
4651 | joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, |
4652 | const SDValue *Parts, unsigned NumParts, |
4653 | MVT PartVT, EVT ValueVT, |
4654 | std::optional<CallingConv::ID> CC) const { |
4655 | return SDValue(); |
4656 | } |
4657 | |
4658 | /// This hook must be implemented to lower the incoming (formal) arguments, |
4659 | /// described by the Ins array, into the specified DAG. The implementation |
4660 | /// should fill in the InVals array with legal-type argument values, and |
4661 | /// return the resulting token chain value. |
4662 | virtual SDValue LowerFormalArguments( |
4663 | SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, |
4664 | const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, |
4665 | SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { |
4666 | llvm_unreachable("Not Implemented"); |
4667 | } |
4668 | |
4669 | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
4670 | ArgListTy &Args) const {} |
4671 | |
4672 | /// This structure contains the information necessary for lowering |
4673 | /// pointer-authenticating indirect calls. It is equivalent to the "ptrauth" |
4674 | /// operand bundle found on the call instruction, if any. |
4675 | struct PtrAuthInfo { |
4676 | uint64_t Key; |
4677 | SDValue Discriminator; |
4678 | }; |
4679 | |
4680 | /// This structure contains all information that is necessary for lowering |
4681 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder |
4682 | /// needs to lower a call, and targets will see this struct in their LowerCall |
4683 | /// implementation. |
4684 | struct CallLoweringInfo { |
4685 | SDValue Chain; |
4686 | Type *RetTy = nullptr; |
4687 | bool RetSExt : 1; |
4688 | bool RetZExt : 1; |
4689 | bool IsVarArg : 1; |
4690 | bool IsInReg : 1; |
4691 | bool DoesNotReturn : 1; |
4692 | bool IsReturnValueUsed : 1; |
4693 | bool IsConvergent : 1; |
4694 | bool IsPatchPoint : 1; |
4695 | bool IsPreallocated : 1; |
4696 | bool NoMerge : 1; |
4697 | |
4698 | // IsTailCall should be modified by implementations of |
4699 | // TargetLowering::LowerCall that perform tail call conversions. |
4700 | bool IsTailCall = false; |
4701 | |
4702 | // Is Call lowering done post SelectionDAG type legalization. |
4703 | bool IsPostTypeLegalization = false; |
4704 | |
4705 | unsigned NumFixedArgs = -1; |
4706 | CallingConv::ID CallConv = CallingConv::C; |
4707 | SDValue Callee; |
4708 | ArgListTy Args; |
4709 | SelectionDAG &DAG; |
4710 | SDLoc DL; |
4711 | const CallBase *CB = nullptr; |
4712 | SmallVector<ISD::OutputArg, 32> Outs; |
4713 | SmallVector<SDValue, 32> OutVals; |
4714 | SmallVector<ISD::InputArg, 32> Ins; |
4715 | SmallVector<SDValue, 4> InVals; |
4716 | const ConstantInt *CFIType = nullptr; |
4717 | SDValue ConvergenceControlToken; |
4718 | |
4719 | std::optional<PtrAuthInfo> PAI; |
4720 | |
4721 | CallLoweringInfo(SelectionDAG &DAG) |
4722 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), |
4723 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), |
4724 | IsPatchPoint(false), IsPreallocated(false), NoMerge(false), |
4725 | DAG(DAG) {} |
4726 | |
4727 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { |
4728 | DL = dl; |
4729 | return *this; |
4730 | } |
4731 | |
4732 | CallLoweringInfo &setChain(SDValue InChain) { |
4733 | Chain = InChain; |
4734 | return *this; |
4735 | } |
4736 | |
4737 | // setCallee with target/module-specific attributes |
4738 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, |
4739 | SDValue Target, ArgListTy &&ArgsList) { |
4740 | RetTy = ResultType; |
4741 | Callee = Target; |
4742 | CallConv = CC; |
4743 | NumFixedArgs = ArgsList.size(); |
4744 | Args = std::move(ArgsList); |
4745 | |
4746 | DAG.getTargetLoweringInfo().markLibCallAttributes( |
4747 | &(DAG.getMachineFunction()), CC, Args); |
4748 | return *this; |
4749 | } |
4750 | |
4751 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, |
4752 | SDValue Target, ArgListTy &&ArgsList, |
4753 | AttributeSet ResultAttrs = {}) { |
4754 | RetTy = ResultType; |
4755 | IsInReg = ResultAttrs.hasAttribute(Attribute::InReg); |
4756 | RetSExt = ResultAttrs.hasAttribute(Attribute::SExt); |
4757 | RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt); |
4758 | NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge); |
4759 | |
4760 | Callee = Target; |
4761 | CallConv = CC; |
4762 | NumFixedArgs = ArgsList.size(); |
4763 | Args = std::move(ArgsList); |
4764 | return *this; |
4765 | } |
4766 | |
4767 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, |
4768 | SDValue Target, ArgListTy &&ArgsList, |
4769 | const CallBase &Call) { |
4770 | RetTy = ResultType; |
4771 | |
4772 | IsInReg = Call.hasRetAttr(Attribute::InReg); |
4773 | DoesNotReturn = |
4774 | Call.doesNotReturn() || |
4775 | (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode()));
4776 | IsVarArg = FTy->isVarArg(); |
4777 | IsReturnValueUsed = !Call.use_empty(); |
4778 | RetSExt = Call.hasRetAttr(Attribute::SExt); |
4779 | RetZExt = Call.hasRetAttr(Attribute::ZExt); |
4780 | NoMerge = Call.hasFnAttr(Attribute::NoMerge); |
4781 | |
4782 | Callee = Target; |
4783 | |
4784 | CallConv = Call.getCallingConv(); |
4785 | NumFixedArgs = FTy->getNumParams(); |
4786 | Args = std::move(ArgsList); |
4787 | |
4788 | CB = &Call; |
4789 | |
4790 | return *this; |
4791 | } |
4792 | |
4793 | CallLoweringInfo &setInRegister(bool Value = true) { |
4794 | IsInReg = Value; |
4795 | return *this; |
4796 | } |
4797 | |
4798 | CallLoweringInfo &setNoReturn(bool Value = true) { |
4799 | DoesNotReturn = Value; |
4800 | return *this; |
4801 | } |
4802 | |
4803 | CallLoweringInfo &setVarArg(bool Value = true) { |
4804 | IsVarArg = Value; |
4805 | return *this; |
4806 | } |
4807 | |
4808 | CallLoweringInfo &setTailCall(bool Value = true) { |
4809 | IsTailCall = Value; |
4810 | return *this; |
4811 | } |
4812 | |
4813 | CallLoweringInfo &setDiscardResult(bool Value = true) { |
4814 | IsReturnValueUsed = !Value; |
4815 | return *this; |
4816 | } |
4817 | |
4818 | CallLoweringInfo &setConvergent(bool Value = true) { |
4819 | IsConvergent = Value; |
4820 | return *this; |
4821 | } |
4822 | |
4823 | CallLoweringInfo &setSExtResult(bool Value = true) { |
4824 | RetSExt = Value; |
4825 | return *this; |
4826 | } |
4827 | |
4828 | CallLoweringInfo &setZExtResult(bool Value = true) { |
4829 | RetZExt = Value; |
4830 | return *this; |
4831 | } |
4832 | |
4833 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { |
4834 | IsPatchPoint = Value; |
4835 | return *this; |
4836 | } |
4837 | |
4838 | CallLoweringInfo &setIsPreallocated(bool Value = true) { |
4839 | IsPreallocated = Value; |
4840 | return *this; |
4841 | } |
4842 | |
4843 | CallLoweringInfo &setPtrAuth(PtrAuthInfo Value) { |
4844 | PAI = Value; |
4845 | return *this; |
4846 | } |
4847 | |
4848 | CallLoweringInfo &setIsPostTypeLegalization(bool Value = true) {
4849 | IsPostTypeLegalization = Value; |
4850 | return *this; |
4851 | } |
4852 | |
4853 | CallLoweringInfo &setCFIType(const ConstantInt *Type) { |
4854 | CFIType = Type; |
4855 | return *this; |
4856 | } |
4857 | |
4858 | CallLoweringInfo &setConvergenceControlToken(SDValue Token) { |
4859 | ConvergenceControlToken = Token; |
4860 | return *this; |
4861 | } |
4862 | |
4863 | ArgListTy &getArgs() { |
4864 | return Args; |
4865 | } |
4866 | }; |
4867 | |
4868 | /// This structure is used to pass arguments to makeLibCall function. |
4869 | struct MakeLibCallOptions { |
4870 | // By passing the list of types before softening to makeLibCall, the target
4871 | // hook shouldExtendTypeInLibCall can get the original type before softening.
4872 | ArrayRef<EVT> OpsVTBeforeSoften; |
4873 | EVT RetVTBeforeSoften; |
4874 | ArrayRef<Type *> OpsTypeOverrides; |
4875 | |
4876 | bool IsSigned : 1; |
4877 | bool DoesNotReturn : 1; |
4878 | bool IsReturnValueUsed : 1; |
4879 | bool IsPostTypeLegalization : 1; |
4880 | bool IsSoften : 1; |
4881 | |
4882 | MakeLibCallOptions() |
4883 | : IsSigned(false), DoesNotReturn(false), IsReturnValueUsed(true), |
4884 | IsPostTypeLegalization(false), IsSoften(false) {} |
4885 | |
4886 | MakeLibCallOptions &setIsSigned(bool Value = true) { |
4887 | IsSigned = Value; |
4888 | return *this; |
4889 | } |
4890 | |
4891 | MakeLibCallOptions &setNoReturn(bool Value = true) { |
4892 | DoesNotReturn = Value; |
4893 | return *this; |
4894 | } |
4895 | |
4896 | MakeLibCallOptions &setDiscardResult(bool Value = true) { |
4897 | IsReturnValueUsed = !Value; |
4898 | return *this; |
4899 | } |
4900 | |
4901 | MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { |
4902 | IsPostTypeLegalization = Value; |
4903 | return *this; |
4904 | } |
4905 | |
4906 | MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT, |
4907 | bool Value = true) { |
4908 | OpsVTBeforeSoften = OpsVT; |
4909 | RetVTBeforeSoften = RetVT; |
4910 | IsSoften = Value; |
4911 | return *this; |
4912 | } |
4913 | |
4914 | /// Override the argument type for an operand. Leave the type as null to use |
4915 | /// the type from the operand's node. |
4916 | MakeLibCallOptions &setOpsTypeOverrides(ArrayRef<Type *> OpsTypes) { |
4917 | OpsTypeOverrides = OpsTypes; |
4918 | return *this; |
4919 | } |
4920 | }; |
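
  // Illustrative sketch (not part of the interface): code that softens a
  // floating-point operation to a libcall might fill these options in roughly
  // as follows before handing them to makeLibCall; the node and operand names
  // here are hypothetical.
  //
  //   TargetLowering::MakeLibCallOptions CallOptions;
  //   EVT OpVTs[] = {Op0.getValueType(), Op1.getValueType()};
  //   CallOptions.setIsSigned(true)
  //       .setTypeListBeforeSoften(OpVTs, Node->getValueType(0));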
4921 | |
4922 | /// This function lowers an abstract call to a function into an actual call. |
4923 | /// This returns a pair of operands. The first element is the return value |
4924 | /// for the function (if RetTy is not VoidTy). The second element is the |
4925 | /// outgoing token chain. It calls LowerCall to do the actual lowering. |
4926 | std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; |
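
  // Illustrative sketch (not part of the interface): a SelectionDAG-based
  // caller typically fills in a CallLoweringInfo with the chained setters
  // above and then invokes LowerCallTo; the variable names are hypothetical.
  //
  //   TargetLowering::CallLoweringInfo CLI(DAG);
  //   CLI.setDebugLoc(dl)
  //       .setChain(Chain)
  //       .setCallee(CallConv, RetTy, CalleeNode, std::move(Args))
  //       .setTailCall(false);
  //   std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
  //   // Result.first is the return value (if any), Result.second the chain.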
4927 | |
4928 | /// This hook must be implemented to lower calls into the specified |
4929 | /// DAG. The outgoing arguments to the call are described by the Outs array, |
4930 | /// and the values to be returned by the call are described by the Ins |
4931 | /// array. The implementation should fill in the InVals array with legal-type |
4932 | /// return values from the call, and return the resulting token chain value. |
4933 | virtual SDValue |
4934 | LowerCall(CallLoweringInfo &/*CLI*/, |
4935 | SmallVectorImpl<SDValue> &/*InVals*/) const { |
4936 | llvm_unreachable("Not Implemented"); |
4937 | } |
4938 | |
4939 | /// Target-specific cleanup for formal ByVal parameters. |
4940 | virtual void HandleByVal(CCState *, unsigned &, Align) const {} |
4941 | |
4942 | /// This hook should be implemented to check whether the return values |
4943 | /// described by the Outs array can fit into the return registers. If false |
4944 | /// is returned, an sret-demotion is performed. |
4945 | virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, |
4946 | MachineFunction &/*MF*/, bool /*isVarArg*/, |
4947 | const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, |
4948 | LLVMContext &/*Context*/,
4949 | const Type *RetTy) const {
4950 | // Return true by default to get preexisting behavior. |
4951 | return true; |
4952 | } |
4953 | |
4954 | /// This hook must be implemented to lower outgoing return values, described |
4955 | /// by the Outs array, into the specified DAG. The implementation should |
4956 | /// return the resulting token chain value. |
4957 | virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, |
4958 | bool /*isVarArg*/, |
4959 | const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, |
4960 | const SmallVectorImpl<SDValue> & /*OutVals*/, |
4961 | const SDLoc & /*dl*/, |
4962 | SelectionDAG & /*DAG*/) const { |
4963 | llvm_unreachable("Not Implemented"); |
4964 | } |
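
  // Illustrative sketch (not part of the interface): a target normally
  // overrides the calling-convention hooks above in its own subclass;
  // "MyTargetLowering" is a hypothetical name.
  //
  //   class MyTargetLowering : public TargetLowering {
  //     SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
  //                                  bool isVarArg,
  //                                  const SmallVectorImpl<ISD::InputArg> &Ins,
  //                                  const SDLoc &dl, SelectionDAG &DAG,
  //                                  SmallVectorImpl<SDValue> &InVals) const override;
  //     SDValue LowerCall(CallLoweringInfo &CLI,
  //                       SmallVectorImpl<SDValue> &InVals) const override;
  //     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
  //                         const SmallVectorImpl<ISD::OutputArg> &Outs,
  //                         const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
  //                         SelectionDAG &DAG) const override;
  //   };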
4965 | |
4966 | /// Return true if result of the specified node is used by a return node |
4967 | /// only. It also computes and returns the input chain for the tail call.
4968 | /// |
4969 | /// This is used to determine whether it is possible to codegen a libcall as |
4970 | /// tail call at legalization time. |
4971 | virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const { |
4972 | return false; |
4973 | } |
4974 | |
4975 | /// Return true if the target may be able to emit the call instruction as a tail
4976 | /// call. This is used by optimization passes to determine if it's profitable |
4977 | /// to duplicate return instructions to enable tailcall optimization. |
4978 | virtual bool mayBeEmittedAsTailCall(const CallInst *) const { |
4979 | return false; |
4980 | } |
4981 | |
4982 | /// Return the register ID of the name passed in. Used by named register |
4983 | /// global variables extension. There is no target-independent behaviour |
4984 | /// so the default action is to bail. |
4985 | virtual Register getRegisterByName(const char* RegName, LLT Ty, |
4986 | const MachineFunction &MF) const { |
4987 | report_fatal_error("Named registers not implemented for this target");
4988 | } |
4989 | |
4990 | /// Return the type that should be used to zero or sign extend a |
4991 | /// zeroext/signext integer return value. FIXME: Some C calling conventions |
4992 | /// require the return type to be promoted, but this is not true all the time, |
4993 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling |
4994 | /// conventions. The frontend should handle this and include all of the |
4995 | /// necessary information. |
4996 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
4997 | ISD::NodeType /*ExtendKind*/) const { |
4998 | EVT MinVT = getRegisterType(MVT::i32); |
4999 | return VT.bitsLT(MinVT) ? MinVT : VT;
5000 | } |
5001 | |
5002 | /// For some targets, an LLVM struct type must be broken down into multiple |
5003 | /// simple types, but the calling convention specifies that the entire struct |
5004 | /// must be passed in a block of consecutive registers. |
5005 | virtual bool |
5006 | functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, |
5007 | bool isVarArg, |
5008 | const DataLayout &DL) const { |
5009 | return false; |
5010 | } |
5011 | |
5012 | /// For most targets, an LLVM type must be broken down into multiple |
5013 | /// smaller types. Usually the halves are ordered according to the endianness |
5014 | /// but for some platforms that would break. So this method will default to
5015 | /// matching the endianness but can be overridden. |
5016 | virtual bool |
5017 | shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const { |
5018 | return DL.isLittleEndian(); |
5019 | } |
5020 | |
5021 | /// Returns a 0 terminated array of registers that can be safely used as |
5022 | /// scratch registers. |
5023 | virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { |
5024 | return nullptr; |
5025 | } |
5026 | |
5027 | /// Returns a 0 terminated array of rounding control registers that can be |
5028 | /// attached to strict FP calls.
5029 | virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const { |
5030 | return ArrayRef<MCPhysReg>(); |
5031 | } |
5032 | |
5033 | /// This callback is used to prepare for a volatile or atomic load. |
5034 | /// It takes a chain node as input and returns the chain for the load itself. |
5035 | /// |
5036 | /// Having a callback like this is necessary for targets like SystemZ, |
5037 | /// which allows a CPU to reuse the result of a previous load indefinitely, |
5038 | /// even if a cache-coherent store is performed by another CPU. The default |
5039 | /// implementation does nothing. |
5040 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, |
5041 | SelectionDAG &DAG) const { |
5042 | return Chain; |
5043 | } |
5044 | |
5045 | /// This callback is invoked by the type legalizer to legalize nodes with an |
5046 | /// illegal operand type but legal result types. It replaces the |
5047 | /// LowerOperation callback in the type Legalizer. The reason we can not do |
5048 | /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to |
5049 | /// use this callback. |
5050 | /// |
5051 | /// TODO: Consider merging with ReplaceNodeResults. |
5052 | /// |
5053 | /// The target places new result values for the node in Results (their number |
5054 | /// and types must exactly match those of the original return values of |
5055 | /// the node), or leaves Results empty, which indicates that the node is not |
5056 | /// to be custom lowered after all. |
5057 | /// The default implementation calls LowerOperation. |
5058 | virtual void LowerOperationWrapper(SDNode *N, |
5059 | SmallVectorImpl<SDValue> &Results, |
5060 | SelectionDAG &DAG) const; |
5061 | |
5062 | /// This callback is invoked for operations that are unsupported by the |
5063 | /// target, which are registered to use 'custom' lowering, and whose defined |
5064 | /// values are all legal. If the target has no operations that require custom |
5065 | /// lowering, it need not implement this. The default implementation of this |
5066 | /// aborts. |
5067 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; |
5068 | |
5069 | /// This callback is invoked when a node result type is illegal for the |
5070 | /// target, and the operation was registered to use 'custom' lowering for that |
5071 | /// result type. The target places new result values for the node in Results |
5072 | /// (their number and types must exactly match those of the original return |
5073 | /// values of the node), or leaves Results empty, which indicates that the |
5074 | /// node is not to be custom lowered after all. |
5075 | /// |
5076 | /// If the target has no operations that require custom lowering, it need not |
5077 | /// implement this. The default implementation aborts. |
5078 | virtual void ReplaceNodeResults(SDNode * /*N*/, |
5079 | SmallVectorImpl<SDValue> &/*Results*/, |
5080 | SelectionDAG &/*DAG*/) const { |
5081 | llvm_unreachable("ReplaceNodeResults not implemented for this target!"); |
5082 | } |
5083 | |
5084 | /// This method returns the name of a target specific DAG node. |
5085 | virtual const char *getTargetNodeName(unsigned Opcode) const; |
5086 | |
5087 | /// This method returns a target specific FastISel object, or null if the |
5088 | /// target does not support "fast" ISel. |
5089 | virtual FastISel *createFastISel(FunctionLoweringInfo &, |
5090 | const TargetLibraryInfo *) const { |
5091 | return nullptr; |
5092 | } |
5093 | |
5094 | bool verifyReturnAddressArgumentIsConstant(SDValue Op, |
5095 | SelectionDAG &DAG) const; |
5096 | |
5097 | //===--------------------------------------------------------------------===// |
5098 | // Inline Asm Support hooks |
5099 | // |
5100 | |
5101 | /// This hook allows the target to expand an inline asm call to be explicit |
5102 | /// llvm code if it wants to. This is useful for turning simple inline asms |
5103 | /// into LLVM intrinsics, which gives the compiler more information about the |
5104 | /// behavior of the code. |
5105 | virtual bool ExpandInlineAsm(CallInst *) const { |
5106 | return false; |
5107 | } |
5108 | |
5109 | enum ConstraintType { |
5110 | C_Register, // Constraint represents specific register(s). |
5111 | C_RegisterClass, // Constraint represents any of register(s) in class. |
5112 | C_Memory, // Memory constraint. |
5113 | C_Address, // Address constraint. |
5114 | C_Immediate, // Requires an immediate. |
5115 | C_Other, // Something else. |
5116 | C_Unknown // Unsupported constraint. |
5117 | }; |
5118 | |
5119 | enum ConstraintWeight { |
5120 | // Generic weights. |
5121 | CW_Invalid = -1, // No match. |
5122 | CW_Okay = 0, // Acceptable. |
5123 | CW_Good = 1, // Good weight. |
5124 | CW_Better = 2, // Better weight. |
5125 | CW_Best = 3, // Best weight. |
5126 | |
5127 | // Well-known weights. |
5128 | CW_SpecificReg = CW_Okay, // Specific register operands. |
5129 | CW_Register = CW_Good, // Register operands. |
5130 | CW_Memory = CW_Better, // Memory operands. |
5131 | CW_Constant = CW_Best, // Constant operand. |
5132 | CW_Default = CW_Okay // Default or don't know type. |
5133 | }; |
5134 | |
5135 | /// This contains information for each constraint that we are lowering. |
5136 | struct AsmOperandInfo : public InlineAsm::ConstraintInfo { |
5137 | /// This contains the actual string for the code, like "m". TargetLowering |
5138 | /// picks the 'best' code from ConstraintInfo::Codes that most closely |
5139 | /// matches the operand. |
5140 | std::string ConstraintCode; |
5141 | |
5142 | /// Information about the constraint code, e.g. Register, RegisterClass, |
5143 | /// Memory, Other, Unknown. |
5144 | TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; |
5145 | |
5146 | /// If this is the result output operand or a clobber, this is null, |
5147 | /// otherwise it is the incoming operand to the CallInst. This gets |
5148 | /// modified as the asm is processed. |
5149 | Value *CallOperandVal = nullptr; |
5150 | |
5151 | /// The ValueType for the operand value. |
5152 | MVT ConstraintVT = MVT::Other; |
5153 | |
5154 | /// Copy constructor for copying from a ConstraintInfo. |
5155 | AsmOperandInfo(InlineAsm::ConstraintInfo Info) |
5156 | : InlineAsm::ConstraintInfo(std::move(Info)) {} |
5157 | |
5158 | /// Return true if this is an input operand that is a matching constraint
5159 | /// like "4". |
5160 | LLVM_ABI bool isMatchingInputConstraint() const; |
5161 | |
5162 | /// If this is an input matching constraint, this method returns the output |
5163 | /// operand it matches. |
5164 | LLVM_ABI unsigned getMatchedOperand() const; |
5165 | }; |
5166 | |
5167 | using AsmOperandInfoVector = std::vector<AsmOperandInfo>; |
5168 | |
5169 | /// Split up the constraint string from the inline assembly value into the |
5170 | /// specific constraints and their prefixes, and also tie in the associated |
5171 | /// operand values. If this returns an empty vector, and if the constraint |
5172 | /// string itself isn't empty, there was an error parsing. |
5173 | virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, |
5174 | const TargetRegisterInfo *TRI, |
5175 | const CallBase &Call) const; |
5176 | |
5177 | /// Examine constraint type and operand type and determine a weight value. |
5178 | /// The operand object must already have been set up with the operand type. |
5179 | virtual ConstraintWeight getMultipleConstraintMatchWeight( |
5180 | AsmOperandInfo &info, int maIndex) const; |
5181 | |
5182 | /// Examine constraint string and operand type and determine a weight value. |
5183 | /// The operand object must already have been set up with the operand type. |
5184 | virtual ConstraintWeight getSingleConstraintMatchWeight( |
5185 | AsmOperandInfo &info, const char *constraint) const; |
5186 | |
5187 | /// Determines the constraint code and constraint type to use for the specific |
5188 | /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. |
5189 | /// If the actual operand being passed in is available, it can be passed in as |
5190 | /// Op, otherwise an empty SDValue can be passed. |
5191 | virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, |
5192 | SDValue Op, |
5193 | SelectionDAG *DAG = nullptr) const; |
5194 | |
5195 | /// Given a constraint, return the type of constraint it is for this target. |
5196 | virtual ConstraintType getConstraintType(StringRef Constraint) const; |
5197 | |
5198 | using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>; |
5199 | using ConstraintGroup = SmallVector<ConstraintPair>; |
5200 | /// Given an OpInfo with list of constraints codes as strings, return a |
5201 | /// sorted Vector of pairs of constraint codes and their types in priority of |
5202 | /// what we'd prefer to lower them as. This may contain immediates that |
5203 | /// cannot be lowered, but it is meant to be a machine agnostic order of |
5204 | /// preferences. |
5205 | ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const; |
5206 | |
5207 | /// Given a physical register constraint (e.g. {edx}), return the register |
5208 | /// number and the register class for the register. |
5209 | /// |
5210 | /// Given a register class constraint, like 'r', if this corresponds directly |
5211 | /// to an LLVM register class, return a register of 0 and the register class |
5212 | /// pointer. |
5213 | /// |
5214 | /// This should only be used for C_Register constraints. On error, this |
5215 | /// returns a register number of 0 and a null register class pointer. |
5216 | virtual std::pair<unsigned, const TargetRegisterClass *> |
5217 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
5218 | StringRef Constraint, MVT VT) const; |
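
  // Illustrative sketch (not part of the interface): an override commonly
  // handles its single-letter register-class constraints itself and defers the
  // rest to the base implementation; the class and register names are
  // hypothetical.
  //
  //   std::pair<unsigned, const TargetRegisterClass *>
  //   MyTargetLowering::getRegForInlineAsmConstraint(
  //       const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  //     if (Constraint.size() == 1 && Constraint[0] == 'r')
  //       return std::make_pair(0U, &MyTarget::GPRRegClass);
  //     return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
  //   }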
5219 | |
5220 | virtual InlineAsm::ConstraintCode |
5221 | getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
5222 | if (ConstraintCode == "m") |
5223 | return InlineAsm::ConstraintCode::m; |
5224 | if (ConstraintCode == "o") |
5225 | return InlineAsm::ConstraintCode::o; |
5226 | if (ConstraintCode == "X") |
5227 | return InlineAsm::ConstraintCode::X; |
5228 | if (ConstraintCode == "p") |
5229 | return InlineAsm::ConstraintCode::p; |
5230 | return InlineAsm::ConstraintCode::Unknown; |
5231 | } |
5232 | |
5233 | /// Try to replace an X constraint, which matches anything, with another that |
5234 | /// has more specific requirements based on the type of the corresponding |
5235 | /// operand. This returns null if there is no replacement to make. |
5236 | virtual const char *LowerXConstraint(EVT ConstraintVT) const; |
5237 | |
5238 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
5239 | /// add anything to Ops. |
5240 | virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
5241 | std::vector<SDValue> &Ops, |
5242 | SelectionDAG &DAG) const; |
5243 | |
5244 | // Lower custom output constraints. If invalid, return SDValue(). |
5245 | virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, |
5246 | const SDLoc &DL, |
5247 | const AsmOperandInfo &OpInfo, |
5248 | SelectionDAG &DAG) const; |
5249 | |
5250 | // Targets may override this function to collect operands from the CallInst |
5251 | // and for example, lower them into the SelectionDAG operands. |
5252 | virtual void CollectTargetIntrinsicOperands(const CallInst &I, |
5253 | SmallVectorImpl<SDValue> &Ops, |
5254 | SelectionDAG &DAG) const; |
5255 | |
5256 | //===--------------------------------------------------------------------===// |
5257 | // Div utility functions |
5258 | // |
5259 | |
5260 | SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
5261 | bool IsAfterLegalTypes, |
5262 | SmallVectorImpl<SDNode *> &Created) const; |
5263 | SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
5264 | bool IsAfterLegalTypes, |
5265 | SmallVectorImpl<SDNode *> &Created) const; |
5266 | // Build sdiv by power-of-2 with conditional move instructions |
5267 | SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, |
5268 | SelectionDAG &DAG, |
5269 | SmallVectorImpl<SDNode *> &Created) const; |
5270 | |
5271 | /// Targets may override this function to provide custom SDIV lowering for |
5272 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM |
5273 | /// assumes SDIV is expensive and replaces it with a series of other integer |
5274 | /// operations. |
5275 | virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
5276 | SelectionDAG &DAG, |
5277 | SmallVectorImpl<SDNode *> &Created) const; |
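
  // For reference, the scalar arithmetic such a hook usually materializes for
  // a signed divide by 1 << Log2Divisor looks like this plain-C++ sketch
  // (bias the dividend by the sign, then shift); illustration only, assuming
  // an arithmetic right shift of negative values.
  //
  //   int32_t SDivByPow2(int32_t X, unsigned Log2Divisor) {
  //     int32_t Bias = (X >> 31) & int32_t((1u << Log2Divisor) - 1);
  //     return (X + Bias) >> Log2Divisor; // rounds toward zero, as sdiv does
  //   }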
5278 | |
5279 | /// Targets may override this function to provide custom SREM lowering for |
5280 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM |
5281 | /// assumes SREM is expensive and replaces it with a series of other integer |
5282 | /// operations. |
5283 | virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, |
5284 | SelectionDAG &DAG, |
5285 | SmallVectorImpl<SDNode *> &Created) const; |
5286 | |
5287 | /// Indicate whether this target prefers to combine FDIVs with the same |
5288 | /// divisor. If the transform should never be done, return zero. If the |
5289 | /// transform should be done, return the minimum number of divisor uses |
5290 | /// that must exist. |
5291 | virtual unsigned combineRepeatedFPDivisors() const { |
5292 | return 0; |
5293 | } |
5294 | |
5295 | /// Hooks for building estimates in place of slower divisions and square |
5296 | /// roots. |
5297 | |
5298 | /// Return either a square root or its reciprocal estimate value for the input |
5299 | /// operand. |
5300 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
5301 | /// 'Enabled' as set by a potential default override attribute. |
5302 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
5303 | /// refinement iterations required to generate a sufficient (though not |
5304 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
5305 | /// The boolean UseOneConstNR output is used to select a Newton-Raphson |
5306 | /// algorithm implementation that uses either one or two constants. |
5307 | /// The boolean Reciprocal is used to select whether the estimate is for the |
5308 | /// square root of the input operand or the reciprocal of its square root. |
5309 | /// A target may choose to implement its own refinement within this function. |
5310 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
5311 | /// any further refinement of the estimate. |
5312 | /// An empty SDValue return means no estimate sequence can be created. |
5313 | virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, |
5314 | int Enabled, int &RefinementSteps, |
5315 | bool &UseOneConstNR, bool Reciprocal) const { |
5316 | return SDValue(); |
5317 | } |
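
  // For reference, one Newton-Raphson step for a reciprocal square-root
  // estimate E of A is E' = E * (1.5 - 0.5 * A * E * E); this scalar sketch
  // only illustrates the refinement that is built around the returned value
  // when RefinementSteps is non-zero.
  //
  //   float RefineRsqrt(float A, float E) {
  //     return E * (1.5f - 0.5f * A * E * E);
  //   }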
5318 | |
5319 | /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is |
5320 | /// required for correctness since InstCombine might have canonicalized a |
5321 | /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall |
5322 | /// through to the default expansion/soften to libcall, we might introduce a |
5323 | /// link-time dependency on libm into a file that originally did not have one. |
5324 | SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const; |
5325 | |
5326 | /// Return a reciprocal estimate value for the input operand. |
5327 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
5328 | /// 'Enabled' as set by a potential default override attribute. |
5329 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
5330 | /// refinement iterations required to generate a sufficient (though not |
5331 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
5332 | /// A target may choose to implement its own refinement within this function. |
5333 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
5334 | /// any further refinement of the estimate. |
5335 | /// An empty SDValue return means no estimate sequence can be created. |
5336 | virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, |
5337 | int Enabled, int &RefinementSteps) const { |
5338 | return SDValue(); |
5339 | } |
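
  // Likewise, one Newton-Raphson step for a reciprocal estimate E of A is
  // E' = E * (2 - A * E); a scalar sketch of the refinement only.
  //
  //   float RefineRecip(float A, float E) {
  //     return E * (2.0f - A * E);
  //   }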
5340 | |
5341 | /// Return a target-dependent comparison result if the input operand is |
5342 | /// suitable for use with a square root estimate calculation. For example, the |
5343 | /// comparison may check if the operand is NAN, INF, zero, normal, etc. The |
5344 | /// result should be used as the condition operand for a select or branch. |
5345 | virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
5346 | const DenormalMode &Mode) const; |
5347 | |
5348 | /// Return a target-dependent result if the input operand is not suitable for |
5349 | /// use with a square root estimate calculation. |
5350 | virtual SDValue getSqrtResultForDenormInput(SDValue Operand, |
5351 | SelectionDAG &DAG) const { |
5352 | return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
5353 | } |
5354 | |
5355 | //===--------------------------------------------------------------------===// |
5356 | // Legalization utility functions |
5357 | // |
5358 | |
5359 | /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, |
5360 | /// respectively, each computing an n/2-bit part of the result. |
5361 | /// \param Result A vector that will be filled with the parts of the result |
5362 | /// in little-endian order. |
5363 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
5364 | /// if you want to control how low bits are extracted from the LHS. |
5365 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
5366 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning |
5367 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
5368 | /// \returns true if the node has been expanded, false if it has not |
5369 | bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, |
5370 | SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, |
5371 | SelectionDAG &DAG, MulExpansionKind Kind, |
5372 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
5373 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
5374 | |
5375 | /// Expand a MUL into two nodes. One that computes the high bits of |
5376 | /// the result and one that computes the low bits. |
5377 | /// \param HiLoVT The value type to use for the Lo and Hi nodes. |
5378 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
5379 | /// if you want to control how low bits are extracted from the LHS. |
5380 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
5381 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning |
5382 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
5383 | /// \returns true if the node has been expanded, false if it has not.
5384 | bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, |
5385 | SelectionDAG &DAG, MulExpansionKind Kind, |
5386 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
5387 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
5388 | |
5389 | /// Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit |
5390 | /// urem by constant and other arithmetic ops. The n/2-bit urem by constant |
5391 | /// will be expanded by DAGCombiner. This is not possible for all constant |
5392 | /// divisors. |
5393 | /// \param N Node to expand |
5394 | /// \param Result A vector that will be filled with the lo and high parts of |
5395 | /// the results. For *DIVREM, this will be the quotient parts followed |
5396 | /// by the remainder parts. |
5397 | /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be |
5398 | /// half of VT. |
5399 | /// \param LL Low bits of the LHS of the operation. You can use this |
5400 | /// parameter if you want to control how low bits are extracted from |
5401 | /// the LHS. |
5402 | /// \param LH High bits of the LHS of the operation. See LL for meaning. |
5403 | /// \returns true if the node has been expanded, false if it has not. |
5404 | bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result, |
5405 | EVT HiLoVT, SelectionDAG &DAG, |
5406 | SDValue LL = SDValue(), |
5407 | SDValue LH = SDValue()) const; |
5408 | |
5409 | /// Expand funnel shift. |
5410 | /// \param N Node to expand |
5411 | /// \returns The expansion if successful, SDValue() otherwise |
5412 | SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const; |
5413 | |
5414 | /// Expand rotations. |
5415 | /// \param N Node to expand |
5416 | /// \param AllowVectorOps expand vector rotate, this should only be performed |
5417 | /// if the legalization is happening outside of LegalizeVectorOps |
5418 | /// \returns The expansion if successful, SDValue() otherwise |
5419 | SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const; |
5420 | |
5421 | /// Expand shift-by-parts. |
5422 | /// \param N Node to expand |
5423 | /// \param Lo lower-output-part after conversion |
5424 | /// \param Hi upper-output-part after conversion |
5425 | void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, |
5426 | SelectionDAG &DAG) const; |
5427 | |
5428 | /// Expand float(f32) to SINT(i64) conversion |
5429 | /// \param N Node to expand |
5430 | /// \param Result output after conversion |
5431 | /// \returns true if the expansion was successful, false otherwise.
5432 | bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
5433 | |
5434 | /// Expand float to UINT conversion |
5435 | /// \param N Node to expand |
5436 | /// \param Result output after conversion |
5437 | /// \param Chain output chain after conversion |
5438 | /// \returns true if the expansion was successful, false otherwise.
5439 | bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, |
5440 | SelectionDAG &DAG) const; |
5441 | |
5442 | /// Expand UINT(i64) to double(f64) conversion |
5443 | /// \param N Node to expand |
5444 | /// \param Result output after conversion |
5445 | /// \param Chain output chain after conversion |
5446 | /// \returns true if the expansion was successful, false otherwise.
5447 | bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, |
5448 | SelectionDAG &DAG) const; |
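
  // Illustrative sketch (not part of the interface): a target's LowerOperation
  // can fall back to these helpers for a conversion it marked Custom, e.g.:
  //
  //   case ISD::FP_TO_UINT: {
  //     SDValue Result, Chain;
  //     if (expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG))
  //       return Result;
  //     return SDValue();
  //   }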
5449 | |
5450 | /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. |
5451 | SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; |
5452 | |
5453 | /// Expand fminimum/fmaximum into multiple comparisons with selects.
5454 | SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const; |
5455 | |
5456 | /// Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
5457 | SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const; |
5458 | |
5459 | /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. |
5460 | /// \param N Node to expand |
5461 | /// \returns The expansion result |
5462 | SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const; |
5463 | |
5464 | /// Truncate Op to ResultVT. If the result is exact, leave it alone. If it is |
5465 | /// not exact, force the result to be odd. |
5466 | /// \param ResultVT The type of result. |
5467 | /// \param Op The value to round. |
5468 | /// \returns The expansion result |
5469 | SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, |
5470 | SelectionDAG &DAG) const; |
5471 | |
5472 | /// Expand round(fp) to fp conversion |
5473 | /// \param N Node to expand |
5474 | /// \returns The expansion result |
5475 | SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const; |
5476 | |
5477 | /// Expand check for floating point class. |
5478 | /// \param ResultVT The type of intrinsic call result. |
5479 | /// \param Op The tested value. |
5480 | /// \param Test The test to perform. |
5481 | /// \param Flags The optimization flags. |
5482 | /// \returns The expansion result or SDValue() if it fails. |
5483 | SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, |
5484 | SDNodeFlags Flags, const SDLoc &DL, |
5485 | SelectionDAG &DAG) const; |
5486 | |
5487 | /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, |
5488 | /// vector nodes can only succeed if all operations are legal/custom. |
5489 | /// \param N Node to expand |
5490 | /// \returns The expansion result or SDValue() if it fails. |
5491 | SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const; |
5492 | |
5493 | /// Expand VP_CTPOP nodes. |
5494 | /// \returns The expansion result or SDValue() if it fails. |
5495 | SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const; |
5496 | |
5497 | /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, |
5498 | /// vector nodes can only succeed if all operations are legal/custom. |
5499 | /// \param N Node to expand |
5500 | /// \returns The expansion result or SDValue() if it fails. |
5501 | SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const; |
5502 | |
5503 | /// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes. |
5504 | /// \param N Node to expand |
5505 | /// \returns The expansion result or SDValue() if it fails. |
5506 | SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const; |
5507 | |
5508 | /// Expand CTTZ via Table Lookup. |
5509 | /// \param N Node to expand |
5510 | /// \returns The expansion result or SDValue() if it fails. |
5511 | SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
5512 | SDValue Op, unsigned NumBitsPerElt) const; |
5513 | |
5514 | /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, |
5515 | /// vector nodes can only succeed if all operations are legal/custom. |
5516 | /// \param N Node to expand |
5517 | /// \returns The expansion result or SDValue() if it fails. |
5518 | SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; |
5519 | |
5520 | /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes. |
5521 | /// \param N Node to expand |
5522 | /// \returns The expansion result or SDValue() if it fails. |
5523 | SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const; |
5524 | |
5525 | /// Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes. |
5526 | /// \param N Node to expand |
5527 | /// \returns The expansion result or SDValue() if it fails. |
5528 | SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const; |
5529 | |
5530 | /// Expand VECTOR_FIND_LAST_ACTIVE nodes |
5531 | /// \param N Node to expand |
5532 | /// \returns The expansion result or SDValue() if it fails. |
5533 | SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const; |
5534 | |
5535 | /// Expand ABS nodes. Expands vector/scalar ABS nodes, |
5536 | /// vector nodes can only succeed if all operations are legal/custom. |
5537 | /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) |
5538 | /// \param N Node to expand |
5539 | /// \param IsNegative indicate negated abs |
5540 | /// \returns The expansion result or SDValue() if it fails. |
5541 | SDValue expandABS(SDNode *N, SelectionDAG &DAG, |
5542 | bool IsNegative = false) const; |
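
  // The same (ABS x) identity in scalar terms, where the shift amount is the
  // bit width minus one (sketch only, assuming an arithmetic right shift):
  //
  //   int32_t Abs(int32_t X) {
  //     int32_t Sign = X >> 31;   // 0 for non-negative X, -1 (all ones) otherwise
  //     return (X + Sign) ^ Sign; // adds then flips, which negates negative X
  //   }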
5543 | |
5544 | /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes. |
5545 | /// \param N Node to expand |
5546 | /// \returns The expansion result or SDValue() if it fails. |
5547 | SDValue expandABD(SDNode *N, SelectionDAG &DAG) const; |
5548 | |
5549 | /// Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes. |
5550 | /// \param N Node to expand |
5551 | /// \returns The expansion result or SDValue() if it fails. |
5552 | SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const; |
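
  // For reference, a standard overflow-free rewrite for the unsigned flooring
  // average (AVGFLOORU) that such an expansion can use, in scalar terms
  // (sketch only):
  //
  //   uint32_t AvgFloorU(uint32_t A, uint32_t B) {
  //     return (A & B) + ((A ^ B) >> 1); // never overflows, unlike (A + B) / 2
  //   }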
5553 | |
5554 | /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64 |
5555 | /// scalar types. Returns SDValue() if expand fails. |
5556 | /// \param N Node to expand |
5557 | /// \returns The expansion result or SDValue() if it fails. |
5558 | SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const; |
5559 | |
5560 | /// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with |
5561 | /// i16/i32/i64 scalar types. Returns SDValue() if expand fails. \param N Node |
5562 | /// to expand \returns The expansion result or SDValue() if it fails. |
5563 | SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const; |
5564 | |
5565 | /// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes. |
5566 | /// Returns SDValue() if expand fails. |
5567 | /// \param N Node to expand |
5568 | /// \returns The expansion result or SDValue() if it fails. |
5569 | SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const; |
5570 | |
5571 | /// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with |
5572 | /// i8/i16/i32/i64 scalar types. \param N Node to expand \returns The |
5573 | /// expansion result or SDValue() if it fails. |
5574 | SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const; |
5575 | |
5576 | /// Turn load of vector type into a load of the individual elements. |
5577 | /// \param LD load to expand |
5578 | /// \returns BUILD_VECTOR and TokenFactor nodes. |
5579 | std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD, |
5580 | SelectionDAG &DAG) const; |
5581 | |
5582 | /// Turn a store of a vector type into stores of the individual elements.
5583 | /// \param ST Store with a vector value type |
5584 | /// \returns TokenFactor of the individual store chains. |
5585 | SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
5586 | |
5587 | /// Expands an unaligned load to 2 half-size loads for an integer, and |
5588 | /// possibly more for vectors. |
5589 | std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, |
5590 | SelectionDAG &DAG) const; |
5591 | |
5592 | /// Expands an unaligned store to 2 half-size stores for integer values, and |
5593 | /// possibly more for vectors. |
5594 | SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
5595 | |
5596 | /// Increments memory address \p Addr according to the type of the value |
5597 | /// \p DataVT that should be stored. If the data is stored in compressed |
5598 | /// form, the memory address should be incremented according to the number of |
5599 | /// the stored elements. This number is equal to the number of '1's bits |
5600 | /// in the \p Mask. |
5601 | /// \p DataVT is a vector type. \p Mask is a vector value. |
5602 | /// \p DataVT and \p Mask have the same number of vector elements. |
5603 | SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, |
5604 | EVT DataVT, SelectionDAG &DAG, |
5605 | bool IsCompressedMemory) const; |
5606 | |
5607 | /// Get a pointer to vector element \p Idx located in memory for a vector of |
5608 | /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of |
5609 | /// bounds the returned pointer is unspecified, but will be within the vector |
5610 | /// bounds. |
5611 | SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, |
5612 | SDValue Index) const; |
5613 | |
5614 | /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located |
5615 | /// in memory for a vector of type \p VecVT starting at a base address of |
5616 | /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the |
5617 | /// returned pointer is unspecified, but the value returned will be such that |
5618 | /// the entire subvector would be within the vector bounds. |
5619 | SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, |
5620 | EVT SubVecVT, SDValue Index) const; |
5621 | |
5622 | /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This |
5623 | /// method accepts integers as its arguments. |
5624 | SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const; |
5625 | |
5626 | /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This |
5627 | /// method accepts integers as its arguments. |
5628 | SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; |
5629 | |
5630 | /// Method for building the DAG expansion of ISD::[US]CMP. This |
5631 | /// method accepts integers as its arguments |
5632 | SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const; |
5633 | |
5634 | /// Method for building the DAG expansion of ISD::[US]SHLSAT. This |
5635 | /// method accepts integers as its arguments. |
5636 | SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const; |
5637 | |
5638 | /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This |
5639 | /// method accepts integers as its arguments. |
5640 | SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; |
5641 | |
5642 | /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This |
5643 | /// method accepts integers as its arguments. |
5644 | /// Note: This method may fail if the division could not be performed |
5645 | /// within the type. Clients must retry with a wider type if this happens. |
5646 | SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, |
5647 | SDValue LHS, SDValue RHS, |
5648 | unsigned Scale, SelectionDAG &DAG) const; |
5649 | |
5650 | /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion |
5651 | /// always succeeds and populates the Result and Overflow arguments.
5652 | void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
5653 | SelectionDAG &DAG) const; |
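
  // For reference, the unsigned-add case reduces to an add plus a compare, as
  // in this scalar sketch (not part of the interface):
  //
  //   uint32_t UAddO(uint32_t LHS, uint32_t RHS, bool &Overflow) {
  //     uint32_t Result = LHS + RHS;
  //     Overflow = Result < LHS; // wrapped around iff the sum is smaller
  //     return Result;
  //   }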
5654 | |
5655 | /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion |
5656 | /// always succeeds and populates the Result and Overflow arguments.
5657 | void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
5658 | SelectionDAG &DAG) const; |
5659 | |
5660 | /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether |
5661 | /// expansion was successful and populates the Result and Overflow arguments. |
5662 | bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
5663 | SelectionDAG &DAG) const; |
5664 | |
5665 | /// Calculate the product twice the width of LHS and RHS. If HiLHS/HiRHS are |
5666 | /// non-null they will be included in the multiplication. The expansion works |
5667 | /// by splitting the 2 inputs into 4 pieces that we can multiply and add |
5668 | /// together without needing MULH or MUL_LOHI.
5669 | void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, |
5670 | SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, |
5671 | SDValue HiLHS = SDValue(), |
5672 | SDValue HiRHS = SDValue()) const; |
5673 | |
5674 | /// Calculate full product of LHS and RHS either via a libcall or through |
5675 | /// brute force expansion of the multiplication. The expansion works by |
5676 | /// splitting the 2 inputs into 4 pieces that we can multiply and add together |
5677 | /// without needing MULH or MUL_LOHI. |
5678 | void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, |
5679 | const SDValue LHS, const SDValue RHS, SDValue &Lo, |
5680 | SDValue &Hi) const; |
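
  // A scalar sketch of the splitting scheme described above: a 32x32->64-bit
  // unsigned multiply built from 16-bit halves, so every multiply is narrow;
  // the wide additions below stand in for the add/carry nodes the expansion
  // emits (illustration only, not part of the interface).
  //
  //   uint64_t WideMulU32(uint32_t A, uint32_t B) {
  //     uint32_t ALo = A & 0xFFFF, AHi = A >> 16;
  //     uint32_t BLo = B & 0xFFFF, BHi = B >> 16;
  //     uint64_t LoLo = uint64_t(ALo) * BLo;                        // low x low
  //     uint64_t Cross = uint64_t(ALo) * BHi + uint64_t(AHi) * BLo; // cross terms
  //     uint64_t HiHi = uint64_t(AHi) * BHi;                        // high x high
  //     return LoLo + (Cross << 16) + (HiHi << 32);
  //   }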
5681 | |
5682 | /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified, |
5683 | /// only the first Count elements of the vector are used. |
5684 | SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; |
5685 | |
5686 | /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation. |
5687 | SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const; |
5688 | |
5689 | /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal. |
5690 | /// Returns true if the expansion was successful. |
5691 | bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const; |
5692 | |
5693 | /// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This |
5694 | /// method accepts vectors as its arguments. |
5695 | SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const; |
5696 | |
5697 | /// Expand a VECTOR_COMPRESS into a sequence that extracts each element, stores
5698 | /// it temporarily, and advances the store position, before re-loading the final vector.
5699 | SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const; |
5700 | |
5701 | /// Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, |
5702 | /// consisting of zext/sext, extract_subvector, mul and add operations. |
5703 | SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const; |
5704 | |
5705 | /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC |
5706 | /// on the current target. A VP_SETCC will additionally be given a Mask |
5707 | /// and/or EVL not equal to SDValue(). |
5708 | /// |
5709 | /// If the SETCC has been legalized using AND / OR, then the legalized node |
5710 | /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert |
5711 | /// will be set to false. This will also hold if the VP_SETCC has been |
5712 | /// legalized using VP_AND / VP_OR. |
5713 | /// |
5714 | /// If the SETCC / VP_SETCC has been legalized by using |
5715 | /// getSetCCSwappedOperands(), then the values of LHS and RHS will be |
5716 | /// swapped, CC will be set to the new condition, and NeedInvert will be set |
5717 | /// to false. |
5718 | /// |
5719 | /// If the SETCC / VP_SETCC has been legalized using the inverse condcode, |
5720 | /// then LHS and RHS will be unchanged, CC will set to the inverted condcode, |
5721 | /// and NeedInvert will be set to true. The caller must invert the result of |
5722 | /// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to |
5723 | /// swap the effect of a true/false result. |
5724 | /// |
5725 | /// \returns true if the SETCC / VP_SETCC has been legalized, false if it |
5726 | /// hasn't. |
5727 | bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, |
5728 | SDValue &RHS, SDValue &CC, SDValue Mask, |
5729 | SDValue EVL, bool &NeedInvert, const SDLoc &dl, |
5730 | SDValue &Chain, bool IsSignaling = false) const; |
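
  // Illustrative sketch (not part of the interface) of how a caller reacts to
  // the outcomes described above:
  //
  //   bool NeedInvert = false;
  //   if (TLI.LegalizeSetCCCondCode(DAG, VT, LHS, RHS, CC, /*Mask=*/SDValue(),
  //                                 /*EVL=*/SDValue(), NeedInvert, dl, Chain)) {
  //     // If RHS/CC were cleared, LHS already holds the legalized value.
  //     SDValue Res = RHS.getNode()
  //                       ? DAG.getSetCC(dl, VT, LHS, RHS,
  //                                      cast<CondCodeSDNode>(CC)->get())
  //                       : LHS;
  //     if (NeedInvert)
  //       Res = DAG.getLogicalNOT(dl, Res, VT);
  //   }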
5731 | |
5732 | //===--------------------------------------------------------------------===// |
5733 | // Instruction Emitting Hooks |
5734 | // |
5735 | |
5736 | /// This method should be implemented by targets that mark instructions with |
5737 | /// the 'usesCustomInserter' flag. These instructions are special in various |
5738 | /// ways, which require special support to insert. The specified MachineInstr |
5739 | /// is created but not inserted into any basic blocks, and this method is |
5740 | /// called to expand it into a sequence of instructions, potentially also |
5741 | /// creating new basic blocks and control flow. |
5742 | /// As long as the returned basic block is different (i.e., we created a new |
5743 | /// one), the custom inserter is free to modify the rest of \p MBB. |
5744 | virtual MachineBasicBlock * |
5745 | EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; |
5746 | |
5747 | /// This method should be implemented by targets that mark instructions with |
5748 | /// the 'hasPostISelHook' flag. These instructions must be adjusted after |
5749 | /// instruction selection by target hooks. e.g. To fill in optional defs for |
5750 | /// ARM 's' setting instructions. |
5751 | virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, |
5752 | SDNode *Node) const; |
5753 | |
5754 | /// If this function returns true, SelectionDAGBuilder emits a |
5755 | /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. |
5756 | virtual bool useLoadStackGuardNode(const Module &M) const { return false; } |
5757 | |
5758 | virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
5759 | const SDLoc &DL) const { |
5760 | llvm_unreachable("not implemented for this target"); |
5761 | } |
5762 | |
5763 | /// Lower TLS global address SDNode for target independent emulated TLS model. |
5764 | virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, |
5765 | SelectionDAG &DAG) const; |
5766 | |
5767 | /// Expands target specific indirect branch for the case of JumpTable |
5768 | /// expansion. |
5769 | virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, |
5770 | SDValue Addr, int JTI, |
5771 | SelectionDAG &DAG) const; |
5772 | |
5773 | // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) |
5774 | // If we're comparing for equality to zero and isCtlzFast is true, expose the |
5775 | // fact that this can be implemented as a ctlz/srl pair, so that the dag |
5776 | // combiner can fold the new nodes. |
5777 | SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; |
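
  // In scalar terms the rewrite relies on the count being the full bit width
  // only when the input is zero, e.g. for 32 bits (sketch only, using a
  // builtin with the zero case handled explicitly):
  //
  //   bool IsZero32(uint32_t X) {
  //     unsigned Count = X ? __builtin_clz(X) : 32; // ctlz with zero defined
  //     return (Count >> 5) != 0;                   // 1 only when Count == 32
  //   }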
5778 | |
5779 | // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y` |
5780 | virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const { |
5781 | return true; |
5782 | } |
5783 | |
5784 | // Expand vector operation by dividing it into smaller length operations and |
5785 | // joining their results. SDValue() is returned when expansion did not happen. |
5786 | SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const; |
5787 | |
5788 | /// Replace an extraction of a load with a narrowed load. |
5789 | /// |
5790 | /// \param ResultVT type of the result extraction. |
5791 | /// \param InVecVT type of the input vector, with bitcasts resolved.
5792 | /// \param EltNo index of the vector element to load.
5793 | /// \param OriginalLoad vector load to be replaced.
5794 | /// \returns A load of \p ResultVT on success, SDValue() on failure.
5795 | SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, |
5796 | EVT InVecVT, SDValue EltNo, |
5797 | LoadSDNode *OriginalLoad, |
5798 | SelectionDAG &DAG) const; |
5799 | |
5800 | private: |
5801 | SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
5802 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
5803 | SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
5804 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
5805 | |
5806 | SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, |
5807 | SDValue N1, ISD::CondCode Cond, |
5808 | DAGCombinerInfo &DCI, |
5809 | const SDLoc &DL) const; |
5810 | |
5811 | // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 |
5812 | SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( |
5813 | EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, |
5814 | DAGCombinerInfo &DCI, const SDLoc &DL) const; |
5815 | |
5816 | SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, |
5817 | SDValue CompTargetNode, ISD::CondCode Cond, |
5818 | DAGCombinerInfo &DCI, const SDLoc &DL, |
5819 | SmallVectorImpl<SDNode *> &Created) const; |
5820 | SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, |
5821 | ISD::CondCode Cond, DAGCombinerInfo &DCI, |
5822 | const SDLoc &DL) const; |
5823 | |
5824 | SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, |
5825 | SDValue CompTargetNode, ISD::CondCode Cond, |
5826 | DAGCombinerInfo &DCI, const SDLoc &DL, |
5827 | SmallVectorImpl<SDNode *> &Created) const; |
5828 | SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, |
5829 | ISD::CondCode Cond, DAGCombinerInfo &DCI, |
5830 | const SDLoc &DL) const; |
5831 | }; |
5832 | |
5833 | /// Given an LLVM IR type and return type attributes, compute the return value |
5834 | /// EVTs and flags, and optionally also the offsets, if the return value is |
5835 | /// being lowered to memory. |
5836 | LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, |
5837 | AttributeList attr, |
5838 | SmallVectorImpl<ISD::OutputArg> &Outs, |
5839 | const TargetLowering &TLI, const DataLayout &DL); |
5840 | |
5841 | } // end namespace llvm |
5842 | |
5843 | #endif // LLVM_CODEGEN_TARGETLOWERING_H |
5844 |
- shouldExtendTypeInLibCall
- shouldExpandAtomicLoadInIR
- shouldCastAtomicLoadInIR
- shouldExpandAtomicStoreInIR
- shouldCastAtomicStoreInIR
- shouldExpandAtomicCmpXchgInIR
- shouldExpandAtomicRMWInIR
- shouldCastAtomicRMWIInIR
- lowerIdempotentRMWIntoFencedLoad
- getExtendForAtomicOps
- getExtendForAtomicCmpSwapArg
- shouldNormalizeToSelectSequence
- isProfitableToCombineMinNumMaxNum
- convertSelectOfConstantsToMath
- decomposeMulByConstant
- isMulAddWithConstProfitable
- shouldUseStrictFP_TO_INT
- isBeneficialToExpandPowI
- setBooleanContents
- setBooleanContents
- setBooleanVectorContents
- setSchedulingPreference
- setStackPointerRegisterToSaveRestore
- setHasMultipleConditionRegisters
- setHasExtractBitsInsn
- addBypassSlowDiv
- addRegisterClass
- setOperationAction
- setOperationAction
- setOperationAction
- setLoadExtAction
- setLoadExtAction
- setLoadExtAction
- setAtomicLoadExtAction
- setAtomicLoadExtAction
- setAtomicLoadExtAction
- setTruncStoreAction
- setIndexedLoadAction
- setIndexedLoadAction
- setIndexedStoreAction
- setIndexedStoreAction
- setIndexedMaskedLoadAction
- setIndexedMaskedStoreAction
- setCondCodeAction
- setCondCodeAction
- setPartialReduceMLAAction
- setPartialReduceMLAAction
- AddPromotedToType
- setOperationPromotedToType
- setOperationPromotedToType
- setTargetDAGCombine
- setMinFunctionAlignment
- setPrefFunctionAlignment
- setPrefLoopAlignment
- setMaxBytesForAlignment
- setMinStackArgumentAlignment
- setMaxAtomicSizeInBitsSupported
- setMaxDivRemBitWidthSupported
- setMaxLargeFPConvertBitWidthSupported
- setMinCmpXchgSizeInBits
- setSupportsUnalignedAtomics
- getAddrModeArguments
- AddrMode
- AddrMode
- addressingModeSupportsTLS
- getPreferredLargeGEPBaseOffset
- isLegalICmpImmediate
- isLegalAddImmediate
- isLegalAddScalableImmediate
- isLegalStoreImmediate
- shouldConvertSplatType
- shouldConvertPhiType
- isCommutativeBinOp
- isBinOp
- isTruncateFree
- allowTruncateForTailCall
- isTruncateFree
- isTruncateFree
- isTruncateFree
- isProfitableToHoist
- isExtFree
- isExtLoad
- isZExtFree
- isZExtFree
- isZExtFree
- isZExtFree
- isSExtCheaperThanZExt
- signExtendConstant
- optimizeExtendOrTruncateConversion
- hasPairedLoad
- hasVectorBlend
- getMaxSupportedInterleaveFactor
- lowerInterleavedLoad
- lowerInterleavedStore
- lowerInterleavedVPLoad
- lowerInterleavedVPStore
- lowerDeinterleaveIntrinsicToLoad
- lowerInterleaveIntrinsicToStore
- isFPExtFree
- isFPExtFoldable
- isFPExtFoldable
- isVectorLoadExtDesirable
- isFNegFree
- isFAbsFree
- isFMAFasterThanFMulAndFAdd
- isFMAFasterThanFMulAndFAdd
- isFMAFasterThanFMulAndFAdd
- isFMADLegal
- isFMADLegal
- generateFMAsInMachineCombiner
- isNarrowingProfitable
- shouldFoldSelectWithIdentityConstant
- shouldConvertConstantLoadToIntImm
- isExtractSubvectorCheap
- shouldScalarizeBinop
- isExtractVecEltCheap
- shouldFormOverflowOp
- aggressivelyPreferBuildVectorSources
- shouldConsiderGEPOffsetSplit
- shouldAvoidTransformToShift
- shouldFoldSelectWithSingleBitTest
- shouldKeepZExtForFP16Conv
- shouldConvertFpToSat
- shouldExpandCmpUsingSelects
- shouldPreservePtrArith
- isComplexDeinterleavingSupported
- isComplexDeinterleavingOperationSupported
- getPreferredFPToIntOpcode
- createComplexDeinterleavingIR
- setLibcallName
- setLibcallName
- getLibcallName
- setCmpLibcallCC
- getCmpLibcallCC
- setLibcallCallingConv
- getLibcallCallingConv
- shouldMergeStoreOfLoadsOverCall
- IndexedModeActionsBits
- setIndexedModeAction
- getIndexedModeAction
- isExtFreeImpl
- TargetLowering
- TargetLowering
- operator=
- isSDNodeSourceOfDivergence
- isReassocProfitable
- isReassocProfitable
- isSDNodeAlwaysUniform
- getPreIndexedAddressParts
- getPostIndexedAddressParts
- isIndexingLegal
- getJumpTableRegTy
- LowerCustomJumpTableEntry
- isInlineAsmTargetBranch
- visitMaskedLoad
- visitMaskedStore
- TargetLoweringOpt
- TargetLoweringOpt
- LegalTypes
- LegalOperations
- CombineTo
- targetShrinkDemandedConstant
- shouldSimplifyDemandedVectorElts
- isTargetCanonicalConstantNode
- DAGCombinerInfo
- DAGCombinerInfo
- isBeforeLegalize
- isBeforeLegalizeOps
- isAfterLegalizeDAG
- getDAGCombineLevel
- isCalledByLegalizer
- unwrapAddress
- isDesirableToCommuteWithShift
- isDesirableToCommuteWithShift
- isDesirableToPullExtFromShl
- isDesirableToCombineLogicOpOfSETCC
- isDesirableToCommuteXorWithShift
- isTypeDesirableForOp
- isDesirableToTransformToIntegerOp
- IsDesirableToPromoteOp
- supportSwiftError
- supportSplitCSR
- supportKCFIBundles
- supportPtrAuthBundles
- initializeSplitCSR
- insertCopiesSplitCSR
- getCheaperOrNeutralNegatedExpression
- getCheaperNegatedExpression
- getNegatedExpression
- splitValueIntoRegisterParts
- checkForPhysRegDependency
- joinRegisterPartsIntoValue
- LowerFormalArguments
- markLibCallAttributes
- PtrAuthInfo
- CallLoweringInfo
- CallLoweringInfo
- setDebugLoc
- setChain
- setLibCallee
- setCallee
- setCallee
- setInRegister
- setNoReturn
- setVarArg
- setTailCall
- setDiscardResult
- setConvergent
- setSExtResult
- setZExtResult
- setIsPatchPoint
- setIsPreallocated
- setPtrAuth
- setIsPostTypeLegalization
- setCFIType
- setConvergenceControlToken
- getArgs
- MakeLibCallOptions
- MakeLibCallOptions
- setIsSigned
- setNoReturn
- setDiscardResult
- setIsPostTypeLegalization
- setTypeListBeforeSoften
- setOpsTypeOverrides
- LowerCall
- HandleByVal
- CanLowerReturn
- LowerReturn
- isUsedByReturnOnly
- mayBeEmittedAsTailCall
- getRegisterByName
- getTypeForExtReturn
- functionArgumentNeedsConsecutiveRegisters
- shouldSplitFunctionArgumentsAsLittleEndian
- getScratchRegisters
- getRoundingControlRegisters
- prepareVolatileOrAtomicLoad
- ReplaceNodeResults
- createFastISel
- ExpandInlineAsm
- ConstraintType
- ConstraintWeight
- AsmOperandInfo
- AsmOperandInfo
- getInlineAsmMemConstraint
- combineRepeatedFPDivisors
- getSqrtEstimate
- getRecipEstimate
- getSqrtResultForDenormInput
- useLoadStackGuardNode
- emitStackGuardXorFP
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more